Move all source/header files to hsakmt subfolder

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
author: Oded Gabbay <oded.gabbay@gmail.com> 2015-09-30 11:42:21 +0300
committer: Oded Gabbay <oded.gabbay@gmail.com> 2015-09-30 11:43:59 +0300
commit: 27675a5f87f0c11ab8a59f119518f627598c4caf (patch)
tree: 62dbe253bbd7df7b2e85d84668b89ce7adda6e86 /hsakmt
parent: bbdfa9eeb6dd015f22479368d2440d62785a4bb8 (diff)
21 files changed, 5380 insertions, 0 deletions
diff --git a/hsakmt/Makefile b/hsakmt/Makefile
new file mode 100644
index 0000000..5608ab7
--- /dev/null
+++ b/hsakmt/Makefile
@@ -0,0 +1,53 @@
+# Build script for the libhsakmt shared library.
+#
+# Usage:
+#   make            build both flavours (same as `make all`)
+#   make lnx        32-bit build  -> ../build/lnx/libhsakmt.so
+#   make lnx64a     64-bit build  -> ../build/lnx64a/libhsakmt.so
+#   make clean      remove the whole build tree
+#
+# NOTE: the output directory is derived from $(MAKECMDGOALS), so name exactly
+# one flavour per invocation (`all` takes care of that by recursing).
+
+# Never leave a half-written object/library behind when a recipe fails.
+.DELETE_ON_ERROR:
+
+# Include directories
+INCLUDES += ../include
+CFLAGS += $(foreach DIR,$(INCLUDES),-I$(DIR))
+
+# Compiler driver used for compiling and linking.  Defaults to gcc (several
+# warning flags below are GCC specific); override with `make CC=...`.
+CC = gcc
+
+LIB_NAME = libhsakmt.so
+LIB_MAJOR_VER = 1
+LIB_VERSION_SCRIPT = libhsakmt.ver
+
+# Compiler options
+# Position-independent code is required to build a shared library
+CFLAGS += -fPIC
+CFLAGS += -W -Wall -Wextra -Werror -Wno-unused-parameter
+CFLAGS += -Wformat-security -Wswitch-default -Wundef \
+	  -Wshadow -Wpointer-arith -Wbad-function-cast -Wcast-qual \
+	  -Wlogical-op -Wstrict-prototypes -Wmissing-prototypes    \
+	  -Wmissing-declarations -Wredundant-decls                 \
+	  -Wunreachable-code
+CFLAGS += -std=gnu99 -ggdb -pthread -fvisibility=hidden -O2
+
+LDFLAGS += -pthread -Wl,--version-script=$(LIB_VERSION_SCRIPT) -Wl,-soname=$(LIB_NAME).$(LIB_MAJOR_VER)
+
+# Libraries are listed after the objects on the link line so that their
+# symbols are still resolved when the linker runs with --as-needed.
+LDLIBS += -lrt
+
+OBJS = debug.o globals.o memory.o perfctr.o time.o version.o \
+    events.o openclose.o queues.o topology.o fmm.o pmc_table.o \
+    libhsakmt.o
+
+.PHONY: all lnx lnx64a clean
+
+# Default target.  Each flavour is built by its own sub-make: BUILDDIR depends
+# on $(MAKECMDGOALS), so building both from a single invocation would place
+# the 32-bit and the 64-bit objects in the very same directory.
+all:
+	$(MAKE) lnx
+	$(MAKE) lnx64a
+
+BUILD_ROOT = ../build
+BUILDDIR = $(BUILD_ROOT)/$(MAKECMDGOALS)
+
+TARGET = $(addprefix $(BUILDDIR)/,$(OBJS))
+
+# The version script is a real input of the link step: relink when it changes.
+$(BUILDDIR)/$(LIB_NAME).$(LIB_MAJOR_VER): $(TARGET) $(LIB_VERSION_SCRIPT)
+	$(CC) -shared $(LDFLAGS) -o $@ $(filter %.o,$^) $(LDLIBS)
+
+# Unversioned development symlink pointing at the versioned library.
+$(BUILDDIR)/$(LIB_NAME): $(BUILDDIR)/$(LIB_NAME).$(LIB_MAJOR_VER)
+	@ln -sf $(LIB_NAME).$(LIB_MAJOR_VER) $@
+
+# Target-specific flags are inherited by every prerequisite of `lnx`.
+lnx: CFLAGS += -m32
+lnx: LDFLAGS += -m32
+lnx: $(BUILDDIR)/$(LIB_NAME)
+
+lnx64a: $(BUILDDIR)/$(LIB_NAME)
+
+clean:
+	rm -rf $(BUILD_ROOT)
+
+# Compile one source file; every object also depends on the public headers.
+$(BUILDDIR)/%.o: %.c ../include/hsakmt.h ../include/hsakmttypes.h ../include/linux/kfd_ioctl.h
+	@echo Compiling $<
+	@mkdir -p $(@D)
+	$(CC) $(CFLAGS) -c $< -o $@
diff --git a/hsakmt/debug.c b/hsakmt/debug.c
new file mode 100644
index 0000000..46f72e7
--- /dev/null
+++ b/hsakmt/debug.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+#include "linux/kfd_ioctl.h"
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Issue the AMDKFD_IOC_DBG_REGISTER ioctl for the GPU behind NodeId.
+ *
+ * Returns whatever validate_nodeid() reports when the node cannot be
+ * resolved, HSAKMT_STATUS_ERROR when the ioctl fails and
+ * HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgRegister(
+    HSAuint32       NodeId      //IN
+    )
+{
+	struct kfd_ioctl_dbg_register_args ioctl_args;
+	HSAKMT_STATUS status;
+	uint32_t gpu_id;
+
+	CHECK_KFD_OPEN();
+
+	status = validate_nodeid(NodeId, &gpu_id);
+	if (status != HSAKMT_STATUS_SUCCESS)
+		return status;
+
+	memset(&ioctl_args, 0, sizeof(ioctl_args));
+	ioctl_args.gpu_id = gpu_id;
+
+	if (kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_REGISTER, &ioctl_args) != 0)
+		return HSAKMT_STATUS_ERROR;
+
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Issue the AMDKFD_IOC_DBG_UNREGISTER ioctl for the GPU behind NodeId.
+ *
+ * Returns whatever validate_nodeid() reports when the node cannot be
+ * resolved, HSAKMT_STATUS_ERROR when the ioctl fails and
+ * HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgUnregister(
+    HSAuint32       NodeId      //IN
+    )
+{
+	struct kfd_ioctl_dbg_unregister_args ioctl_args;
+	HSAKMT_STATUS status;
+	uint32_t gpu_id;
+
+	CHECK_KFD_OPEN();
+
+	status = validate_nodeid(NodeId, &gpu_id);
+	if (status != HSAKMT_STATUS_SUCCESS)
+		return status;
+
+	memset(&ioctl_args, 0, sizeof(ioctl_args));
+	ioctl_args.gpu_id = gpu_id;
+
+	if (kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_UNREGISTER, &ioctl_args) != 0)
+		return HSAKMT_STATUS_ERROR;
+
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Pack a wavefront-control request and hand it to the kernel through the
+ * AMDKFD_IOC_DBG_WAVE_CONTROL ioctl.
+ *
+ * The ioctl buffer is the fixed kfd_ioctl_dbg_wave_control_args header
+ * immediately followed by, in this order: Operand, Mode, TrapId,
+ * DbgWaveMsgRing->DbgWaveMsg and DbgWaveMsgRing->MemoryVA.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER when DbgWaveMsgRing is NULL, the
+ * status of validate_nodeid() when the node cannot be resolved,
+ * HSAKMT_STATUS_ERROR when the buffer cannot be allocated or the ioctl
+ * fails, and HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgWavefrontControl(
+    HSAuint32           NodeId,         //IN
+    HSA_DBG_WAVEOP      Operand,        //IN
+    HSA_DBG_WAVEMODE    Mode,           //IN
+    HSAuint32           TrapId,         //IN
+    HsaDbgWaveMessage*  DbgWaveMsgRing  //IN (? - see thunk API doc!)
+    )
+{
+	HSAKMT_STATUS result;
+	uint32_t gpu_id;
+
+	struct kfd_ioctl_dbg_wave_control_args *args;
+
+	CHECK_KFD_OPEN();
+
+	/* The message ring is dereferenced below, so it is mandatory */
+	if (DbgWaveMsgRing == NULL)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	result = validate_nodeid(NodeId, &gpu_id);
+	if (result != HSAKMT_STATUS_SUCCESS)
+		return result;
+
+	/* Determine Size  of the ioctl buffer */
+	uint32_t buff_size = sizeof(Operand) +
+				sizeof(Mode) + sizeof(TrapId) +
+				sizeof(DbgWaveMsgRing->DbgWaveMsg) +
+				sizeof(DbgWaveMsgRing->MemoryVA) +
+				sizeof(*args);
+
+	args = (struct kfd_ioctl_dbg_wave_control_args*) malloc(buff_size);
+	if (args == NULL)
+		return HSAKMT_STATUS_ERROR;
+
+	memset(args, 0, buff_size);
+
+	args->gpu_id = gpu_id;
+	args->buf_size_in_bytes = buff_size;
+
+	/* increment pointer to the start of the non fixed part */
+	unsigned char* run_ptr = (unsigned char*)args + sizeof(*args);
+
+	/*
+	 * save variable content pointer for kfd; go through uintptr_t so the
+	 * conversion is also well formed when pointers are 32 bits wide
+	 */
+	args->content_ptr = (uint64_t)(uintptr_t) run_ptr;
+
+	/*
+	 * insert items, and increment pointer accordingly; memcpy is used
+	 * because the packed items are not guaranteed to be naturally aligned
+	 */
+	memcpy(run_ptr, &Operand, sizeof(Operand));
+	run_ptr += sizeof(Operand);
+
+	memcpy(run_ptr, &Mode, sizeof(Mode));
+	run_ptr += sizeof(Mode);
+
+	memcpy(run_ptr, &TrapId, sizeof(TrapId));
+	run_ptr += sizeof(TrapId);
+
+	memcpy(run_ptr, &DbgWaveMsgRing->DbgWaveMsg,
+			sizeof(DbgWaveMsgRing->DbgWaveMsg));
+	run_ptr += sizeof(DbgWaveMsgRing->DbgWaveMsg);
+
+	memcpy(run_ptr, &DbgWaveMsgRing->MemoryVA,
+			sizeof(DbgWaveMsgRing->MemoryVA));
+
+	/* send to kernel */
+	long err = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_WAVE_CONTROL, args);
+
+	free (args);
+
+	if (err == 0)
+		return HSAKMT_STATUS_SUCCESS;
+	else
+		return HSAKMT_STATUS_ERROR;
+}
+
+/*
+ * Pack an address-watch request and hand it to the kernel through the
+ * AMDKFD_IOC_DBG_ADDRESS_WATCH ioctl.
+ *
+ * The ioctl buffer is the fixed kfd_ioctl_dbg_address_watch_args header
+ * immediately followed by, in this order: NumWatchPoints, the WatchMode
+ * items, the WatchAddress items, the watch masks and the watch events.
+ *
+ * WatchMask and WatchEvent are optional:
+ *  - WatchMask == NULL, or WatchMask[0] == 0: a single mask item is packed
+ *    (all zeroes when WatchMask is NULL); otherwise NumWatchPoints masks.
+ *  - WatchEvent == NULL: no event item is packed.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER for too many watch points or a
+ * missing mandatory array, the status of validate_nodeid() when the node
+ * cannot be resolved, HSAKMT_STATUS_ERROR when the buffer cannot be
+ * allocated or the ioctl fails, and HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgAddressWatch(
+    HSAuint32           NodeId,         //IN
+    HSAuint32           NumWatchPoints, //IN
+    HSA_DBG_WATCH_MODE  WatchMode[],    //IN
+    void*               WatchAddress[], //IN
+    HSAuint64           WatchMask[],    //IN, optional
+    HsaEvent*           WatchEvent[]    //IN, optional
+    )
+{
+	HSAKMT_STATUS result;
+	uint32_t gpu_id;
+	struct kfd_ioctl_dbg_address_watch_args *args;
+	uint32_t buff_size;
+	uint32_t watch_mask_items, watch_event_items;
+	HSAuint32 i;
+
+	CHECK_KFD_OPEN();
+
+	result = validate_nodeid(NodeId, &gpu_id);
+	if (result != HSAKMT_STATUS_SUCCESS)
+		return result;
+
+	if (NumWatchPoints > MAX_ALLOWED_NUM_POINTS)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	/* The mode and address vectors are read for every watch point */
+	if (NumWatchPoints > 0 && (WatchMode == NULL || WatchAddress == NULL))
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	/*
+	 * Determine the size of the watch mask and event buffers.
+	 * Both vectors are optional, so never dereference them when NULL.
+	 */
+	if (WatchMask != NULL && WatchMask[0] > 0)
+		watch_mask_items = NumWatchPoints;
+	else
+		watch_mask_items = 1;
+	watch_event_items = WatchEvent != NULL ? NumWatchPoints : 0;
+
+	/*
+	 * Size and structure of the ioctl buffer is  dynamic in this case
+	 * Here we calculate the buff size.
+	 */
+
+	buff_size = sizeof(NumWatchPoints) +
+			(sizeof(WatchMode[0]) + sizeof(WatchAddress[0])) *
+			NumWatchPoints +
+			watch_mask_items * sizeof(HSAuint64) +
+			watch_event_items * sizeof(HsaEvent*)+
+			sizeof(*args);
+
+	args = (struct kfd_ioctl_dbg_address_watch_args*) malloc(buff_size);
+	if (args == NULL)
+		return HSAKMT_STATUS_ERROR;
+
+	memset(args, 0, buff_size);
+
+	args->gpu_id = gpu_id;
+	args->buf_size_in_bytes = buff_size;
+
+	/* increment pointer to the start of the non fixed part */
+	unsigned char* run_ptr = (unsigned char*)args + sizeof(*args);
+
+	/*
+	 * save variable content pointer for kfd; go through uintptr_t so the
+	 * conversion is also well formed when pointers are 32 bits wide
+	 */
+	args->content_ptr = (uint64_t)(uintptr_t) run_ptr;
+
+	/*
+	 * insert items, and increment pointer accordingly; memcpy is used
+	 * because the packed items are not guaranteed to be naturally aligned
+	 */
+	memcpy(run_ptr, &NumWatchPoints, sizeof(NumWatchPoints));
+	run_ptr += sizeof(NumWatchPoints);
+
+	for (i = 0 ; i < NumWatchPoints ; i++) {
+		memcpy(run_ptr, &WatchMode[i], sizeof(WatchMode[i]));
+		run_ptr += sizeof(WatchMode[i]);
+	}
+
+	for (i = 0 ; i < NumWatchPoints ; i++) {
+		memcpy(run_ptr, &WatchAddress[i], sizeof(WatchAddress[i]));
+		run_ptr += sizeof(WatchAddress[i]);
+	}
+
+	for (i = 0 ; i < watch_mask_items ; i++) {
+		/* without a mask vector the slot keeps its memset() zeroes */
+		if (WatchMask != NULL)
+			memcpy(run_ptr, &WatchMask[i], sizeof(HSAuint64));
+		run_ptr += sizeof(HSAuint64);
+	}
+
+	for (i = 0 ; i < watch_event_items ; i++) {
+		memcpy(run_ptr, &WatchEvent[i], sizeof(WatchEvent[i]));
+		run_ptr += sizeof(WatchEvent[i]);
+	}
+
+	/* send to kernel */
+	long err = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_ADDRESS_WATCH, args);
+
+	free (args);
+
+	if (err != 0)
+		return HSAKMT_STATUS_ERROR;
+
+	return HSAKMT_STATUS_SUCCESS;
+}
diff --git a/hsakmt/events.c b/hsakmt/events.c
new file mode 100644
index 0000000..5d6835e
--- /dev/null
+++ b/hsakmt/events.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include "linux/kfd_ioctl.h"
+
+/*
+ * Process-wide mapping of the kfd event page. It is created lazily by
+ * hsaKmtCreateEvent() (one 4096 byte mmap() of kfd_fd) and indexed by the
+ * event slot index the kernel reports; it is never unmapped in this file.
+ */
+static HSAuint64 *events_page = NULL;
+
+/*
+ * Tell whether an event type is system defined, i.e. anything other than a
+ * signal event or a debug event (debug events behave as signal events).
+ */
+static bool IsSystemEventType(HSA_EVENTTYPE type)
+{
+	if (type == HSA_EVENTTYPE_SIGNAL)
+		return false;
+
+	if (type == HSA_EVENTTYPE_DEBUG_EVENT)
+		return false;
+
+	return true;
+}
+
+/*
+ * Create a kfd event of the type described by EventDesc.
+ *
+ * On success *Event receives a heap allocated HsaEvent which must be
+ * released with hsaKmtDestroyEvent(). On any failure *Event is set to NULL.
+ *
+ * The first event that comes with a non-zero event_page_offset also maps
+ * the shared event page (see events_page). When IsSignaled is set and the
+ * event is not system defined, the event is set right after creation (the
+ * outcome of that ioctl is deliberately not checked).
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER for a NULL argument or an
+ * out-of-range event type, HSAKMT_STATUS_ERROR when an allocation, the
+ * create ioctl or the event page mapping fails, and HSAKMT_STATUS_SUCCESS
+ * otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtCreateEvent(
+    HsaEventDescriptor* EventDesc,              //IN
+    bool                ManualReset,            //IN
+    bool                IsSignaled,             //IN
+    HsaEvent**          Event                   //OUT
+    )
+{
+	CHECK_KFD_OPEN();
+
+	if (EventDesc == NULL || Event == NULL)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	/* Callers must never see a stale pointer on any error path */
+	*Event = NULL;
+
+	if (EventDesc->EventType >= HSA_EVENTTYPE_MAXID)
+	{
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+	}
+
+	HsaEvent* e = malloc(sizeof(HsaEvent));
+	if (e == NULL)
+	{
+		return HSAKMT_STATUS_ERROR;
+	}
+
+	memset(e, 0, sizeof(*e));
+
+	struct kfd_ioctl_create_event_args args;
+	memset(&args, 0, sizeof(args));
+
+	args.event_type = EventDesc->EventType;
+	args.auto_reset = !ManualReset;
+
+	if (kmtIoctl(kfd_fd, AMDKFD_IOC_CREATE_EVENT, &args) != 0) {
+		free(e);
+		return HSAKMT_STATUS_ERROR;
+	}
+
+	/* From here on the kernel event exists and "e" identifies it */
+	e->EventId = args.event_id;
+
+	if (events_page == NULL && args.event_page_offset > 0) {
+		/* mmap() reports failure with MAP_FAILED, never with NULL */
+		void *page = mmap(NULL, 4096, PROT_WRITE | PROT_READ,
+				MAP_SHARED, kfd_fd, args.event_page_offset);
+		if (page == MAP_FAILED) {
+			/*
+			 * Tear down the kernel event; hsaKmtDestroyEvent()
+			 * frees "e" only when its ioctl succeeds.
+			 */
+			if (hsaKmtDestroyEvent(e) != HSAKMT_STATUS_SUCCESS)
+				free(e);
+			return HSAKMT_STATUS_ERROR;
+		}
+		events_page = page;
+	}
+
+	if (args.event_page_offset > 0 && args.event_slot_index < KFD_SIGNAL_EVENT_LIMIT)
+		e->EventData.HWData2 = (HSAuint64)(uintptr_t)&events_page[args.event_slot_index];
+
+	e->EventData.EventType = EventDesc->EventType;
+	e->EventData.HWData1 = args.event_id;
+
+	e->EventData.HWData3 = args.event_trigger_data;
+
+	if (IsSignaled && !IsSystemEventType(e->EventData.EventType)) {
+		struct kfd_ioctl_set_event_args set_args;
+		memset(&set_args, 0, sizeof(set_args));
+		set_args.event_id = args.event_id;
+
+		kmtIoctl(kfd_fd, AMDKFD_IOC_SET_EVENT, &set_args);
+	}
+
+	*Event = e;
+
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Destroy the kernel event behind Event and free the HsaEvent itself.
+ *
+ * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL Event and
+ * HSAKMT_STATUS_ERROR when the destroy ioctl fails; in that case Event is
+ * left allocated.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDestroyEvent(
+    HsaEvent*   Event    //IN
+    )
+{
+	struct kfd_ioctl_destroy_event_args destroy_args;
+
+	CHECK_KFD_OPEN();
+
+	if (Event == NULL)
+		return HSAKMT_STATUS_INVALID_HANDLE;
+
+	memset(&destroy_args, 0, sizeof(destroy_args));
+	destroy_args.event_id = Event->EventId;
+
+	if (kmtIoctl(kfd_fd, AMDKFD_IOC_DESTROY_EVENT, &destroy_args) == 0) {
+		free(Event);
+		return HSAKMT_STATUS_SUCCESS;
+	}
+
+	return HSAKMT_STATUS_ERROR;
+}
+
+/*
+ * Signal Event through the AMDKFD_IOC_SET_EVENT ioctl.
+ *
+ * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL Event and
+ * HSAKMT_STATUS_ERROR for a system-defined event or when the ioctl
+ * returns -1.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtSetEvent(
+    HsaEvent*  Event    //IN
+    )
+{
+	struct kfd_ioctl_set_event_args set_args;
+
+	CHECK_KFD_OPEN();
+
+	if (Event == NULL)
+		return HSAKMT_STATUS_INVALID_HANDLE;
+
+	/* The spec is silent on this, but system-defined events may not be signaled. */
+	if (IsSystemEventType(Event->EventData.EventType))
+		return HSAKMT_STATUS_ERROR;
+
+	memset(&set_args, 0, sizeof(set_args));
+	set_args.event_id = Event->EventId;
+
+	return (kmtIoctl(kfd_fd, AMDKFD_IOC_SET_EVENT, &set_args) == -1)
+		? HSAKMT_STATUS_ERROR
+		: HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Reset Event through the AMDKFD_IOC_RESET_EVENT ioctl.
+ *
+ * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL Event and
+ * HSAKMT_STATUS_ERROR for a system-defined event or when the ioctl
+ * returns -1.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtResetEvent(
+    HsaEvent*  Event    //IN
+    )
+{
+	struct kfd_ioctl_reset_event_args reset_args;
+
+	CHECK_KFD_OPEN();
+
+	if (Event == NULL)
+		return HSAKMT_STATUS_INVALID_HANDLE;
+
+	/* The spec is silent on this, but system-defined events may not be reset. */
+	if (IsSystemEventType(Event->EventData.EventType))
+		return HSAKMT_STATUS_ERROR;
+
+	memset(&reset_args, 0, sizeof(reset_args));
+	reset_args.event_id = Event->EventId;
+
+	return (kmtIoctl(kfd_fd, AMDKFD_IOC_RESET_EVENT, &reset_args) == -1)
+		? HSAKMT_STATUS_ERROR
+		: HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Validate the Event handle. No event state is actually queried here:
+ * any non-NULL Event yields HSAKMT_STATUS_SUCCESS and a NULL Event yields
+ * HSAKMT_STATUS_INVALID_HANDLE.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtQueryEventState(
+    HsaEvent*  Event    //IN
+    )
+{
+	CHECK_KFD_OPEN();
+
+	return (Event != NULL) ? HSAKMT_STATUS_SUCCESS
+			       : HSAKMT_STATUS_INVALID_HANDLE;
+}
+
+/*
+ * Wait for a single event: a thin wrapper that forwards to
+ * hsaKmtWaitOnMultipleEvents() with a one-element event list.
+ *
+ * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL Event, otherwise the
+ * status of hsaKmtWaitOnMultipleEvents().
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtWaitOnEvent(
+    HsaEvent*   Event,          //IN
+    HSAuint32   Milliseconds    //IN
+    )
+{
+	if (Event == NULL)
+		return HSAKMT_STATUS_INVALID_HANDLE;
+
+	/* The parameter itself serves as the one-element event array */
+	return hsaKmtWaitOnMultipleEvents(&Event, 1, true, Milliseconds);
+}
+
+/*
+ * Wait on a list of events through the AMDKFD_IOC_WAIT_EVENTS ioctl.
+ *
+ * Events       array of NumEvents event handles, none of which may be NULL
+ * WaitOnAll    forwarded to the kernel as wait_for_all
+ * Milliseconds forwarded to the kernel as the timeout
+ *
+ * After a successful wait, the memory exception data reported by the
+ * kernel is copied into every HSA_EVENTTYPE_MEMORY event of the list.
+ *
+ * Returns HSAKMT_STATUS_INVALID_HANDLE for a NULL list or a NULL entry,
+ * HSAKMT_STATUS_ERROR when the temporary buffer cannot be allocated or the
+ * ioctl returns -1, HSAKMT_STATUS_WAIT_TIMEOUT when the kernel reports a
+ * timeout, the status of gpuid_to_nodeid() when a reported gpu id cannot
+ * be translated, and HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtWaitOnMultipleEvents(
+    HsaEvent*   Events[],       //IN
+    HSAuint32   NumEvents,      //IN
+    bool        WaitOnAll,      //IN
+    HSAuint32   Milliseconds    //IN
+    )
+{
+	CHECK_KFD_OPEN();
+
+	if (!Events)
+		return HSAKMT_STATUS_INVALID_HANDLE;
+
+	struct kfd_event_data *event_data = malloc(NumEvents * sizeof(struct kfd_event_data));
+	/* malloc(0) may legitimately return NULL, so only fail when we need room */
+	if (event_data == NULL && NumEvents != 0)
+		return HSAKMT_STATUS_ERROR;
+
+	for (HSAuint32 i = 0; i < NumEvents; i++) {
+		if (Events[i] == NULL) {
+			free(event_data);
+			return HSAKMT_STATUS_INVALID_HANDLE;
+		}
+		event_data[i].event_id = Events[i]->EventId;
+		event_data[i].kfd_event_data_ext = (uint64_t)(uintptr_t)NULL;
+	}
+
+	struct kfd_ioctl_wait_events_args args;
+	memset(&args, 0, sizeof(args));
+
+	args.wait_for_all = WaitOnAll;
+	args.timeout = Milliseconds;
+	args.num_events = NumEvents;
+	args.events_ptr = (uint64_t)(uintptr_t)event_data;
+
+	HSAKMT_STATUS result;
+
+	if (kmtIoctl(kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &args) == -1) {
+		result = HSAKMT_STATUS_ERROR;
+	}
+	else if (args.wait_result == KFD_IOC_WAIT_RESULT_TIMEOUT) {
+		result = HSAKMT_STATUS_WAIT_TIMEOUT;
+	}
+	else {
+		result = HSAKMT_STATUS_SUCCESS;
+		/* Propagate the kernel's fault description to memory events */
+		for (HSAuint32 i = 0; i < NumEvents; i++) {
+			if (Events[i]->EventData.EventType == HSA_EVENTTYPE_MEMORY) {
+				Events[i]->EventData.EventData.MemoryAccessFault.VirtualAddress = event_data[i].memory_exception_data.va;
+				result = gpuid_to_nodeid(event_data[i].memory_exception_data.gpu_id, &Events[i]->EventData.EventData.MemoryAccessFault.NodeId);
+				if (result != HSAKMT_STATUS_SUCCESS)
+					goto out;
+				Events[i]->EventData.EventData.MemoryAccessFault.Failure.NotPresent = event_data[i].memory_exception_data.failure.NotPresent;
+				Events[i]->EventData.EventData.MemoryAccessFault.Failure.ReadOnly = event_data[i].memory_exception_data.failure.ReadOnly;
+				Events[i]->EventData.EventData.MemoryAccessFault.Failure.NoExecute = event_data[i].memory_exception_data.failure.NoExecute;
+				Events[i]->EventData.EventData.MemoryAccessFault.Flags = HSA_EVENTID_MEMORY_FATAL_PROCESS;
+			}
+		}
+	}
+out:
+	free(event_data);
+
+	return result;
+}
diff --git a/hsakmt/fmm.c b/hsakmt/fmm.c
new file mode 100644
index 0000000..a90fb95
--- /dev/null
+++ b/hsakmt/fmm.c
@@ -0,0 +1,486 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "fmm.h"
+#include "linux/kfd_ioctl.h"
+#include "libhsakmt.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+
+/* gpu_id value marking a gpu_mem slot that holds no GPU */
+#define NON_VALID_GPU_ID 0
+/* Number of elements of a true array (not of a pointer) */
+#define ARRAY_LEN(array) (sizeof(array) / sizeof(array[0]))
+/* Static initializer of an aperture_t */
+#define INIT_APERTURE(base_value, limit_value) {.base = (void*)base_value, .limit = (void*)limit_value }
+/* Static initializer of a manageble_aperture_t: empty lists, unlocked mutex */
+#define INIT_MANAGEBLE_APERTURE(base_value, limit_value) {.base = (void*)base_value,.limit = (void*)limit_value, .vm_ranges = NULL, .vm_objects = NULL, .fmm_mutex = PTHREAD_MUTEX_INITIALIZER}
+/* Static initializer of one gpu_mem_t slot: no GPU, all apertures empty */
+#define INIT_GPU_MEM \
+{		.gpu_id = NON_VALID_GPU_ID,\
+		.lds_aperture = INIT_APERTURE(0, 0), \
+		.scratch_aperture = INIT_MANAGEBLE_APERTURE(0, 0),\
+		.gpuvm_aperture =  INIT_MANAGEBLE_APERTURE(0, 0)\
+}
+
+/*
+ * Initializer of the whole gpu_mem table; relies on the GNU range
+ * designator extension to fill all NUM_OF_SUPPORTED_GPUS slots.
+ */
+#define INIT_GPUs_MEM {[0 ... (NUM_OF_SUPPORTED_GPUS-1)] = INIT_GPU_MEM}
+/*
+ * One allocation handed out from a managed aperture. Objects of an
+ * aperture form a doubly linked list headed by vm_objects.
+ */
+struct vm_object{
+	void* start;		// first address of the allocation
+	HSAuint64 size;		// size of the allocation in bytes
+	HSAuint64 handle; // opaque
+	struct vm_object* next;	// next object in the list, NULL at the tail
+	struct vm_object* prev;	// previous object, NULL at the head
+};
+typedef struct vm_object vm_object_t;
+
+/*
+ * One contiguous range of in-use addresses inside a managed aperture.
+ * Both bounds are inclusive. Areas of an aperture form a doubly linked
+ * list headed by vm_ranges.
+ */
+struct vm_area{
+	void* start;		// first address covered by the range
+	void* end;		// last address covered by the range (inclusive)
+	struct vm_area* next;	// next range in the list, NULL at the tail
+	struct vm_area* prev;	// previous range, NULL at the head
+};
+typedef struct vm_area vm_area_t;
+
+/*
+ * An aperture whose address space is handed out by this file
+ * (see aperture_allocate() / aperture_release()).
+ */
+typedef struct {
+	void* base;			// first address of the aperture
+	void* limit;			// last address of the aperture (inclusive)
+	vm_area_t* vm_ranges;		// list of in-use address ranges
+	vm_object_t* vm_objects;	// list of live allocations
+	pthread_mutex_t fmm_mutex;	// taken by the fmm_* entry points around list updates
+} manageble_aperture_t;
+
+/* An aperture described by its bounds only; nothing is allocated from it here. */
+typedef struct {
+	void* base;	// first address of the aperture
+	void* limit;	// last address of the aperture (inclusive)
+} aperture_t;
+
+/* Per-GPU aperture bookkeeping; gpu_id == NON_VALID_GPU_ID marks a free slot. */
+typedef struct{
+	HSAuint32 gpu_id;			// kfd gpu id owning this slot
+	aperture_t lds_aperture;		// LDS aperture bounds
+	manageble_aperture_t scratch_aperture;	// scratch aperture and its allocations
+	manageble_aperture_t gpuvm_aperture;	// GPUVM aperture and its allocations
+}gpu_mem_t;
+
+/*
+ * Table of per-GPU apertures with NUM_OF_SUPPORTED_GPUS slots, filled in
+ * by fmm_init_process_apertures().
+ */
+static gpu_mem_t gpu_mem[] = INIT_GPUs_MEM;
+
+/*
+ * Allocate an unlinked area covering [start, end].
+ * Returns NULL when the allocation fails.
+ */
+static vm_area_t* vm_create_and_init_area(void* start, void* end){
+	vm_area_t* node = (vm_area_t*)malloc(sizeof(vm_area_t));// TODO: Memory pool ???
+
+	if (node == NULL)
+		return NULL;
+
+	node->start = start;
+	node->end = end;
+	node->prev = NULL;
+	node->next = NULL;
+
+	return node;
+}
+
+/*
+ * Allocate an unlinked object describing "size" bytes at "start".
+ * Returns NULL when the allocation fails.
+ */
+static vm_object_t* vm_create_and_init_object(void* start, uint64_t size, uint64_t handle){
+	vm_object_t* node = (vm_object_t*)malloc(sizeof(vm_object_t)); // TODO: Memory pool ???
+
+	if (node == NULL)
+		return NULL;
+
+	node->start = start;
+	node->size = size;
+	node->handle = handle;
+	node->prev = NULL;
+	node->next = NULL;
+
+	return node;
+}
+
+
+/* Unlink "area" from the range list of "app" and free it. */
+static void vm_remove_area(manageble_aperture_t* app, vm_area_t* area){
+	vm_area_t* before = area->prev;
+	vm_area_t* after = area->next;
+
+	if (before != NULL)
+		before->next = after;
+	else
+		app->vm_ranges = after;	// the head of the list is going away
+
+	if (after != NULL)
+		after->prev = before;
+
+	free(area);
+}
+
+/* Unlink "object" from the object list of "app" and free it. */
+static void vm_remove_object(manageble_aperture_t* app, vm_object_t* object){
+	vm_object_t* before = object->prev;
+	vm_object_t* after = object->next;
+
+	if (before != NULL)
+		before->next = after;
+	else
+		app->vm_objects = after;	// the head of the list is going away
+
+	if (after != NULL)
+		after->prev = before;
+
+	free(object);
+}
+
+
+
+/* Link "new_area" into the list right behind "after_this". */
+static void vm_add_area_after(vm_area_t* after_this, vm_area_t* new_area){
+	vm_area_t* successor = after_this->next;
+
+	after_this->next = new_area;
+	new_area->prev = after_this;
+	new_area->next = successor;
+
+	if (successor != NULL)
+		successor->prev = new_area;
+}
+
+/*
+ * Link "new_object" into the list right in front of "before_this".
+ * The list head is not touched here; callers update it themselves.
+ */
+static void vm_add_object_before(vm_object_t* before_this, vm_object_t* new_object){
+	vm_object_t* predecessor = before_this->prev;
+
+	new_object->next = before_this;
+	before_this->prev = new_object;
+	new_object->prev = predecessor;
+
+	if (predecessor != NULL)
+		predecessor->next = new_object;
+}
+
+/*
+ * Punch the hole [address, address + MemorySizeInBytes - 1] out of the
+ * middle of "area": the existing area shrinks to [area->start, address - 1]
+ * and a new area [address + MemorySizeInBytes, area->end] is linked right
+ * behind it.
+ *
+ * If the new area cannot be allocated, "area" is left untouched: the hole
+ * stays accounted as in use (address space is lost until the surrounding
+ * area goes away) but the range list remains consistent.
+ */
+static void vm_split_area(manageble_aperture_t* app, vm_area_t* area, void* address, uint64_t MemorySizeInBytes){
+
+	// The existing area is split to: [area->start, address - 1] and [address + MemorySizeInBytes, area->end]
+	vm_area_t* new_area = vm_create_and_init_area(VOID_PTR_ADD(address,MemorySizeInBytes), area->end);
+
+	// Out of memory: do not shrink, there is no node for the upper part
+	if (new_area == NULL)
+		return;
+
+	// Shrink the existing area
+	area->end = VOID_PTR_SUB(address,1);
+
+	vm_add_area_after(area, new_area);
+
+}
+
+/*
+ * Find the object of "app" that starts exactly at "address" and has
+ * exactly "size" bytes. Returns NULL when there is no such object.
+ */
+static vm_object_t* vm_find_object_by_address(manageble_aperture_t* app, void* address, uint64_t size){
+	vm_object_t* it;
+
+	for (it = app->vm_objects; it != NULL; it = it->next) {
+		if (it->start == address && it->size == size)
+			return it;
+	}
+
+	return NULL;
+}
+
+/*
+ * Find the area of "app" whose inclusive range [start, end] contains
+ * "address". Returns NULL when no area contains it.
+ */
+static vm_area_t* vm_find(manageble_aperture_t* app, void* address){
+	vm_area_t* it;
+
+	for (it = app->vm_ranges; it != NULL; it = it->next) {
+		if (it->start <= address && it->end >= address)
+			return it;
+	}
+
+	return NULL;
+}
+
+/* An aperture is usable when both bounds are set and base lies below limit. */
+static bool aperture_is_valid(void* app_base, void* app_limit){
+	if (!app_base || !app_limit)
+		return false;
+
+	return app_base < app_limit;
+}
+
+/*
+ * Give the block [address, address + MemorySizeInBytes - 1] back to "app".
+ *
+ * The block must match an object of the aperture exactly (same start and
+ * size) and lie inside one of its areas. The object is dropped and the
+ * area is trimmed, split or removed accordingly.
+ *
+ * Returns 0 on success and -1 when the block is unknown or the bookkeeping
+ * is inconsistent. Assumes that fmm_mutex is locked on entry.
+ */
+static int aperture_release(manageble_aperture_t* app, void* address, uint64_t MemorySizeInBytes){
+	vm_area_t* range = vm_find(app, address);
+	vm_object_t* obj = vm_find_object_by_address(app, address, MemorySizeInBytes);
+
+	if (obj == NULL || range == NULL)
+		return -1;
+
+	vm_remove_object(app, obj);
+
+	// The released block is smaller than the area holding it
+	if (VOID_PTRS_SUB(range->end, range->start) + 1 > MemorySizeInBytes) {
+		if (range->start == address) {
+			// block sits at the bottom: move the start up
+			range->start = VOID_PTR_ADD(range->start, MemorySizeInBytes);
+		} else if (VOID_PTRS_SUB(range->end, address) + 1 == MemorySizeInBytes) {
+			// block sits at the top: move the end down
+			range->end = VOID_PTR_SUB(range->end, MemorySizeInBytes);
+		} else {
+			// block sits in the middle: cut the area in two
+			vm_split_area(app, range, address, MemorySizeInBytes);
+		}
+		return 0;
+	}
+
+	// The released block is the whole area
+	if (VOID_PTRS_SUB(range->end, range->start) + 1 == MemorySizeInBytes) {
+		vm_remove_area(app, range);
+		return 0;
+	}
+
+	// The block is larger than its area: inconsistent data
+	return -1;
+}
+
+/*
+ * Reserve MemorySizeInBytes bytes of address space inside "app".
+ *
+ * Areas are inclusive ranges [start, end] kept in ascending order. The new
+ * block is placed right behind the first area that is followed by a hole
+ * large enough, or behind the last area if it still fits below the
+ * (inclusive) aperture limit; that area is simply extended. When the
+ * aperture is empty a first area is created at its base.
+ *
+ * Every successful reservation is recorded as an object at the head of the
+ * object list; if that record cannot be allocated the reservation is
+ * rolled back.
+ *
+ * Returns the allocated address, or NULL when the size is zero, the block
+ * does not fit or memory for the bookkeeping is exhausted.
+ * Assumes, that fmm_mutex is locked on entry.
+ */
+static void* aperture_allocate(manageble_aperture_t* app, uint64_t MemorySizeInBytes){
+	vm_area_t *cur, *next;
+	vm_area_t *new_area = NULL;
+	vm_object_t* new_object;
+	void* new_address = NULL;
+
+	// A zero sized block would create an area whose end lies below its start
+	if (MemorySizeInBytes == 0)
+		return NULL;
+
+	cur = app->vm_ranges;
+	if (cur){ // not empty
+
+		// Look up the appropriate address space "hole" or end of the list
+		while(cur){
+			next = cur->next;
+
+			// End of the list reached
+			if (!next)
+				break;
+
+			// address space "hole": the free bytes are cur->end + 1 .. next->start - 1,
+			// i.e. (next->start - cur->end - 1) of them
+			if (VOID_PTRS_SUB(next->start,cur->end) > MemorySizeInBytes)
+				break;
+
+			cur = next;
+		}
+
+		// If the new range is inside the reserved aperture: the new end
+		// (cur->end + MemorySizeInBytes) must not exceed the inclusive limit
+		if (VOID_PTRS_SUB(app->limit, cur->end) >= MemorySizeInBytes){
+			// cur points to the last inspected element: the tail of the list or the found "hole"
+			// Just extend the existing region
+			new_address = VOID_PTR_ADD(cur->end, 1);
+			cur->end = VOID_PTR_ADD(cur->end, MemorySizeInBytes);
+		}
+
+	} else if (VOID_PTRS_SUB(app->limit, app->base) >= MemorySizeInBytes - 1){
+		// empty and the block fits - create the first area
+		void* start = app->base;
+		new_area = vm_create_and_init_area(start, VOID_PTR_ADD(start, (MemorySizeInBytes - 1)));
+		if (new_area){
+			app->vm_ranges = new_area;
+			new_address = new_area->start;
+		}
+	}
+
+	// Allocate new object
+	if (new_address){
+		new_object = vm_create_and_init_object(new_address, MemorySizeInBytes, 0);
+		if (new_object){
+			// Add it before the first element, if there is one
+			if (app->vm_objects != NULL)
+				vm_add_object_before(app->vm_objects, new_object);
+			// Update head
+			app->vm_objects = new_object;
+		} else{
+			// Failed to allocate object: undo the reservation by hand
+			// (aperture_release() only works on recorded objects)
+			if (new_area)
+				vm_remove_area(app, new_area);
+			else
+				cur->end = VOID_PTR_SUB(cur->end, MemorySizeInBytes);
+			new_address = NULL;
+		}
+	}
+
+	return new_address;
+
+}
+
+
+
+/*
+ * Return the index of the gpu_mem slot whose gpu_id equals "gpu_id",
+ * or -1 when no slot matches.
+ */
+static int32_t gpu_mem_find_by_gpu_id(uint32_t gpu_id){
+	int32_t slot = 0;
+
+	while (slot < NUM_OF_SUPPORTED_GPUS) {
+		if (gpu_mem[slot].gpu_id == gpu_id)
+			return slot;
+		slot++;
+	}
+
+	return -1;
+}
+
+/*
+ * Tell whether "address" lies inside the LDS, GPUVM or scratch aperture
+ * (bounds inclusive) of any registered GPU.
+ */
+bool fmm_is_inside_some_aperture(void* address){
+	int32_t slot;
+
+	for (slot = 0; slot < NUM_OF_SUPPORTED_GPUS; slot++) {
+		// Skip slots that hold no GPU
+		if (gpu_mem[slot].gpu_id == NON_VALID_GPU_ID)
+			continue;
+
+		if (address >= gpu_mem[slot].lds_aperture.base &&
+		    address <= gpu_mem[slot].lds_aperture.limit)
+			return true;
+
+		if (address >= gpu_mem[slot].gpuvm_aperture.base &&
+		    address <= gpu_mem[slot].gpuvm_aperture.limit)
+			return true;
+
+		if (address >= gpu_mem[slot].scratch_aperture.base &&
+		    address <= gpu_mem[slot].scratch_aperture.limit)
+			return true;
+	}
+
+	return false;
+}
+
+#ifdef DEBUG_PRINT_APERTURE
+/* Print the bounds of a plain aperture. */
+static void aperture_print(aperture_t* app){
+	printf("\t Base: %p\n", app->base);
+	printf("\t Limit: %p\n", app->limit);
+}
+
+/* Print the bounds, the in-use ranges and the objects of a managed aperture. */
+static void manageble_aperture_print(manageble_aperture_t* app){
+	vm_area_t* range;
+	vm_object_t* obj;
+
+	printf("\t Base: %p\n", app->base);
+	printf("\t Limit: %p\n", app->limit);
+
+	printf("\t Ranges: \n");
+	for (range = app->vm_ranges; range != NULL; range = range->next)
+		printf("\t\t Range [%p - %p] \n", range->start, range->end);
+
+	printf("\t Objects: \n");
+	for (obj = app->vm_objects; obj != NULL; obj = obj->next)
+		printf("\t\t Object [%p - %" PRIu64 "] \n", obj->start, obj->size);
+}
+
+/* Dump all apertures of the given GPU to stdout; unknown ids print nothing. */
+void fmm_print(uint32_t gpu_id){
+	int32_t slot = gpu_mem_find_by_gpu_id(gpu_id);
+
+	if (slot < 0) // Not found
+		return;
+
+	printf("LDS aperture: \n");
+	aperture_print(&gpu_mem[slot].lds_aperture);
+	printf("GPUVM aperture: \n");
+	manageble_aperture_print(&gpu_mem[slot].gpuvm_aperture);
+	printf("Scratch aperture: \n");
+	manageble_aperture_print(&gpu_mem[slot].scratch_aperture);
+}
+#else
+/* Aperture dumping is compiled out unless DEBUG_PRINT_APERTURE is defined. */
+void fmm_print(uint32_t gpu_id){
+}
+#endif
+
+
+/*
+ * Allocate MemorySizeInBytes bytes from the scratch aperture of "gpu_id".
+ * Returns NULL when the GPU is unknown, its scratch aperture is not valid
+ * or the allocation itself fails.
+ */
+void* fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes){
+	manageble_aperture_t* scratch;
+	void* address;
+	int32_t slot = gpu_mem_find_by_gpu_id(gpu_id);
+
+	// Unknown GPU
+	if (slot < 0)
+		return NULL;
+
+	// Aperture isn't properly initialized/supported
+	scratch = &gpu_mem[slot].scratch_aperture;
+	if (!aperture_is_valid(scratch->base, scratch->limit))
+		return NULL;
+
+	pthread_mutex_lock(&scratch->fmm_mutex);
+	address = aperture_allocate(scratch, MemorySizeInBytes);
+	pthread_mutex_unlock(&scratch->fmm_mutex);
+
+	return address;
+}
+
+/*
+ * Allocate MemorySizeInBytes bytes from the GPUVM aperture of "gpu_id".
+ * Returns NULL when the GPU is unknown, its GPUVM aperture is not valid
+ * or the allocation itself fails.
+ */
+void* fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes){
+	manageble_aperture_t* gpuvm;
+	void* address;
+	int32_t slot = gpu_mem_find_by_gpu_id(gpu_id);
+
+	// Unknown GPU
+	if (slot < 0)
+		return NULL;
+
+	// Aperture isn't properly initialized/supported
+	gpuvm = &gpu_mem[slot].gpuvm_aperture;
+	if (!aperture_is_valid(gpuvm->base, gpuvm->limit))
+		return NULL;
+
+	pthread_mutex_lock(&gpuvm->fmm_mutex);
+	address = aperture_allocate(gpuvm, MemorySizeInBytes);
+	pthread_mutex_unlock(&gpuvm->fmm_mutex);
+
+	return address;
+}
+
+
+/*
+ * Release a block previously returned by fmm_allocate_device() or
+ * fmm_allocate_scratch().
+ *
+ * Every registered GPU is inspected: when "address" lies inside its GPUVM
+ * or scratch aperture, the block is released from that aperture under the
+ * aperture's mutex. The search stops at the first successful release so a
+ * later GPU with an overlapping aperture cannot turn the result into a
+ * failure.
+ *
+ * Returns 0 on success and -1 when no aperture owns the block.
+ */
+int fmm_release(void* address, uint64_t MemorySizeInBytes){
+
+	uint32_t i;
+	int32_t rc = -1;
+
+	for(i = 0; i < NUM_OF_SUPPORTED_GPUS; i++){
+		if(gpu_mem[i].gpu_id == NON_VALID_GPU_ID)
+			continue;
+
+		if (address >= gpu_mem[i].gpuvm_aperture.base && address <= gpu_mem[i].gpuvm_aperture.limit){
+			pthread_mutex_lock(&gpu_mem[i].gpuvm_aperture.fmm_mutex);
+			rc = aperture_release(&gpu_mem[i].gpuvm_aperture, address, MemorySizeInBytes);
+			pthread_mutex_unlock(&gpu_mem[i].gpuvm_aperture.fmm_mutex);
+			fmm_print(gpu_mem[i].gpu_id);
+		} else if (address >= gpu_mem[i].scratch_aperture.base && address <= gpu_mem[i].scratch_aperture.limit){
+			// The braces matter: release and unlock belong to this branch only
+			pthread_mutex_lock(&gpu_mem[i].scratch_aperture.fmm_mutex);
+			rc = aperture_release(&gpu_mem[i].scratch_aperture, address, MemorySizeInBytes);
+			pthread_mutex_unlock(&gpu_mem[i].scratch_aperture.fmm_mutex);
+		}
+
+		// Block found and released: do not let another GPU overwrite rc
+		if (rc == 0)
+			break;
+	}
+
+	return rc;
+}
+
+/*
+ * Query the process apertures from the kernel
+ * (AMDKFD_IOC_GET_PROCESS_APERTURES) and record the gpu id and the LDS,
+ * GPUVM and scratch bounds of every reported node in gpu_mem.
+ *
+ * gpu_mem only has NUM_OF_SUPPORTED_GPUS slots; nodes reported beyond that
+ * number are ignored rather than written past the end of the table.
+ *
+ * Returns HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_ERROR when the ioctl fails.
+ */
+HSAKMT_STATUS fmm_init_process_apertures(void){
+	struct kfd_ioctl_get_process_apertures_args args;
+	uint32_t node_id;
+
+	if (0 == kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES, (void*)&args)){
+		for(node_id = 0; node_id < args.num_of_nodes && node_id < NUM_OF_SUPPORTED_GPUS; node_id++){
+			gpu_mem[node_id].gpu_id = args.process_apertures[node_id].gpu_id;
+			gpu_mem[node_id].lds_aperture.base = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].lds_base);
+			gpu_mem[node_id].lds_aperture.limit = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].lds_limit);
+			gpu_mem[node_id].gpuvm_aperture.base = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].gpuvm_base);
+			gpu_mem[node_id].gpuvm_aperture.limit = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].gpuvm_limit);
+			gpu_mem[node_id].scratch_aperture.base = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].scratch_base);
+			gpu_mem[node_id].scratch_aperture.limit = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].scratch_limit);
+		}
+
+		return HSAKMT_STATUS_SUCCESS;
+	}
+
+	return HSAKMT_STATUS_ERROR;
+
+}
+
+/*
+ * Return the base address of the requested aperture of "gpu_id".
+ *
+ * 0 is returned whenever no usable base exists: unknown GPU, unknown
+ * aperture type, or an aperture that fails aperture_is_valid(). A status
+ * code is never returned here, because the caller could not tell it apart
+ * from an address.
+ */
+HSAuint64 fmm_get_aperture_base(aperture_type_e aperture_type, HSAuint32 gpu_id){
+	int32_t slot = gpu_mem_find_by_gpu_id(gpu_id);
+	if (slot<0)
+		return 0;
+
+	switch(aperture_type){
+	case FMM_GPUVM:
+		return aperture_is_valid(gpu_mem[slot].gpuvm_aperture.base, gpu_mem[slot].gpuvm_aperture.limit) ? PORT_VPTR_TO_UINT64(gpu_mem[slot].gpuvm_aperture.base) : 0;
+	case FMM_SCRATCH:
+		return aperture_is_valid(gpu_mem[slot].scratch_aperture.base, gpu_mem[slot].scratch_aperture.limit) ? PORT_VPTR_TO_UINT64(gpu_mem[slot].scratch_aperture.base) : 0;
+	case FMM_LDS:
+		return aperture_is_valid(gpu_mem[slot].lds_aperture.base, gpu_mem[slot].lds_aperture.limit) ? PORT_VPTR_TO_UINT64(gpu_mem[slot].lds_aperture.base) : 0;
+	default:
+		return 0;
+	}
+
+}
diff --git a/hsakmt/fmm.h b/hsakmt/fmm.h
new file mode 100644
index 0000000..5924247
--- /dev/null
+++ b/hsakmt/fmm.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef FMM_H_
+#define FMM_H_
+
+#include "hsakmttypes.h"
+#include <stddef.h>
+
+/*
+ * Kinds of per-GPU aperture known to the FMM code; this is the selector
+ * type taken by fmm_get_aperture_base().
+ */
+typedef enum {
+	FMM_FIRST_APERTURE_TYPE = 0,	/* lowest valid value */
+	FMM_GPUVM = FMM_FIRST_APERTURE_TYPE,	/* GPUVM aperture (shares value 0) */
+	FMM_LDS,	/* LDS aperture */
+	FMM_SCRATCH,	/* scratch aperture */
+	FMM_LAST_APERTURE_TYPE	/* one past the last valid type, i.e. the number of types */
+} aperture_type_e;
+
+/*
+ * Description of a single aperture: its kind, its size and where it starts.
+ * NOTE(review): no user of this type is visible in this header; `size` is
+ * presumably in bytes -- confirm against the callers.
+ */
+typedef struct {
+	aperture_type_e app_type;	/* which aperture this describes */
+	uint64_t size;
+	void* start_address;
+} aperture_properties_t;
+
+/*
+ * Query KFD for this process's apertures and cache, per reported node, the
+ * GPU id and the base/limit of the LDS, GPUVM and scratch apertures.
+ * Returns HSAKMT_STATUS_SUCCESS, or HSAKMT_STATUS_ERROR if the ioctl fails.
+ */
+HSAKMT_STATUS fmm_init_process_apertures(void);
+/*
+ * Memory interface
+ *
+ * NOTE(review): the implementations of these five functions are not visible
+ * next to this header; the remarks below are read off the signatures only.
+ * The allocators return a pointer -- presumably NULL on failure, confirm.
+ */
+void* fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes);
+void* fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes);
+void fmm_print(uint32_t node);
+bool fmm_is_inside_some_aperture(void* address);
+int fmm_release(void* address, HSAuint64 MemorySizeInBytes);
+
+/* Topology interface*/
+HSAKMT_STATUS fmm_node_added(HSAuint32 gpu_id);
+HSAKMT_STATUS fmm_node_removed(HSAuint32 gpu_id);
+/*
+ * Base address of the requested aperture of the given GPU as a 64-bit
+ * integer; 0 when the aperture is not valid or the type is not recognised.
+ */
+HSAuint64 fmm_get_aperture_base(aperture_type_e aperture_type, HSAuint32 gpu_id);
+#endif /* FMM_H_ */
diff --git a/hsakmt/globals.c b/hsakmt/globals.c
new file mode 100644
index 0000000..cad6b1f
--- /dev/null
+++ b/hsakmt/globals.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+
+// HSAKMT global data
+//
+// These objects have external linkage, so they are visible to every source
+// file of the library.
+
+// File descriptor passed to kmtIoctl() for KFD requests.
+// NOTE(review): presumably the descriptor of the opened "/dev/kfd" -- confirm
+// in the open/close code.
+int kfd_fd;
+// NOTE(review): presumably the number of outstanding hsaKmtOpenKFD() calls -- confirm.
+unsigned long kfd_open_count;
+// NOTE(review): presumably the number of outstanding system-properties
+// snapshots -- confirm in the topology code.
+unsigned long system_properties_count;
+// Statically initialised mutex; what it guards is not visible in this file.
+pthread_mutex_t hsakmt_mutex = PTHREAD_MUTEX_INITIALIZER;
diff --git a/hsakmt/hsakmt.h b/hsakmt/hsakmt.h
new file mode 100644
index 0000000..c87b3f8
--- /dev/null
+++ b/hsakmt/hsakmt.h
@@ -0,0 +1,577 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _HSAKMT_H_
+#define _HSAKMT_H_
+
+#include "hsakmttypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+  "Opens" the HSA kernel driver for user-kernel mode communication.
+
+  On Windows, this function gets a handle to the KFD's AMDKFDIO device object that
+  is responsible for user-kernel communication, this handle is used internally by
+  the thunk library to send device I/O control to the HSA kernel driver.
+  No other thunk library function may be called unless the user-kernel communication
+  channel is opened first.
+
+  On Linux this call opens the "/dev/kfd" device file to establish a communication
+  path to the kernel.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtOpenKFD( void );
+
+/**
+  "Closes" the user-kernel communication path.
+
+  On Windows, the handle obtained by the hsaKmtOpenKFD() function is closed;
+  no other communication with the kernel driver is possible after the successful
+  execution of the hsaKmtCloseKFD() function. Depending on the failure reason,
+  the user-kernel communication path may or may not be still active.
+
+  On Linux the function closes the "/dev/kfd" device file.
+  No further communication to the kernel driver is allowed until hsaKmtOpenKFD()
+  function is called again.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtCloseKFD( void );
+
+
+/**
+  Returns the user-kernel interface version supported by KFD.
+  Higher major numbers usually add new features to KFD and may break user-kernel
+  compatibility; higher minor numbers define additional functionality associated
+  within a major number.
+  The calling software should validate that it meets the minimum interface version
+  as described in the API specification.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetVersion(
+    HsaVersionInfo*  VersionInfo    //OUT
+    );
+
+/**
+  The function takes a "snapshot" of the topology information within the KFD
+  to avoid any changes during the enumeration process.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtAcquireSystemProperties(
+    HsaSystemProperties*  SystemProperties    //OUT
+    );
+
+/**
+  Releases the topology "snapshot" taken by hsaKmtAcquireSystemProperties()
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtReleaseSystemProperties( void ) ;
+
+/**
+  Retrieves the discoverable sub-properties for a given HSA
+  node. The parameters returned allow the application or runtime to size the
+  management structures necessary to store the information.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeProperties(
+    HSAuint32               NodeId,            //IN
+    HsaNodeProperties*      NodeProperties     //OUT
+    );
+
+/**
+  Retrieves the memory properties of a specific HSA node.
+  The memory pointer passed as MemoryProperties is sized as
+  NumBanks * sizeof(HsaMemoryProperties). NumBanks is retrieved with the
+  hsaKmtGetNodeProperties() call.
+
+  Some of the data returned is optional. Not all implementations may return all
+  parameters in the MemoryProperties.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeMemoryProperties(
+    HSAuint32             NodeId,             //IN
+    HSAuint32             NumBanks,           //IN
+    HsaMemoryProperties*  MemoryProperties    //OUT
+    );
+
+/**
+  Retrieves the cache properties of a specific HSA node and processor ID.
+  ProcessorID refers to either a CPU core or a SIMD unit as enumerated earlier
+  via the hsaKmtGetNodeProperties() call.
+  The memory pointer passed as CacheProperties is sized as
+  NumCaches * sizeof(HsaCacheProperties). NumCaches is retrieved with the
+  hsaKmtGetNodeProperties() call.
+
+  The data returned is optional. Not all implementations may return all
+  parameters in the CacheProperties.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeCacheProperties(
+    HSAuint32           NodeId,         //IN
+    HSAuint32           ProcessorId,    //IN
+    HSAuint32           NumCaches,      //IN
+    HsaCacheProperties* CacheProperties //OUT
+    );
+
+/**
+  Retrieves the HSA IO affinity properties of a specific HSA node.
+  The memory pointer passed as IoLinkProperties is sized as
+  NumIoLinks * sizeof(HsaIoLinkProperties). NumIoLinks is retrieved with the
+  hsaKmtGetNodeProperties() call.
+
+  The data returned is optional. Not all implementations may return all
+  parameters in the IoLinkProperties.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeIoLinkProperties(
+    HSAuint32            NodeId,            //IN
+    HSAuint32            NumIoLinks,        //IN
+    HsaIoLinkProperties* IoLinkProperties  //OUT
+    );
+
+
+
+/**
+  Creates an operating system event associated with a HSA event ID
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtCreateEvent(
+    HsaEventDescriptor* EventDesc,              //IN
+    bool                ManualReset,            //IN
+    bool                IsSignaled,             //IN
+    HsaEvent**          Event                   //OUT
+    );
+
+/**
+  Destroys an operating system event associated with a HSA event ID
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDestroyEvent(
+    HsaEvent*   Event    //IN
+    );
+
+/**
+  Sets the specified event object to the signaled state
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtSetEvent(
+    HsaEvent*  Event    //IN
+    );
+
+/**
+  Sets the specified event object to the non-signaled state
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtResetEvent(
+    HsaEvent*  Event    //IN
+    );
+
+/**
+  Queries the state of the specified event object
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtQueryEventState(
+    HsaEvent*  Event    //IN
+    );
+
+/**
+  Checks the current state of the event object. If the object's state is
+  nonsignaled, the calling thread enters the wait state.
+
+ The function returns when one of the following occurs:
+- The specified event object is in the signaled state.
+- The time-out interval elapses.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtWaitOnEvent(
+    HsaEvent*   Event,          //IN
+    HSAuint32   Milliseconds    //IN
+    );
+
+/**
+  Checks the current state of multiple event objects.
+
+ The function returns when one of the following occurs:
+- Either any one or all of the specified objects are in the signaled state
+  - if "WaitOnAll" is "true" the function returns when the state of all
+    objects in array is signaled
+  - if "WaitOnAll" is "false" the function returns when the state of any
+    one of the objects is set to signaled
+- The time-out interval elapses.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtWaitOnMultipleEvents(
+    HsaEvent*   Events[],       //IN
+    HSAuint32   NumEvents,      //IN
+    bool        WaitOnAll,      //IN
+    HSAuint32   Milliseconds    //IN
+    );
+
+/**
+  New TEMPORARY function definition - to be used only on the "Trinity + Southern Islands" platform.
+  If used on other platforms the function will return HSAKMT_STATUS_ERROR
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtReportQueue(
+    HSA_QUEUEID     QueueId,        //IN
+    HsaQueueReport* QueueReport     //OUT
+    );
+
+/**
+  Creates a GPU queue with user-mode access rights
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtCreateQueue(
+    HSAuint32           NodeId,           //IN
+    HSA_QUEUE_TYPE      Type,             //IN
+    HSAuint32           QueuePercentage,  //IN
+    HSA_QUEUE_PRIORITY  Priority,         //IN
+    void*               QueueAddress,     //IN
+    HSAuint64           QueueSizeInBytes, //IN
+    HsaEvent*           Event,            //IN
+    HsaQueueResource*   QueueResource     //OUT
+    );
+
+/**
+  Updates a queue
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtUpdateQueue(
+    HSA_QUEUEID         QueueId,        //IN
+    HSAuint32           QueuePercentage,//IN
+    HSA_QUEUE_PRIORITY  Priority,       //IN
+    void*               QueueAddress,   //IN
+    HSAuint64           QueueSize,      //IN
+    HsaEvent*           Event           //IN
+    );
+
+/**
+  Destroys a queue
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDestroyQueue(
+    HSA_QUEUEID         QueueId         //IN
+    );
+
+/**
+  Allows an HSA process to set/change the default and alternate memory coherency, before starting to dispatch. 
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtSetMemoryPolicy(
+    HSAuint32       Node,                       //IN
+    HSAuint32       DefaultPolicy,     	   	    //IN  
+    HSAuint32       AlternatePolicy,       	    //IN  
+    void*           MemoryAddressAlternate,     //IN (page-aligned)
+    HSAuint64       MemorySizeInBytes   	    //IN (page-aligned)
+    );
+/**
+  Allocates a memory buffer that may be accessed by the GPU
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtAllocMemory(
+    HSAuint32       PreferredNode,          //IN
+    HSAuint64       SizeInBytes,            //IN  (multiple of page size)
+    HsaMemFlags     MemFlags,               //IN
+    void**          MemoryAddress           //OUT (page-aligned)
+    );
+
+/**
+  Frees a memory buffer
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtFreeMemory(
+    void*       MemoryAddress,      //IN (page-aligned)
+    HSAuint64   SizeInBytes         //IN
+    );
+
+/**
+  Registers with KFD a memory buffer that may be accessed by the GPU
+  This function will never be required for Linux
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtRegisterMemory(
+    void*       MemoryAddress,      //IN (page-aligned)
+    HSAuint64   MemorySizeInBytes   //IN (page-aligned)
+    );
+
+
+/**
+  Unregisters with KFD a memory buffer
+  This function will never be required for Linux
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDeregisterMemory(
+    void*       MemoryAddress  //IN
+    );
+
+
+/**
+  Ensures that the memory is resident and can be accessed by GPU
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtMapMemoryToGPU(
+    void*           MemoryAddress,     //IN (page-aligned)
+    HSAuint64       MemorySizeInBytes, //IN (page-aligned)
+    HSAuint64*      AlternateVAGPU     //OUT (page-aligned)     
+    );
+
+/**
+  Releases the residency of the memory
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtUnmapMemoryToGPU(
+    void*           MemoryAddress       //IN (page-aligned)
+    );
+
+
+/**
+  Notifies the kernel driver that a process wants to use GPU debugging facilities
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgRegister(
+    HSAuint32       NodeId      //IN
+    );
+
+/**
+  Detaches the debugger process from the HW debug established by hsaKmtDbgRegister() API
+*/
+
+HSAKMT_STATUS 
+HSAKMTAPI 
+hsaKmtDbgUnregister(
+    HSAuint32       NodeId      //IN
+    );
+
+/**
+  Controls a wavefront
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgWavefrontControl(
+    HSAuint32           NodeId,         //IN
+    HSA_DBG_WAVEOP      Operand,        //IN
+    HSA_DBG_WAVEMODE    Mode,           //IN
+    HSAuint32           TrapId,         //IN
+    HsaDbgWaveMessage*  DbgWaveMsgRing  //IN
+    );
+
+/**
+  Sets watch points on memory address ranges to generate exception events when the
+  watched addresses are  accessed
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgAddressWatch(
+    HSAuint32           NodeId,         //IN
+    HSAuint32           NumWatchPoints, //IN
+    HSA_DBG_WATCH_MODE  WatchMode[],    //IN
+    void*               WatchAddress[], //IN
+    HSAuint64           WatchMask[],    //IN, optional
+    HsaEvent*           WatchEvent[]    //IN, optional
+    );
+
+/**
+  Gets GPU and CPU clock counters for particular Node
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetClockCounters(
+    HSAuint32         NodeId,  //IN
+    HsaClockCounters* Counters //OUT
+    );
+
+/**
+  Retrieves information on the available HSA counters
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcGetCounterProperties(
+    HSAuint32                   NodeId,             //IN
+    HsaCounterProperties**      CounterProperties   //OUT
+    );
+
+/**
+  Registers a set of (HW) counters to be used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcRegisterTrace(
+    HSAuint32           NodeId,             //IN
+    HSAuint32           NumberOfCounters,   //IN
+    HsaCounter*         Counters,           //IN
+    HsaPmcTraceRoot*    TraceRoot           //OUT
+    );
+
+/**
+  Unregisters a set of (HW) counters used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcUnregisterTrace(
+    HSAuint32   NodeId,     //IN
+    HSATraceId  TraceId     //IN
+    );
+
+/**
+  Allows a user mode process to get exclusive access to the defined set of (HW) counters
+  used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcAcquireTraceAccess(
+    HSAuint32   NodeId,     //IN
+    HSATraceId  TraceId     //IN
+    );
+
+/**
+  Allows a user mode process to release exclusive access to the defined set of (HW) counters
+  used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcReleaseTraceAccess(
+    HSAuint32   NodeId,     //IN
+    HSATraceId  TraceId     //IN
+    );
+
+/**
+  Starts tracing operation on a previously established set of performance counters
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcStartTrace(
+    HSATraceId  TraceId,                //IN
+    void*       TraceBuffer,            //IN (page aligned) 
+    HSAuint64   TraceBufferSizeBytes    //IN (page aligned)
+    );
+
+/**
+   Forces an update of all the counters that a previously started trace operation has registered
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcQueryTrace(
+    HSATraceId    TraceId   //IN
+    );
+
+/**
+  Stops tracing operation on a previously established set of performance counters
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcStopTrace(
+    HSATraceId  TraceId     //IN
+    );
+
+/**
+  Sets trap handler and trap buffer to be used for all queues associated with the specified NodeId within this process context
+*/
+
+HSAKMT_STATUS 
+HSAKMTAPI 
+hsaKmtSetTrapHandler(
+    HSAuint32           NodeId,                   //IN
+    void*               TrapHandlerBaseAddress,   //IN
+    HSAuint64           TrapHandlerSizeInBytes,   //IN
+    void*               TrapBufferBaseAddress,    //IN
+    HSAuint64           TrapBufferSizeInBytes     //IN
+    );
+
+#ifdef __cplusplus
+}   //extern "C"
+#endif
+
+#endif //_HSAKMT_H_
+
diff --git a/hsakmt/hsakmttypes.h b/hsakmt/hsakmttypes.h
new file mode 100644
index 0000000..a7e0a81
--- /dev/null
+++ b/hsakmt/hsakmttypes.h
@@ -0,0 +1,909 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _HSAKMTTYPES_H_
+#define _HSAKMTTYPES_H_
+
+//the definitions and THUNK API are version specific - define the version numbers here
+#define HSAKMT_VERSION_MAJOR    0
+#define HSAKMT_VERSION_MINOR    99
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Platform section: defines the HSAKMTAPI calling-convention macro and the
+// fixed-width HSA integer typedefs for Windows and for Linux.
+// NOTE(review): neither branch defines a signed 32-bit type, and there is no
+// fallback branch, so on any other platform these names stay undefined.
+#if defined(_WIN64) || defined(_WINDOWS) || defined(_WIN32)
+
+    #if defined(_WIN32)
+        #define HSAKMTAPI  __stdcall
+    #else
+        #define HSAKMTAPI
+    #endif
+
+    typedef unsigned char      HSAuint8;
+    // NOTE(review): plain char, so signedness follows the compiler setting.
+    typedef char               HSAint8;
+    typedef unsigned short     HSAuint16;
+    typedef signed short       HSAint16;
+    typedef unsigned __int32   HSAuint32;
+    typedef signed __int64     HSAint64;
+    typedef unsigned __int64   HSAuint64;
+
+#elif defined(__linux__)
+
+// <stdbool.h> supplies the bool used in the API prototypes; <stdint.h>
+// supplies the exact-width types aliased below.
+#include <stdbool.h>
+#include <stdint.h>
+
+    // No special calling convention on Linux.
+    #define HSAKMTAPI
+
+    typedef uint8_t     HSAuint8;
+    typedef int8_t      HSAint8;
+    typedef uint16_t	HSAuint16;
+    typedef int16_t	HSAint16;
+    typedef uint32_t	HSAuint32;
+    typedef int64_t	HSAint64;
+    typedef uint64_t	HSAuint64;
+
+#endif
+
+// Opaque handle type. NOTE(review): no user is visible in this part of the header.
+typedef void*              HSA_HANDLE;
+// 64-bit queue identifier taken by the hsaKmt queue functions
+// (hsaKmtReportQueue / hsaKmtUpdateQueue / hsaKmtDestroyQueue).
+typedef HSAuint64          HSA_QUEUEID;
+
+// This is included in order to force the alignments to be 4 bytes so that
+// it avoids extra padding added by the compiler when a 64-bit binary is generated.
+#pragma pack(push, hsakmttypes_h, 4)
+
+//
+// HSA STATUS codes returned by the KFD Interfaces
+//
+
+// Status codes are grouped by topic; the numeric gaps between the groups
+// are unassigned values.
+typedef enum _HSAKMT_STATUS
+{
+    HSAKMT_STATUS_SUCCESS                      = 0,  // Operation successful
+    HSAKMT_STATUS_ERROR                        = 1,  // General error return if not otherwise specified
+    HSAKMT_STATUS_DRIVER_MISMATCH              = 2,  // User mode component is not compatible with kernel HSA driver
+
+    HSAKMT_STATUS_INVALID_PARAMETER            = 3,  // KFD identifies input parameters invalid
+    HSAKMT_STATUS_INVALID_HANDLE               = 4,  // KFD identifies handle parameter invalid
+    HSAKMT_STATUS_INVALID_NODE_UNIT            = 5,  // KFD identifies node or unit parameter invalid
+
+    HSAKMT_STATUS_NO_MEMORY                    = 6,  // No memory available (when allocating queues or memory)
+    HSAKMT_STATUS_BUFFER_TOO_SMALL             = 7,  // A buffer needed to handle a request is too small
+
+    HSAKMT_STATUS_NOT_IMPLEMENTED              = 10, // KFD function is not implemented for this set of parameters
+    HSAKMT_STATUS_NOT_SUPPORTED                = 11, // KFD function is not supported on this node
+    HSAKMT_STATUS_UNAVAILABLE                  = 12, // KFD function is not available currently on this node (but
+                                                  // may be at a later time)
+
+    HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED = 20, // KFD driver path not opened
+    HSAKMT_STATUS_KERNEL_COMMUNICATION_ERROR   = 21, // user-kernel mode communication failure
+    HSAKMT_STATUS_KERNEL_ALREADY_OPENED        = 22, // KFD driver path already opened
+    HSAKMT_STATUS_HSAMMU_UNAVAILABLE           = 23, // ATS/PRI 1.1 (Address Translation Services) not available
+                                                  // (IOMMU driver not installed or not-available)
+
+    HSAKMT_STATUS_WAIT_FAILURE                 = 30, // The wait operation failed
+    HSAKMT_STATUS_WAIT_TIMEOUT                 = 31, // The wait operation timed out
+
+    HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED    = 35, // Memory buffer already registered
+    HSAKMT_STATUS_MEMORY_NOT_REGISTERED        = 36, // Memory buffer not registered
+    HSAKMT_STATUS_MEMORY_ALIGNMENT             = 37, // Memory parameter not aligned
+
+} HSAKMT_STATUS;
+
+//
+// HSA KFD interface version information. Calling software has to validate that it meets
+// the minimum interface version as described in the API specification.
+// All future structures will be extended in a backward compatible fashion.
+//
+
+// Output structure of hsaKmtGetVersion().
+typedef struct _HsaVersionInfo
+{
+    HSAuint32    KernelInterfaceMajorVersion;    // supported kernel interface major version
+    HSAuint32    KernelInterfaceMinorVersion;    // supported kernel interface minor version
+} HsaVersionInfo;
+
+//
+// HSA Topology Discovery Infrastructure structure definitions.
+// The infrastructure implementation is based on design specified in the Kernel HSA Driver ADD
+// The discoverable data is retrieved from ACPI structures in the platform infrastructure, as defined
+// in the "Heterogeneous System Architecture Detail Topology" specification.
+//
+// The following structure is returned on a call to hsaKmtAcquireSystemProperties() as output.
+// When the call is made within a process context, a "snapshot" of the topology information
+// is taken within the KFD to avoid any changes during the enumeration process.
+// The Snapshot is released when hsaKmtReleaseSystemProperties() is called
+// or when the process exits or is terminated.
+//
+
+// Output structure of hsaKmtAcquireSystemProperties(); four 32-bit fields.
+typedef struct _HsaSystemProperties
+{
+    HSAuint32    NumNodes;         // the number of "H-NUMA" memory nodes.
+                                   // each node represents a discoverable node of the system
+                                   // All other enumeration is done on a per-node basis
+
+    HSAuint32    PlatformOem;      // identifies HSA platform, reflects the OEMID in the CRAT
+    HSAuint32    PlatformId;       // HSA platform ID, reflects OEM TableID in the CRAT
+    HSAuint32    PlatformRev;      // HSA platform revision, reflects Platform Table Revision ID
+} HsaSystemProperties;
+
+
+// Capability flags of a node (the type of HsaNodeProperties.Capability).
+// The same 32 bits can be read as one integer (Value) or as individual
+// bit-fields (ui32); the bit-field widths add up to 32
+// (8 single-bit flags + 4 + 2 + 18 reserved).
+typedef union
+{
+    HSAuint32 Value;
+    struct
+    {
+        unsigned int HotPluggable        : 1;    // the node may be removed by some system action
+                                                 // (event will be sent)
+        unsigned int HSAMMUPresent       : 1;    // This node has an ATS/PRI 1.1 compatible
+                                                 // translation agent in the system (e.g. IOMMUv2)
+        unsigned int SharedWithGraphics  : 1;    // this HSA nodes' GPU function is also used for OS primary
+                                                 // graphics render (= UI)
+        unsigned int QueueSizePowerOfTwo : 1;    // This node GPU requires the queue size to be a power of 2 value
+        unsigned int QueueSize32bit      : 1;    // This node GPU requires the queue size to be less than 4GB
+        unsigned int QueueIdleEvent      : 1;    // This node GPU supports notification on Queue Idle
+        unsigned int VALimit             : 1;    // This node GPU has limited VA range for platform
+                                                 // (typical 40bit). Affects shared VM use for 64bit apps
+        unsigned int WatchPointsSupported: 1;	 // Indicates if Watchpoints are available on the node.
+        unsigned int WatchPointsTotalBits: 4;    // log2 of the number of watchpoints available.
+                                                 // To determine the number use 2^value
+
+        unsigned int DoorbellType        : 2;    // 0: This node has pre-1.0 doorbell characteristic
+                                                 // 1: This node has 1.0 doorbell characteristic
+                                                 // 2,3: reserved for future use
+        unsigned int Reserved            : 18;
+    } ui32;
+} HSA_CAPABILITY;
+
+
+//
+// HSA node properties. This structure is an output parameter of hsaKmtGetNodeProperties()
+// The application or runtime can use the information herein to size the topology management structures
+// Unless there is some very weird setup, there is at most one "GPU" device (with a certain number
+// of throughput compute units (= SIMDs)) associated with a H-NUMA node.
+//
+
+#define HSA_PUBLIC_NAME_SIZE        128
+
+typedef struct _HsaNodeProperties
+{
+    HSAuint32       NumCPUCores;       // # of latency (= CPU) cores present on this HSA node.
+                                       // This value is 0 for a HSA node with no such cores,
+                                       // e.g a "discrete HSA GPU"
+    HSAuint32       NumFComputeCores;  // # of HSA throughput (= GPU) FCompute cores ("SIMD") present in a node.
+                                       // This value is 0 if no FCompute cores are present (e.g. pure "CPU node").
+    HSAuint32       NumMemoryBanks;    // # of discoverable memory bank affinity properties on this "H-NUMA" node.
+    HSAuint32       NumCaches;         // # of discoverable cache affinity properties on this "H-NUMA"  node.
+
+    HSAuint32       NumIOLinks;        // # of discoverable IO link affinity properties of this node
+                                       // connecting to other nodes.
+
+    HSAuint32       CComputeIdLo;      // low value of the logical processor ID of the latency (= CPU)
+                                       // cores available on this node
+    HSAuint32       FComputeIdLo;      // low value of the logical processor ID of the throughput (= GPU)
+                                       // units available on this node
+
+    HSA_CAPABILITY  Capability;        // see above
+
+    HSAuint32       MaxWavesPerSIMD;   // This identifies the max. number of launched waves per SIMD.
+                                       // If NumFComputeCores is 0, this value is ignored.
+    HSAuint32       LDSSizeInKB;       // Size of Local Data Store in Kilobytes per SIMD Wavefront
+    HSAuint32       GDSSizeInKB;       // Size of Global Data Store in Kilobytes shared across SIMD Wavefronts
+
+    HSAuint32       WaveFrontSize;     // Number of SIMD cores per wavefront executed, typically 64,
+                                       // may be 32 or a different value for some HSA based architectures
+
+    HSAuint32       NumShaderBanks;    // Number of Shader Banks or Shader Engines, typical values are 1 or 2
+
+
+    HSAuint32       NumArrays;         // Number of SIMD arrays per engine
+    HSAuint32       NumCUPerArray;     // Number of Compute Units (CU) per SIMD array
+    HSAuint32       NumSIMDPerCU;      // Number of SIMD representing a Compute Unit (CU)
+
+    HSAuint32       MaxSlotsScratchCU; // Number of temp. memory ("scratch") wave slots available to access,
+                                       // may be 0 if HW has no restrictions
+
+    HSAuint32       EngineId;          // Identifier (rev) of the GPU uEngine or Firmware, may be 0
+
+    HSAuint16       VendorId;          // GPU vendor id; 0 on latency (= CPU)-only nodes
+    HSAuint16       DeviceId;          // GPU device id; 0 on latency (= CPU)-only nodes
+
+    HSAuint32       LocationId;        // GPU BDF (Bus/Device/function number) - identifies the device
+                                       // location in the overall system
+    HSAuint64       LocalMemSize;       // Local memory size
+    HSAuint32       MaxEngineClockMhzFCompute;  // maximum engine clock of the throughput (= GPU) function,
+                                                // including any boost capabilities (MHz, per the field name)
+    HSAuint32       MaxEngineClockMhzCCompute;  // maximum engine clock of the latency (= CPU) function,
+                                                // including any boost capabilities (MHz, per the field name)
+
+    HSAuint16       MarketingName[HSA_PUBLIC_NAME_SIZE];   // Public name of the "device" on the node (board or APU name).
+                                       // Unicode string held in HSA_PUBLIC_NAME_SIZE 16-bit elements
+} HsaNodeProperties;
+
+
+// Kind of memory heap described by an HsaMemoryProperties entry (HeapType field).
+typedef enum _HSA_HEAPTYPE
+{
+    HSA_HEAPTYPE_SYSTEM                = 0,
+    HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC   = 1, // CPU "visible" part of GPU device local memory (for discrete GPU)
+    HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE  = 2, // CPU "invisible" part of GPU device local memory (for discrete GPU)
+                                            // All HSA accessible memory is per definition "CPU visible"
+                                            // "Private memory" is relevant for graphics interop only.
+    HSA_HEAPTYPE_GPU_GDS               = 3, // GPU internal memory (GDS)
+    HSA_HEAPTYPE_GPU_LDS               = 4, // GPU internal memory (LDS)
+    HSA_HEAPTYPE_GPU_SCRATCH           = 5, // GPU special memory (scratch)
+
+    HSA_HEAPTYPE_NUMHEAPTYPES,              // takes the next value (6), i.e. the number of heap types above
+    HSA_HEAPTYPE_SIZE                  = 0xFFFFFFFF // NOTE(review): presumably only present to force a
+                                                    // 32-bit enum representation -- confirm
+} HSA_HEAPTYPE;
+
+typedef union
+{
+    HSAuint32 MemoryProperty;               // all property bits viewed as a single 32-bit value
+    struct
+    {
+        unsigned int HotPluggable      : 1; // the memory may be removed by some system action,
+                                            // memory should be used for temporary data
+        unsigned int NonVolatile       : 1; // memory content is preserved across a power-off cycle.
+        unsigned int Reserved          :30; // remaining bits (1 + 1 + 30 = 32)
+    } ui32;
+} HSA_MEMORYPROPERTY;
+
+
+//
+// Discoverable HSA Memory properties.
+// The structure is the output parameter of the hsaKmtGetNodeMemoryProperties() function
+//
+
+typedef struct _HsaMemoryProperties
+{
+    HSA_HEAPTYPE    HeapType;          // heap kind (system, frame buffer, ...), see HSA_HEAPTYPE
+    union                              // one 64-bit size that can also be accessed as two 32-bit halves
+    {
+        HSAuint64   SizeInBytes;       // physical memory size of the memory range in bytes
+        struct
+        {
+            HSAuint32 SizeInBytesLow;  // physical memory size of the memory range in bytes (lower 32bit)
+            HSAuint32 SizeInBytesHigh; // physical memory size of the memory range in bytes (higher 32bit)
+        } ui32;
+    };
+    HSA_MEMORYPROPERTY  Flags;         // property bits, see HSA_MEMORYPROPERTY
+
+    HSAuint32    Width;                // memory width - the number of parallel bits of the memory interface
+    HSAuint32    MemoryClockMax;       // memory clock for the memory, this allows computing the available bandwidth
+                                       // to the memory when needed
+    HSAuint64    VirtualBaseAddress;   // if set to value != 0, indicates the virtual base address of the memory
+                                       // in process virtual space
+} HsaMemoryProperties;
+
+//
+// Discoverable Cache Properties. (optional).
+// The structure is the output parameter of the hsaKmtGetNodeCacheProperties() function
+// Any of the parameters may be 0 (= not defined)
+//
+
+#define HSA_CPU_SIBLINGS            256         // element count of the SiblingMap[] arrays declared below
+#define HSA_PROCESSORID_ALL         0xFFFFFFFF
+
+typedef union
+{
+    HSAuint32 Value;                        // all cache-type bits viewed as a single 32-bit value
+    struct
+    {
+        unsigned int Data           : 1;
+        unsigned int Instruction    : 1;
+        unsigned int CPU            : 1;
+        unsigned int HSACU          : 1;
+        unsigned int Reserved       :28;    // remaining bits (4 flag bits + 28 = 32)
+    } ui32;
+} HsaCacheType;
+
+typedef struct _HaCacheProperties  // NOTE(review): tag is spelled "_Ha...", unlike the "_Hsa..." tags of sibling structs
+{
+    HSAuint32    ProcessorIdLow;   // Identifies the processor number
+
+    HSAuint32    CacheLevel;       // Integer representing level: 1, 2, 3, 4, etc
+    HSAuint32    CacheSize;        // Size of the cache
+    HSAuint32    CacheLineSize;    // Cache line size in bytes
+    HSAuint32    CacheLinesPerTag; // Cache lines per Cache Tag
+    HSAuint32    CacheAssociativity; // Cache Associativity
+    HSAuint32    CacheLatency;     // Cache latency in ns
+    HsaCacheType CacheType;        // cache type bits, see HsaCacheType
+    HSAuint32    SiblingMap[HSA_CPU_SIBLINGS]; // HSA_CPU_SIBLINGS (256) entries
+} HsaCacheProperties;
+
+
+//
+// Discoverable CPU Compute Properties. (optional).
+// The structure is the output parameter of the hsaKmtGetCComputeProperties() function
+// Any of the parameters may be 0 (= not defined)
+//
+
+typedef struct _HsaCComputeProperties
+{
+    HSAuint32    SiblingMap[HSA_CPU_SIBLINGS]; // HSA_CPU_SIBLINGS (256) entries
+} HsaCComputeProperties;
+
+//
+// Discoverable IoLink Properties (optional).
+// The structure is the output parameter of the hsaKmtGetNodeIoLinkProperties() function.
+// Any of the parameters may be 0 (= not defined)
+//
+
+typedef enum _HSA_IOLINKTYPE {
+    HSA_IOLINKTYPE_UNDEFINED      = 0,
+    HSA_IOLINKTYPE_HYPERTRANSPORT = 1,
+    HSA_IOLINKTYPE_PCIEXPRESS     = 2,
+    HSA_IOLINKTYPE_AMBA           = 3,
+    HSA_IOLINKTYPE_MIPI           = 4,
+    HSA_IOLINKTYPE_OTHER          = 5,
+    HSA_IOLINKTYPE_NUMIOLINKTYPES,           // implicit value 6 = number of link types listed above
+    HSA_IOLINKTYPE_SIZE           = 0xFFFFFFFF // sentinel that stretches the enum to a 32-bit value range
+} HSA_IOLINKTYPE;
+
+typedef union
+{
+    HSAuint32 LinkProperty;                  // all link flag bits viewed as a single 32-bit value
+    struct
+    {
+        unsigned int Override          : 1;  // bus link properties are determined by this structure
+                                             // not by the HSA_IOLINKTYPE. The other flags are valid
+                                             // only if this bit is set to one
+        unsigned int NonCoherent       : 1;  // The link doesn't support coherent transactions
+                                             // memory accesses across must not be set to "host cacheable"!
+        unsigned int NoAtomics32bit    : 1;  // The link doesn't support 32bit-wide atomic transactions
+        unsigned int NoAtomics64bit    : 1;  // The link doesn't support 64bit-wide atomic transactions
+        unsigned int Reserved          :28;  // remaining bits (4 flag bits + 28 = 32)
+    } ui32;
+} HSA_LINKPROPERTY;
+
+
+typedef struct _HsaIoLinkProperties
+{
+    HSA_IOLINKTYPE  IoLinkType;      // link type, see HSA_IOLINKTYPE
+    HSAuint32    VersionMajor;       // Bus interface version (optional)
+    HSAuint32    VersionMinor;       // Bus interface version (optional)
+
+    HSAuint32    NodeFrom;           // presumably the node id at the source end of the link -- TODO confirm
+    HSAuint32    NodeTo;             // presumably the node id at the destination end of the link -- TODO confirm
+
+    HSAuint32    Weight;             // weight factor (derived from CDIT)
+
+    HSAuint32    MinimumLatency;     // minimum cost of time to transfer (rounded to ns)
+    HSAuint32    MaximumLatency;     // maximum cost of time to transfer (rounded to ns)
+    HSAuint32    MinimumBandwidth;   // minimum interface Bandwidth in MB/s
+    HSAuint32    MaximumBandwidth;   // maximum interface Bandwidth in MB/s
+    HSAuint32    RecTransferSize;    // recommended transfer size to reach maximum bandwidth in Bytes
+    HSA_LINKPROPERTY Flags;          // override flags (may be active for specific platforms)
+} HsaIoLinkProperties;
+
+//
+// Memory allocation definitions for the KFD HSA interface
+//
+
+typedef struct _HsaMemFlags
+{
+    union
+    {
+        struct
+        {
+            unsigned int NonPaged    : 1; // default = 0: pageable memory
+            unsigned int CachePolicy : 2; // see HSA_CACHING_TYPE
+            unsigned int ReadOnly    : 1; // default = 0: Read/Write memory
+            unsigned int PageSize    : 2; // see HSA_PAGE_SIZE
+            unsigned int HostAccess  : 1; // default = 0: GPU access only
+            unsigned int NoSubstitute: 1; // default = 0: if specific memory is not available on node (e.g. on
+                                          // discrete GPU local), allocation may fall back to system memory node 0
+                                          // memory (= always available). Otherwise no allocation is possible.
+            unsigned int GDSMemory   : 1; // default = 0: If set, the allocation will occur in GDS heap.
+                                          // HostAccess must be 0, all other flags (except NoSubstitute) should
+                                          // be 0 when setting this entry to 1. GDS allocation may fail due to
+                                          // limited resources. Application code is required to work without
+                                          // any allocated GDS memory using regular memory.
+                                          // Allocation fails on any node without GPU function.
+            unsigned int Scratch     : 1; // default = 0: If set, the allocation will occur in GPU "scratch area".
+                                          // HostAccess must be 0, all other flags (except NoSubstitute) should be 0
+                                          // when setting this entry to 1. Scratch allocation may fail due to limited
+                                          // resources. Application code is required to work without any allocation.
+                                          // Allocation fails on any node without GPU function.
+            unsigned int AtomicAccessFull: 1; // default = 0: If set, the memory will be allocated and mapped to allow
+                                              // atomic ops processing. On AMD APU, this will use the ATC path on system
+                                              // memory, irrespective of the NonPaged flag setting (= if NonPaged is set,
+                                              // the memory is pagelocked but mapped through IOMMUv2 instead of GPUVM).
+                                              // All atomic ops must be supported on this memory.
+            unsigned int AtomicAccessPartial: 1; // default = 0: See above for AtomicAccessFull description, however
+                                                 // focused on AMD discrete GPU that support PCIe atomics; the memory
+                                                 // allocation is mapped to allow for PCIe atomics to operate on system
+                                                 // memory, irrespective of NonPaged set or the presence of an ATC path
+                                                 // in the system. The atomic operations supported are limited to SWAP,
+                                                 // CompareAndSwap (CAS) and FetchAdd (this PCIe op allows both atomic
+                                                 // increment and decrement via 2-complement arithmetic), which are the
+                                                 // only atomic ops directly supported in PCI Express.
+                                                 // On AMD APU, setting this flag will allocate the same type of memory
+                                                 // as AtomicAccessFull, but it will be considered compatible with
+                                                 // discrete GPU atomic operations access.
+            unsigned int ExecuteAccess: 1; // default = 0: Identifies if memory is primarily used for data or accessed
+                                           // for executable code (e.g. queue memory) by the host CPU or the device.
+                                           // Influences the page attribute setting within the allocation
+            unsigned int Reserved    : 19; // remaining bits (13 flag bits above + 19 = 32)
+
+        } ui32;
+        HSAuint32 Value;                  // all flag bits viewed as a single 32-bit value
+    };
+} HsaMemFlags;
+
+typedef enum _HSA_CACHING_TYPE
+{
+    HSA_CACHING_CACHED        = 0,  // values 0..3 fit the 2-bit HsaMemFlags CachePolicy field
+    HSA_CACHING_NONCACHED     = 1,
+    HSA_CACHING_WRITECOMBINED = 2,
+    HSA_CACHING_RESERVED      = 3,
+    HSA_CACHING_NUM_CACHING,        // implicit value 4 = number of entries listed above
+    HSA_CACHING_SIZE          = 0xFFFFFFFF // sentinel that stretches the enum to a 32-bit value range
+} HSA_CACHING_TYPE;
+
+typedef enum _HSA_PAGE_SIZE         // values 0..3 fit the 2-bit HsaMemFlags PageSize field
+{
+    HSA_PAGE_SIZE_4KB         = 0,
+    HSA_PAGE_SIZE_64KB        = 1,  //64KB pages, not generally available in systems
+    HSA_PAGE_SIZE_2MB         = 2,
+    HSA_PAGE_SIZE_1GB         = 3,  //1GB pages, not generally available in systems
+} HSA_PAGE_SIZE;
+
+
+typedef enum _HSA_DEVICE
+{
+    HSA_DEVICE_CPU  = 0,
+    HSA_DEVICE_GPU  = 1,
+    MAX_HSA_DEVICE  = 2     // number of device kinds listed above
+} HSA_DEVICE;
+
+
+typedef enum _HSA_QUEUE_PRIORITY
+{
+    HSA_QUEUE_PRIORITY_MINIMUM        = -3,
+    HSA_QUEUE_PRIORITY_LOW            = -2,
+    HSA_QUEUE_PRIORITY_BELOW_NORMAL   = -1,
+    HSA_QUEUE_PRIORITY_NORMAL         =  0,
+    HSA_QUEUE_PRIORITY_ABOVE_NORMAL   =  1,
+    HSA_QUEUE_PRIORITY_HIGH           =  2,
+    HSA_QUEUE_PRIORITY_MAXIMUM        =  3,
+    HSA_QUEUE_PRIORITY_NUM_PRIORITY,        // implicit value 4 (follows MAXIMUM); NOT the count of the 7 levels above
+    HSA_QUEUE_PRIORITY_SIZE           = 0xFFFFFFFF // NOTE(review): combined with the negative values above, a compiler may pick a type wider than 32 bits -- confirm sizeof
+} HSA_QUEUE_PRIORITY;
+
+typedef enum _HSA_QUEUE_TYPE
+{
+    HSA_QUEUE_COMPUTE            = 1,  // AMD PM4 compatible Compute Queue
+    HSA_QUEUE_SDMA               = 2,  // SDMA Queue, used for data transport and format conversion (e.g. (de-)tiling, etc).
+    HSA_QUEUE_MULTIMEDIA_DECODE  = 3,  // reserved, for HSA multimedia decode queue
+    HSA_QUEUE_MULTIMEDIA_ENCODE  = 4,  // reserved, for HSA multimedia encode queue
+
+    // the following values indicate a queue type permitted to reference OS graphics
+    // resources through the interoperation API. See [5] "HSA Graphics Interoperation
+    // specification" for more details on use of such resources.
+    // (Each *_OS value below is the matching base value above plus 10.)
+    HSA_QUEUE_COMPUTE_OS           = 11, // AMD PM4 compatible Compute Queue
+    HSA_QUEUE_SDMA_OS              = 12, // SDMA Queue, used for data transport and format conversion (e.g. (de-)tiling, etc).
+    HSA_QUEUE_MULTIMEDIA_DECODE_OS = 13, // reserved, for HSA multimedia decode queue
+    HSA_QUEUE_MULTIMEDIA_ENCODE_OS = 14, // reserved, for HSA multimedia encode queue
+
+    HSA_QUEUE_COMPUTE_AQL          = 21, // HSA AQL packet compatible Compute Queue
+    HSA_QUEUE_DMA_AQL              = 22, // HSA AQL packet compatible DMA Queue
+
+    // more types in the future
+
+    HSA_QUEUE_TYPE_SIZE            = 0xFFFFFFFF     // sentinel that stretches the enum to a 32-bit value range
+} HSA_QUEUE_TYPE;
+
+typedef struct _HsaQueueResource
+{
+    HSA_QUEUEID     QueueId;    /** queue ID */
+    /** Doorbell address to notify HW of a new dispatch */
+    union
+    {
+        HSAuint32*  Queue_DoorBell;         // same storage viewed as pointer to a 32-bit doorbell
+        HSAuint64*  Queue_DoorBell_aql;     // same storage viewed as pointer to a 64-bit doorbell
+        HSAuint64   QueueDoorBell;          // same storage viewed as a raw 64-bit value
+    };
+
+    /** virtual address to notify HW of queue write ptr value */
+    union
+    {
+        HSAuint32*  Queue_write_ptr;        // same storage viewed as pointer to a 32-bit write pointer
+        HSAuint64*  Queue_write_ptr_aql;    // same storage viewed as pointer to a 64-bit write pointer
+        HSAuint64   QueueWptrValue;         // same storage viewed as a raw 64-bit value
+    };
+
+    /** virtual address updated by HW to indicate current read location */
+    union
+    {
+        HSAuint32*  Queue_read_ptr;         // same storage viewed as pointer to a 32-bit read pointer
+        HSAuint64*  Queue_read_ptr_aql;     // same storage viewed as pointer to a 64-bit read pointer
+        HSAuint64   QueueRptrValue;         // same storage viewed as a raw 64-bit value
+    };
+
+} HsaQueueResource;
+
+
+//TEMPORARY structure definition - to be used only on "Trinity + Southern Islands" platform
+typedef struct _HsaQueueReport
+{
+    HSAuint32     VMID;         //Required on SI to dispatch IB in primary ring
+    void*         QueueAddress; //virtual address of UM mapped compute ring
+    HSAuint64     QueueSize;    //size of the UM mapped compute ring
+} HsaQueueReport;
+
+
+
+typedef enum _HSA_DBG_WAVEOP
+{
+    HSA_DBG_WAVEOP_HALT        = 1, //Halts a wavefront
+    HSA_DBG_WAVEOP_RESUME      = 2, //Resumes a wavefront
+    HSA_DBG_WAVEOP_KILL        = 3, //Kills a wavefront
+    HSA_DBG_WAVEOP_DEBUG       = 4, //Causes wavefront to enter debug mode
+    HSA_DBG_WAVEOP_TRAP        = 5, //Causes wavefront to take a trap
+    HSA_DBG_NUM_WAVEOP         = 5, //number of ops; same value as TRAP because ops are numbered from 1
+    HSA_DBG_MAX_WAVEOP         = 0xFFFFFFFF //sentinel that stretches the enum to a 32-bit value range
+} HSA_DBG_WAVEOP;
+
+typedef enum _HSA_DBG_WAVEMODE
+{
+    HSA_DBG_WAVEMODE_SINGLE               = 0,  //send command to a single wave
+    //Broadcast to all wavefronts of all processes is not supported for HSA user mode (value 1 is left undefined here)
+    HSA_DBG_WAVEMODE_BROADCAST_PROCESS    = 2,  //send to waves within current process
+    HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3,  //send to waves within current process on CU
+    HSA_DBG_NUM_WAVEMODE                  = 3,  //same numeric value as BROADCAST_PROCESS_CU
+    HSA_DBG_MAX_WAVEMODE                  = 0xFFFFFFFF //sentinel that stretches the enum to a 32-bit value range
+} HSA_DBG_WAVEMODE;
+
+
+typedef enum _HSA_DBG_WAVEMSG_TYPE
+{
+    HSA_DBG_WAVEMSG_AUTO    = 0,
+    HSA_DBG_WAVEMSG_USER    = 1,
+    HSA_DBG_WAVEMSG_ERROR   = 2,
+    HSA_DBG_NUM_WAVEMSG,            // implicit value 3 = number of message types listed above
+    HSA_DBG_MAX_WAVEMSG     = 0xFFFFFFFF // sentinel that stretches the enum to a 32-bit value range
+} HSA_DBG_WAVEMSG_TYPE;
+
+typedef enum _HSA_DBG_WATCH_MODE
+{
+    HSA_DBG_WATCH_READ        = 0, //Read operations only
+    HSA_DBG_WATCH_NONREAD     = 1, //Write or Atomic operations only
+    HSA_DBG_WATCH_ATOMIC      = 2, //Atomic Operations only
+    HSA_DBG_WATCH_ALL         = 3, //Read, Write or Atomic operations
+    HSA_DBG_WATCH_NUM,             //implicit value 4 = number of watch modes listed above
+    HSA_DBG_WATCH_SIZE        = 0xFFFFFFFF //sentinel that stretches the enum to a 32-bit value range
+} HSA_DBG_WATCH_MODE;
+
+
+//This structure is hardware specific and may change in the future.
+typedef struct _HsaDbgWaveMsgAMDGen2
+{
+    HSAuint32      Value;       // 32-bit message word; its layout is not described in this header
+    HSAuint32      Reserved2;   // second 32-bit word of the message, named as reserved
+
+} HsaDbgWaveMsgAMDGen2;
+
+typedef union _HsaDbgWaveMessageAMD // union so further hardware generations can be added as extra members
+{
+    HsaDbgWaveMsgAMDGen2    WaveMsgInfoGen2;    // currently the only member
+    //for future HsaDbgWaveMsgAMDGen3;
+} HsaDbgWaveMessageAMD;
+
+typedef struct _HsaDbgWaveMessage
+{
+    void*                   MemoryVA;         // ptr to associated host-accessible data
+    HsaDbgWaveMessageAMD    DbgWaveMsg;       // hardware-specific message payload, see HsaDbgWaveMessageAMD
+} HsaDbgWaveMessage;
+
+
+//
+// HSA sync primitive, Event and HW Exception notification API definitions
+// The API functions allow the runtime to define a so-called sync-primitive, a SW object
+// combining a user-mode provided "syncvar" and a scheduler event that can be signaled
+// through a defined GPU interrupt. A syncvar is a process virtual memory location of
+// a certain size that can be accessed by CPU and GPU shader code within the process to set
+// and query the content within that memory. The definition of the content is determined by
+// the HSA runtime and potentially GPU shader code interfacing with the HSA runtime.
+// The syncvar values may be commonly written through a PM4 WRITE_DATA packet in the
+// user mode instruction stream.
+// The OS scheduler event is typically associated and signaled by an interrupt issued by
+// the GPU, but other HSA system interrupt conditions from other HW (e.g. IOMMUv2) may be
+// surfaced by the KFD by this mechanism, too.
+//
+
+// these are the new definitions for events
+typedef enum _HSA_EVENTTYPE
+{
+    HSA_EVENTTYPE_SIGNAL                     = 0, //user-mode generated GPU signal
+    HSA_EVENTTYPE_NODECHANGE                 = 1, //HSA node change (attach/detach)
+    HSA_EVENTTYPE_DEVICESTATECHANGE          = 2, //HSA device state change (start/stop)
+    HSA_EVENTTYPE_HW_EXCEPTION               = 3, //GPU shader exception event
+    HSA_EVENTTYPE_SYSTEM_EVENT               = 4, //GPU SYSCALL with parameter info
+    HSA_EVENTTYPE_DEBUG_EVENT                = 5, //GPU signal for debugging
+    HSA_EVENTTYPE_PROFILE_EVENT              = 6, //GPU signal for profiling
+    HSA_EVENTTYPE_QUEUE_EVENT                = 7, //GPU signal queue idle state (EOP pm4)
+    HSA_EVENTTYPE_MEMORY                     = 8, //GPU signal for signaling memory access faults and memory subsystem issues
+    //...
+    HSA_EVENTTYPE_MAXID,                          //implicit value 9 = number of event types listed above
+    HSA_EVENTTYPE_TYPE_SIZE                  = 0xFFFFFFFF //sentinel that stretches the enum to a 32-bit value range
+} HSA_EVENTTYPE;
+
+typedef HSAuint32  HSA_EVENTID;    // 32-bit event identifier, used as the type of HsaEvent.EventId
+
+//
+// Subdefinitions for various event types: Syncvar
+//
+
+typedef struct _HsaSyncVar
+{
+    union                               //the same location, as a pointer or as a fixed 64-bit value
+    {
+        void*       UserData;           //pointer to user mode data
+        HSAuint64   UserDataPtrValue;   //64bit compatibility of value
+    } SyncVar;
+    HSAuint64       SyncVarSize;        //size of the syncvar memory (presumably in bytes -- TODO confirm)
+} HsaSyncVar;
+
+//
+// Subdefinitions for various event types: NodeChange
+//
+
+typedef enum _HSA_EVENTTYPE_NODECHANGE_FLAGS
+{
+    HSA_EVENTTYPE_NODECHANGE_ADD     = 0,   // HSA node added to the platform
+    HSA_EVENTTYPE_NODECHANGE_REMOVE  = 1,   // HSA node removed from the platform
+    HSA_EVENTTYPE_NODECHANGE_SIZE    = 0xFFFFFFFF // sentinel that stretches the enum to a 32-bit value range
+} HSA_EVENTTYPE_NODECHANGE_FLAGS;
+
+typedef struct _HsaNodeChange
+{
+    HSA_EVENTTYPE_NODECHANGE_FLAGS Flags;   // HSA node added/removed on the platform (ADD = 0, REMOVE = 1)
+} HsaNodeChange;
+
+//
+// Sub-definitions for various event types: DeviceStateChange
+//
+
+typedef enum _HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS // NOTE: type says "...STATECHANGE", enumerators say "...STATUSCHANGE"
+{
+    HSA_EVENTTYPE_DEVICESTATUSCHANGE_START     = 0, //device started (and available)
+    HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP      = 1, //device stopped (i.e. unavailable)
+    HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE      = 0xFFFFFFFF //sentinel that stretches the enum to a 32-bit value range
+} HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS;
+
+typedef struct _HsaDeviceStateChange
+{
+    HSAuint32                           NodeId;     // F-NUMA node that contains the device
+    HSA_DEVICE                          Device;     // device type: GPU or CPU, see HSA_DEVICE
+    HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags;    // event flags: device started or stopped
+} HsaDeviceStateChange;
+
+//
+// Sub-definitions for various event types: Memory exception
+//
+
+typedef enum _HSA_EVENTID_MEMORYFLAGS    // NOTE: unlike most enums in this header, no 0xFFFFFFFF "_SIZE" sentinel is defined
+{
+    HSA_EVENTID_MEMORY_RECOVERABLE           = 0, //access fault, recoverable after page adjustment
+    HSA_EVENTID_MEMORY_FATAL_PROCESS         = 1, //memory access requires process context destruction, unrecoverable
+    HSA_EVENTID_MEMORY_FATAL_VM              = 2, //memory access requires all GPU VA context destruction, unrecoverable
+} HSA_EVENTID_MEMORYFLAGS;
+
+typedef struct _HsaAccessAttributeFailure  // 5 failure bits + 27 reserved bits = 32 bits
+{
+    unsigned int NotPresent  : 1;  // Page not present or supervisor privilege
+    unsigned int ReadOnly    : 1;  // Write access to a read-only page
+    unsigned int NoExecute   : 1;  // Execute access to a page marked NX
+    unsigned int GpuAccess   : 1;  // Host access only (presumably: GPU touched a host-only page -- TODO confirm)
+    unsigned int ECC         : 1;  // ECC failure (if supported by HW)
+    unsigned int Reserved    : 27; // must be 0
+} HsaAccessAttributeFailure;
+
+// data associated with HSA_EVENTTYPE_MEMORY
+typedef struct _HsaMemoryAccessFault
+{
+    HSAuint32                       NodeId;             // H-NUMA node that contains the device where the memory access occurred
+    HSAuint64                       VirtualAddress;     // virtual address this occurred on
+    HsaAccessAttributeFailure       Failure;            // failure attribute, see HsaAccessAttributeFailure
+    HSA_EVENTID_MEMORYFLAGS         Flags;              // event flags, see HSA_EVENTID_MEMORYFLAGS
+} HsaMemoryAccessFault;
+
+typedef struct _HsaEventData
+{
+    HSA_EVENTTYPE   EventType;      //event type
+
+    union
+    {
+        // return data associated with HSA_EVENTTYPE_SIGNAL and other events
+        HsaSyncVar              SyncVar;
+
+        // data associated with HSA_EVENTTYPE_NODECHANGE
+        HsaNodeChange           NodeChangeState;
+
+        // data associated with HSA_EVENTTYPE_DEVICESTATECHANGE
+        HsaDeviceStateChange    DeviceState;
+
+        // data associated with HSA_EVENTTYPE_MEMORY
+        HsaMemoryAccessFault    MemoryAccessFault;
+
+    } EventData;
+
+    // the following data entries are internal to the KFD & thunk itself.
+
+    HSAuint64       HWData1;                    // internal thunk store for Event data  (OsEventHandle)
+    HSAuint64       HWData2;                    // internal thunk store for Event data  (HWAddress)
+    HSAuint32       HWData3;                    // internal thunk store for Event data  (HWData)
+} HsaEventData;
+
+
+typedef struct _HsaEventDescriptor
+{
+    HSA_EVENTTYPE   EventType;                  // event type to allocate
+    HSAuint32       NodeId;                     // H-NUMA node containing GPU device that is event source
+    HsaSyncVar      SyncVar;                    // user mode syncvar data; SyncVar.SyncVar.UserDataPtrValue may be NULL
+} HsaEventDescriptor;
+
+
+typedef struct _HsaEvent
+{
+    HSA_EVENTID     EventId;        // 32-bit identifier of this event
+    HsaEventData    EventData;      // event type, type-specific payload and internal KFD/thunk fields
+} HsaEvent;
+
+typedef enum _HsaEventTimeout                  // NOTE: tag is "...Timeout" while the typedef name is "...TimeOut"
+{
+    HSA_EVENTTIMEOUT_IMMEDIATE  = 0,            // presumably "do not wait" -- confirm against the event wait API
+    HSA_EVENTTIMEOUT_INFINITE   = 0xFFFFFFFF    // presumably "wait without limit" -- confirm against the event wait API
+} HsaEventTimeOut;
+
+typedef struct _HsaClockCounters       // four 64-bit values; sampling semantics are not described in this header
+{
+    HSAuint64   GPUClockCounter;
+    HSAuint64   CPUClockCounter;
+    HSAuint64   SystemClockCounter;
+    HSAuint64   SystemClockFrequencyHz; // presumably the tick rate of SystemClockCounter in Hz -- TODO confirm
+} HsaClockCounters;
+
+#ifndef DEFINE_GUID     // no DEFINE_GUID macro available: provide our own UUID type and definer
+typedef struct _HSA_UUID
+{
+    HSAuint32   Data1;      // first initializer of HSA_DEFINE_UUID (dw)
+    HSAuint16   Data2;      // second initializer (w1)
+    HSAuint16   Data3;      // third initializer (w2)
+    HSAuint8    Data4[8];   // last eight initializers (b1..b8)
+} HSA_UUID;
+
+#define HSA_DEFINE_UUID(name, dw, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
+    static const HSA_UUID name = {dw, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}
+#else                   // DEFINE_GUID exists: alias the HSA names to GUID / DEFINE_GUID
+#define HSA_UUID GUID
+#define HSA_DEFINE_UUID DEFINE_GUID
+#endif
+
+
+// GUID that identifies the GPU Shader Sequencer (SQ) block
+// {B5C396B6-D310-47E4-86FC-5CC3043AF508}
+HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SQ,
+0xb5c396b6, 0xd310, 0x47e4, 0x86, 0xfc, 0x5c, 0xc3, 0x4, 0x3a, 0xf5, 0x8);
+
+// GUID that identifies the GPU Memory Controller (MC) block
+// {13900B57-4956-4D98-81D0-68521937F59C}
+HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_MC,
+0x13900b57, 0x4956, 0x4d98, 0x81, 0xd0, 0x68, 0x52, 0x19, 0x37, 0xf5, 0x9c);
+
+// GUID that identifies the IOMMUv2 HW device
+// {80969879-B0F6-4BE6-97F6-6A6300F5101D}
+HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_IOMMUV2,
+0x80969879, 0xb0f6, 0x4be6, 0x97, 0xf6, 0x6a, 0x63, 0x0, 0xf5, 0x10, 0x1d);
+
+// GUID that identifies the KFD
+// {EA9B5AE1-6C3F-44B3-8954-DAF07565A90A}
+HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_KERNEL_DRIVER,
+0xea9b5ae1, 0x6c3f, 0x44b3, 0x89, 0x54, 0xda, 0xf0, 0x75, 0x65, 0xa9, 0xa);
+
+typedef enum _HSA_PROFILE_TYPE
+{
+    HSA_PROFILE_TYPE_PRIVILEGED_IMMEDIATE = 0, //immediate access counter (KFD access only)
+    HSA_PROFILE_TYPE_PRIVILEGED_STREAMING = 1, //streaming counter, HW continuously
+                                               //writes to memory on updates (KFD access only)
+    HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE    = 2, //user-queue accessible counter
+    HSA_PROFILE_TYPE_NONPRIV_STREAMING    = 3, //user-queue accessible counter
+    //...
+    HSA_PROFILE_TYPE_NUM,                      //implicit value 4 = number of profile types listed above
+
+    HSA_PROFILE_TYPE_SIZE                 = 0xFFFFFFFF      // In order to align to 32-bit value
+} HSA_PROFILE_TYPE;
+
+
+typedef struct _HsaCounterFlags
+{
+    union
+    {
+        struct
+        {
+            unsigned int  Global       : 1;  // counter is global
+                                             // (not tied to VMID/WAVE/CU, ...)
+            unsigned int  Resettable   : 1;  // counter can be reset by SW
+                                             // (always to 0?)
+            unsigned int  ReadOnly     : 1;  // counter is read-only
+                                             // (but may be reset, if indicated)
+            unsigned int  Stream       : 1;  // counter has streaming capability
+                                             // (after trigger, updates buffer)
+            unsigned int  Reserved     : 28; // remaining bits (4 flag bits + 28 = 32)
+        } ui32;
+        HSAuint32      Value;                // all flag bits viewed as a single 32-bit value
+    };
+} HsaCounterFlags;
+
+
+typedef struct _HsaCounter
+{
+    HSA_PROFILE_TYPE Type;              // specifies the counter type
+    HSAuint64        CounterId;         // indicates counter register offset
+    HSAuint32        CounterSizeInBits; // indicates relevant counter bits
+    HSAuint64        CounterMask;       // bitmask for counter value (if applicable)
+    HsaCounterFlags  Flags;             // Property flags (see above)
+    HSAuint32        BlockIndex;        // identifies block the counter belongs to,
+                                        // value may be 0 to NumBlocks (NOTE(review): presumably NumBlocks - 1 -- confirm)
+} HsaCounter;
+
+
+typedef struct _HsaCounterBlockProperties
+{
+    HSA_UUID                    BlockId;        // specifies the block location
+    HSAuint32                   NumCounters;    // How many counters are available?
+                                                // (sizes Counters[] array below)
+    HSAuint32                   NumConcurrent;  // How many counter slots are available
+                                                // in block?
+    HsaCounter                  Counters[1];    // Start of counter array; declared with one element,
+                                                // (NumCounters elements total)
+} HsaCounterBlockProperties;
+
+
+typedef struct _HsaCounterProperties
+{
+    HSAuint32                   NumBlocks;      // How many profilable blocks are available?
+                                                // (sizes Blocks[] array below)
+    HSAuint32                   NumConcurrent;  // How many blocks slots can be queried
+                                                // concurrently by HW?
+    HsaCounterBlockProperties   Blocks[1];      // Start of block array (NumBlocks elements total).
+                                                // NOTE(review): each block ends in a variable-length Counters[], so Blocks[i], i > 0, presumably needs manual offsetting -- confirm
+} HsaCounterProperties;
+
+typedef HSAuint64   HSATraceId;    // 64-bit trace identifier, used as the type of HsaPmcTraceRoot.TraceId
+
+typedef struct _HsaPmcTraceRoot
+{
+    HSAuint64                   TraceBufferMinSizeBytes;// minimum trace buffer size in bytes (page aligned)
+    HSAuint32                   NumberOfPasses;
+    HSATraceId                  TraceId;                // 64-bit identifier of the trace
+} HsaPmcTraceRoot;
+
+#pragma pack(pop, hsakmttypes_h)
+
+
+#ifdef __cplusplus
+}   //extern "C"
+#endif
+
+#endif //_HSAKMTTYPES_H_
diff --git a/hsakmt/kfd_ioctl.h b/hsakmt/kfd_ioctl.h
new file mode 100644
index 0000000..d683342
--- /dev/null
+++ b/hsakmt/kfd_ioctl.h
@@ -0,0 +1,292 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_IOCTL_H_INCLUDED
+#define KFD_IOCTL_H_INCLUDED
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define KFD_IOCTL_MAJOR_VERSION 1
+#define KFD_IOCTL_MINOR_VERSION 1
+
+struct kfd_ioctl_get_version_args {
+	uint32_t major_version;	/* from KFD */
+	uint32_t minor_version;	/* from KFD */
+};
+
+/* For kfd_ioctl_create_queue_args.queue_type. */
+#define KFD_IOC_QUEUE_TYPE_COMPUTE	0
+#define KFD_IOC_QUEUE_TYPE_SDMA		1
+#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL	2
+
+#define KFD_MAX_QUEUE_PERCENTAGE	100
+#define KFD_MAX_QUEUE_PRIORITY		15
+
+struct kfd_ioctl_create_queue_args {
+	uint64_t ring_base_address;	/* to KFD */
+	uint64_t write_pointer_address;	/* from KFD */
+	uint64_t read_pointer_address;	/* from KFD */
+	uint64_t doorbell_offset;	/* from KFD */
+
+	uint32_t ring_size;		/* to KFD */
+	uint32_t gpu_id;		/* to KFD */
+	uint32_t queue_type;		/* to KFD */
+	uint32_t queue_percentage;	/* to KFD */
+	uint32_t queue_priority;	/* to KFD */
+	uint32_t queue_id;		/* from KFD */
+
+	uint64_t eop_buffer_address;	/* to KFD */
+	uint64_t eop_buffer_size;	/* to KFD */
+	uint64_t ctx_save_restore_address; /* to KFD */
+	uint64_t ctx_save_restore_size;	/* to KFD */
+};
+
+struct kfd_ioctl_destroy_queue_args {
+	uint32_t queue_id;		/* to KFD */
+	uint32_t pad;
+};
+
+struct kfd_ioctl_update_queue_args {
+	uint64_t ring_base_address;	/* to KFD */
+
+	uint32_t queue_id;		/* to KFD */
+	uint32_t ring_size;		/* to KFD */
+	uint32_t queue_percentage;	/* to KFD */
+	uint32_t queue_priority;	/* to KFD */
+};
+
+/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
+#define KFD_IOC_CACHE_POLICY_COHERENT 0
+#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
+
+struct kfd_ioctl_set_memory_policy_args {
+	uint64_t alternate_aperture_base;	/* to KFD */
+	uint64_t alternate_aperture_size;	/* to KFD */
+
+	uint32_t gpu_id;			/* to KFD */
+	uint32_t default_policy;		/* to KFD, a KFD_IOC_CACHE_POLICY_* value */
+	uint32_t alternate_policy;		/* to KFD, a KFD_IOC_CACHE_POLICY_* value */
+	uint32_t pad;				/* explicit padding; brings the struct to 32 bytes */
+};
+
+/*
+ * All counters are monotonic. They are used for profiling of compute jobs.
+ * The profiling is done by userspace.
+ *
+ * In case of GPU reset, the counter should not be affected.
+ */
+
+struct kfd_ioctl_get_clock_counters_args {
+	uint64_t gpu_clock_counter;	/* from KFD */
+	uint64_t cpu_clock_counter;	/* from KFD */
+	uint64_t system_clock_counter;	/* from KFD */
+	uint64_t system_clock_freq;	/* from KFD */
+
+	uint32_t gpu_id;		/* to KFD */
+	uint32_t pad;
+};
+
+#define NUM_OF_SUPPORTED_GPUS 7
+
+struct kfd_process_device_apertures {
+	uint64_t lds_base;		/* from KFD */
+	uint64_t lds_limit;		/* from KFD */
+	uint64_t scratch_base;		/* from KFD */
+	uint64_t scratch_limit;		/* from KFD */
+	uint64_t gpuvm_base;		/* from KFD */
+	uint64_t gpuvm_limit;		/* from KFD */
+	uint32_t gpu_id;		/* from KFD */
+	uint32_t pad;
+};
+
+struct kfd_ioctl_get_process_apertures_args {
+	struct kfd_process_device_apertures
+			process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
+
+	/* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */
+	uint32_t num_of_nodes;
+	uint32_t pad;
+};
+
+#define MAX_ALLOWED_NUM_POINTS    100
+#define MAX_ALLOWED_AW_BUFF_SIZE 4096
+#define MAX_ALLOWED_WAC_BUFF_SIZE  128
+
+struct kfd_ioctl_dbg_register_args {
+	uint32_t gpu_id;		/* to KFD */
+	uint32_t pad;
+};
+
+struct kfd_ioctl_dbg_unregister_args {
+	uint32_t gpu_id;		/* to KFD */
+	uint32_t pad;
+};
+
+struct kfd_ioctl_dbg_address_watch_args {
+	uint64_t content_ptr;		/* a pointer to the actual content */
+	uint32_t gpu_id;		/* to KFD */
+	uint32_t buf_size_in_bytes;	/*including gpu_id and buf_size */
+};
+
+struct kfd_ioctl_dbg_wave_control_args {
+	uint64_t content_ptr;		/* a pointer to the actual content */
+	uint32_t gpu_id;		/* to KFD */
+	uint32_t buf_size_in_bytes;	/*including gpu_id and buf_size */
+};
+
+/* Matching HSA_EVENTTYPE */
+#define KFD_IOC_EVENT_SIGNAL			0
+#define KFD_IOC_EVENT_NODECHANGE		1
+#define KFD_IOC_EVENT_DEVICESTATECHANGE		2
+#define KFD_IOC_EVENT_HW_EXCEPTION		3
+#define KFD_IOC_EVENT_SYSTEM_EVENT		4
+#define KFD_IOC_EVENT_DEBUG_EVENT		5
+#define KFD_IOC_EVENT_PROFILE_EVENT		6
+#define KFD_IOC_EVENT_QUEUE_EVENT		7
+#define KFD_IOC_EVENT_MEMORY			8
+
+#define KFD_IOC_WAIT_RESULT_COMPLETE		0
+#define KFD_IOC_WAIT_RESULT_TIMEOUT		1
+#define KFD_IOC_WAIT_RESULT_FAIL		2
+
+#define KFD_SIGNAL_EVENT_LIMIT			256
+
+struct kfd_ioctl_create_event_args {
+	uint64_t event_page_offset;	/* from KFD */
+	uint32_t event_trigger_data;	/* from KFD - signal events only */
+	uint32_t event_type;		/* to KFD */
+	uint32_t auto_reset;		/* to KFD */
+	uint32_t node_id;		/* to KFD - only valid for certain
+							event types */
+	uint32_t event_id;		/* from KFD */
+	uint32_t event_slot_index;	/* from KFD */
+};
+
+struct kfd_ioctl_destroy_event_args {
+	uint32_t event_id;		/* to KFD */
+	uint32_t pad;
+};
+
+struct kfd_ioctl_set_event_args {
+	uint32_t event_id;		/* to KFD */
+	uint32_t pad;
+};
+
+struct kfd_ioctl_reset_event_args {
+	uint32_t event_id;		/* to KFD */
+	uint32_t pad;
+};
+
+struct kfd_memory_exception_failure {
+	uint32_t NotPresent;	/* Page not present or supervisor privilege */
+	uint32_t ReadOnly;	/* Write access to a read-only page */
+	uint32_t NoExecute;	/* Execute access to a page marked NX */
+	uint32_t pad;
+};
+
+/* memory exception data*/
+struct kfd_hsa_memory_exception_data {
+	struct kfd_memory_exception_failure failure;
+	uint64_t va;
+	uint32_t gpu_id;
+	uint32_t pad;
+};
+
+/* Event data*/
+struct kfd_event_data {
+	union {
+		struct kfd_hsa_memory_exception_data memory_exception_data;
+	};				/* From KFD */
+	uint64_t kfd_event_data_ext;	/* pointer to an extension structure
+					   for future exception types */
+	uint32_t event_id;		/* to KFD */
+	uint32_t pad;
+};
+
+struct kfd_ioctl_wait_events_args {
+	uint64_t events_ptr;		/* pointer to an array of struct
+					   kfd_event_data, to KFD */
+	uint32_t num_events;		/* to KFD */
+	uint32_t wait_for_all;		/* to KFD */
+	uint32_t timeout;		/* to KFD */
+	uint32_t wait_result;		/* from KFD; presumably a KFD_IOC_WAIT_RESULT_* value - TODO confirm */
+};
+
+#define AMDKFD_IOCTL_BASE 'K'
+#define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
+#define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
+#define AMDKFD_IOW(nr, type)		_IOW(AMDKFD_IOCTL_BASE, nr, type)
+#define AMDKFD_IOWR(nr, type)		_IOWR(AMDKFD_IOCTL_BASE, nr, type)
+
+#define AMDKFD_IOC_GET_VERSION			\
+		AMDKFD_IOR(0x01, struct kfd_ioctl_get_version_args)
+
+#define AMDKFD_IOC_CREATE_QUEUE			\
+		AMDKFD_IOWR(0x02, struct kfd_ioctl_create_queue_args)
+
+#define AMDKFD_IOC_DESTROY_QUEUE		\
+		AMDKFD_IOWR(0x03, struct kfd_ioctl_destroy_queue_args)
+
+#define AMDKFD_IOC_SET_MEMORY_POLICY		\
+		AMDKFD_IOW(0x04, struct kfd_ioctl_set_memory_policy_args)
+
+#define AMDKFD_IOC_GET_CLOCK_COUNTERS		\
+		AMDKFD_IOWR(0x05, struct kfd_ioctl_get_clock_counters_args)
+
+#define AMDKFD_IOC_GET_PROCESS_APERTURES	\
+		AMDKFD_IOR(0x06, struct kfd_ioctl_get_process_apertures_args)
+
+#define AMDKFD_IOC_UPDATE_QUEUE			\
+		AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args)
+
+#define AMDKFD_IOC_CREATE_EVENT			\
+		AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args)
+
+#define AMDKFD_IOC_DESTROY_EVENT		\
+		AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args)
+
+#define AMDKFD_IOC_SET_EVENT			\
+		AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args)
+
+#define AMDKFD_IOC_RESET_EVENT			\
+		AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args)
+
+#define AMDKFD_IOC_WAIT_EVENTS			\
+		AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)
+
+#define AMDKFD_IOC_DBG_REGISTER			\
+		AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args)
+
+#define AMDKFD_IOC_DBG_UNREGISTER		\
+		AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args)
+
+#define AMDKFD_IOC_DBG_ADDRESS_WATCH		\
+		AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args)
+
+#define AMDKFD_IOC_DBG_WAVE_CONTROL		\
+		AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
+
+#define AMDKFD_COMMAND_START		0x01	/* first command number defined above */
+#define AMDKFD_COMMAND_END		0x11	/* one past the last command number defined above (0x10) */
+
+#endif
diff --git a/hsakmt/libhsakmt.c b/hsakmt/libhsakmt.c
new file mode 100644
index 0000000..d7f79d3
--- /dev/null
+++ b/hsakmt/libhsakmt.c
@@ -0,0 +1,18 @@
+#include <errno.h>
+#include <sys/ioctl.h>
+
+#include "libhsakmt.h"
+
+/**
+ * Issue an ioctl, reissuing it for as long as it fails with EINTR or EAGAIN.
+ * Returns whatever the last ioctl() call returned.
+ */
+int
+kmtIoctl(int fd, unsigned long request, void *arg)
+{
+    for (;;) {
+        int rc = ioctl(fd, request, arg);
+        if (rc != -1 || (errno != EINTR && errno != EAGAIN))
+            return rc;
+    }
+}
diff --git a/hsakmt/libhsakmt.h b/hsakmt/libhsakmt.h
new file mode 100644
index 0000000..0d73c8f
--- /dev/null
+++ b/hsakmt/libhsakmt.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LIBHSAKMT_H_INCLUDED
+#define LIBHSAKMT_H_INCLUDED
+
+#include "hsakmt.h"
+#include <pthread.h>
+#include <stdint.h>
+#include <limits.h>
+
+extern int kfd_fd;
+extern unsigned long kfd_open_count;
+extern pthread_mutex_t hsakmt_mutex;
+
+#undef HSAKMTAPI
+#define HSAKMTAPI __attribute__((visibility ("default")))
+
+/* Pointer -> uint64_t via unsigned long, which avoids the pointer-to-int-cast warning */
+#define PORT_VPTR_TO_UINT64(vptr) ((uint64_t)(unsigned long)(vptr))
+
+/* uint64_t -> pointer via unsigned long, which avoids the int-to-pointer-cast warning */
+#define PORT_UINT64_TO_VPTR(v) ((void*)(unsigned long)(v))
+/* Early-return guard: the enclosing function returns ..._NOT_OPENED while kfd_open_count is 0 */
+#define CHECK_KFD_OPEN() \
+	do { if (kfd_open_count == 0) return HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED; } while (0)
+/* Page size, in bytes, used by the two page helpers below */
+#define PAGE_SIZE 4096
+/* Early-return guard: the enclosing function returns INVALID_PARAMETER unless x is a PAGE_SIZE multiple */
+#define CHECK_PAGE_MULTIPLE(x) \
+	do { if ((uint64_t)PORT_VPTR_TO_UINT64(x) % PAGE_SIZE) return HSAKMT_STATUS_INVALID_PARAMETER; } while(0)
+/* PAGE_ALIGN_UP rounds x up to a PAGE_SIZE multiple; BITMASK(n) has the low n bits set (all 64 once n >= 64) */
+#define PAGE_ALIGN_UP(x) (((uint64_t)(x) + PAGE_SIZE - 1) & ~(uint64_t)(PAGE_SIZE-1))
+#define BITMASK(n) (((n) < sizeof(1ULL) * CHAR_BIT ? (1ULL << (n)) : 0) - 1ULL)
+
+/*
+ * Even though the topology code doesn't limit us to a maximum number of nodes,
+ * the current HSA spec says the maximum is 8 nodes
+ */
+#define MAX_NODES 8
+
+HSAKMT_STATUS validate_nodeid(uint32_t nodeid, uint32_t *gpu_id);
+HSAKMT_STATUS gpuid_to_nodeid(uint32_t gpu_id, uint32_t* node_id);
+uint16_t get_device_id_by_node(HSAuint32 node_id);
+
+extern int kmtIoctl(int fd, unsigned long request, void *arg);
+
+/* Void pointer arithmetic that -Wpointer-arith accepts; every argument and every result is fully parenthesized */
+#define VOID_PTR_ADD32(ptr,n) ((void*)((uint32_t*)(ptr) + (n)))/*ptr + n 32-bit words*/
+#define VOID_PTR_ADD(ptr,n) ((void*)((uint8_t*)(ptr) + (n)))/*ptr + n bytes*/
+#define VOID_PTR_SUB(ptr,n) ((void*)((uint8_t*)(ptr) - (n)))/*ptr - n bytes*/
+#define VOID_PTRS_SUB(ptr1,ptr2) ((uint64_t)((uint8_t*)(ptr1) - (uint8_t*)(ptr2))) /*ptr1 - ptr2, in bytes*/
+
+#endif
diff --git a/hsakmt/libhsakmt.ver b/hsakmt/libhsakmt.ver
new file mode 100644
index 0000000..9c6e6cb
--- /dev/null
+++ b/hsakmt/libhsakmt.ver
@@ -0,0 +1,46 @@
+HSAKMT_1
+{
+global:
+hsaKmtOpenKFD;
+hsaKmtCloseKFD;
+hsaKmtGetVersion;
+hsaKmtAcquireSystemProperties;
+hsaKmtReleaseSystemProperties;
+hsaKmtGetNodeProperties;
+hsaKmtGetNodeMemoryProperties;
+hsaKmtGetNodeCacheProperties;
+hsaKmtGetNodeIoLinkProperties;
+hsaKmtCreateEvent;
+hsaKmtDestroyEvent;
+hsaKmtSetEvent;
+hsaKmtResetEvent;
+hsaKmtQueryEventState;
+hsaKmtWaitOnEvent;
+hsaKmtWaitOnMultipleEvents;
+hsaKmtCreateQueue;
+hsaKmtUpdateQueue;
+hsaKmtDestroyQueue;
+hsaKmtSetMemoryPolicy;
+hsaKmtAllocMemory;
+hsaKmtFreeMemory;
+hsaKmtRegisterMemory;
+hsaKmtDeregisterMemory;
+hsaKmtMapMemoryToGPU;
+hsaKmtUnmapMemoryToGPU;
+hsaKmtDbgRegister;
+hsaKmtDbgUnregister;
+hsaKmtDbgWavefrontControl;
+hsaKmtDbgAddressWatch;
+hsaKmtGetClockCounters;
+hsaKmtPmcGetCounterProperties;
+hsaKmtPmcRegisterTrace;
+hsaKmtPmcUnregisterTrace;
+hsaKmtPmcAcquireTraceAccess;
+hsaKmtPmcReleaseTraceAccess;
+hsaKmtPmcStartTrace;
+hsaKmtPmcQueryTrace;
+hsaKmtPmcStopTrace;
+
+local: *;
+};
+
diff --git a/hsakmt/memory.c b/hsakmt/memory.c
new file mode 100644
index 0000000..718dd97
--- /dev/null
+++ b/hsakmt/memory.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+#include "linux/kfd_ioctl.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include "fmm.h"
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtSetMemoryPolicy(
+	HSAuint32 Node,
+	HSAuint32 DefaultPolicy,
+	HSAuint32 AlternatePolicy,
+	void* MemoryAddressAlternate,
+	HSAuint64 MemorySizeInBytes
+	)
+{
+	struct kfd_ioctl_set_memory_policy_args policy_args;
+	HSAKMT_STATUS status;
+	uint32_t gpu;
+
+	CHECK_KFD_OPEN();
+	status = validate_nodeid(Node, &gpu);
+	if (status != HSAKMT_STATUS_SUCCESS)
+		return status;
+
+	/* Each policy must be one of the two caching modes handled below. */
+	if (DefaultPolicy != HSA_CACHING_CACHED && DefaultPolicy != HSA_CACHING_NONCACHED)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+	if (AlternatePolicy != HSA_CACHING_CACHED && AlternatePolicy != HSA_CACHING_NONCACHED)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	/* The alternate aperture's base and size must both be PAGE_SIZE multiples. */
+	CHECK_PAGE_MULTIPLE(MemoryAddressAlternate);
+	CHECK_PAGE_MULTIPLE(MemorySizeInBytes);
+	memset(&policy_args, 0, sizeof(policy_args));
+	policy_args.gpu_id = gpu;
+	policy_args.default_policy = KFD_IOC_CACHE_POLICY_NONCOHERENT;
+	if (DefaultPolicy == HSA_CACHING_CACHED)
+		policy_args.default_policy = KFD_IOC_CACHE_POLICY_COHERENT;
+	policy_args.alternate_policy = KFD_IOC_CACHE_POLICY_NONCOHERENT;
+	if (AlternatePolicy == HSA_CACHING_CACHED)
+		policy_args.alternate_policy = KFD_IOC_CACHE_POLICY_COHERENT;
+	policy_args.alternate_aperture_base = (uintptr_t)MemoryAddressAlternate;
+	policy_args.alternate_aperture_size = MemorySizeInBytes;
+	if (kmtIoctl(kfd_fd, AMDKFD_IOC_SET_MEMORY_POLICY, &policy_args) == -1)
+		return HSAKMT_STATUS_ERROR;
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+static HSAuint32 PageSizeFromFlags(unsigned int pageSizeFlags)	/* HSA_PAGE_SIZE_* selector -> page size in bytes */
+{
+	switch (pageSizeFlags)
+	{
+	case HSA_PAGE_SIZE_4KB: return 4*1024;
+	case HSA_PAGE_SIZE_64KB: return 64*1024;
+	case HSA_PAGE_SIZE_2MB: return 2*1024*1024;
+	case HSA_PAGE_SIZE_1GB: return 1024*1024*1024;
+	default: assert(false); return 4*1024;	/* unknown selector: assert; if asserts are compiled out, fall back to 4KB */
+	}
+}
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtAllocMemory(
+    HSAuint32       PreferredNode,          //IN
+    HSAuint64       SizeInBytes,            //IN  (multiple of page size)
+    HsaMemFlags     MemFlags,               //IN
+    void**          MemoryAddress           //OUT (page-aligned); must not be NULL
+    )
+{
+	HSAKMT_STATUS result;
+	uint32_t gpu_id;
+
+	CHECK_KFD_OPEN();
+	if (MemoryAddress == NULL)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	result = validate_nodeid(PreferredNode, &gpu_id);
+	if (result != HSAKMT_STATUS_SUCCESS)
+		return result;
+
+	// The required size should be page aligned (GDS?)
+	HSAuint64 page_size = PageSizeFromFlags(MemFlags.ui32.PageSize);
+	if ((SizeInBytes & (page_size-1)) && !MemFlags.ui32.GDSMemory)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+	// Only host-accessible memory without the NonPaged flag is handled; anything else is rejected.
+	if (!MemFlags.ui32.HostAccess || MemFlags.ui32.NonPaged)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	if (posix_memalign(MemoryAddress, page_size, SizeInBytes) != 0)
+		return HSAKMT_STATUS_NO_MEMORY;
+	if (MemFlags.ui32.ExecuteAccess &&
+	    mprotect(*MemoryAddress, SizeInBytes, PROT_READ | PROT_WRITE | PROT_EXEC) != 0) {
+		// Report a real status code (not mprotect's raw -1) and leave no dangling pointer behind.
+		free(*MemoryAddress);
+		*MemoryAddress = NULL;
+		return HSAKMT_STATUS_ERROR;
+	}
+
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtFreeMemory(
+    void*       MemoryAddress,      //IN (page-aligned)
+    HSAuint64   SizeInBytes         //IN
+    )
+{
+	CHECK_KFD_OPEN();
+
+	/* Memory outside every FMM aperture is released with free(). */
+	if (!fmm_is_inside_some_aperture(MemoryAddress)) {
+		free(MemoryAddress);
+		return HSAKMT_STATUS_SUCCESS;
+	}
+	/* Aperture memory goes back through the FMM; a nonzero result is reported as a bad parameter. */
+	if (fmm_release(MemoryAddress, SizeInBytes))
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtRegisterMemory(
+    void*       MemoryAddress,      //IN (page-aligned)
+    HSAuint64   MemorySizeInBytes   //IN (page-aligned)
+    )
+{
+	CHECK_KFD_OPEN();
+	/* Stub: neither argument is examined; the call succeeds whenever KFD is open. */
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDeregisterMemory(
+    void*       MemoryAddress  //IN
+    )
+{
+	CHECK_KFD_OPEN();
+	/* Stub: the address is not examined; the call succeeds whenever KFD is open. */
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtMapMemoryToGPU(
+    void*           MemoryAddress,     //IN (page-aligned)
+    HSAuint64       MemorySizeInBytes, //IN (page-aligned)
+    HSAuint64*      AlternateVAGPU     //OUT (page-aligned)
+    )
+{
+	CHECK_KFD_OPEN();
+
+	/*
+	 * Stub: no mapping is performed and the address/size are ignored.
+	 * A caller that asks for an alternate GPU virtual address gets 0 back.
+	 */
+	if (AlternateVAGPU != NULL)
+		*AlternateVAGPU = 0;
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtUnmapMemoryToGPU(
+    void*           MemoryAddress       //IN (page-aligned)
+    )
+{
+	CHECK_KFD_OPEN();
+	/* Stub: the address is not examined; the call succeeds whenever KFD is open. */
+	return HSAKMT_STATUS_SUCCESS;
+}
diff --git a/hsakmt/openclose.c b/hsakmt/openclose.c
new file mode 100644
index 0000000..d5b91e2
--- /dev/null
+++ b/hsakmt/openclose.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "fmm.h"
+
+static const char kfd_device_name[] = "/dev/kfd";
+static const char tmp_file[] = "/var/lock/.amd_hsa_thunk_lock";
+int amd_hsa_thunk_lock_fd = 0;
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtOpenKFD(void)
+{
+	HSAKMT_STATUS result = HSAKMT_STATUS_SUCCESS;
+
+	/* Reference-counted open of /dev/kfd; the first successful call also sets up apertures and the lock file. */
+	pthread_mutex_lock(&hsakmt_mutex);
+	if (kfd_open_count > 0)
+	{
+		kfd_open_count++;	/* already open: only take another reference */
+	}
+	else
+	{
+		int fd = open(kfd_device_name, O_RDWR | O_CLOEXEC);
+		if (fd == -1)
+			result = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED;
+		else
+		{
+			kfd_fd = fd;
+			kfd_open_count = 1;	/* set before aperture init, exactly as before this fix */
+			result = fmm_init_process_apertures();
+			if (result != HSAKMT_STATUS_SUCCESS)
+			{
+				/* Roll back, so that a failed open does not leave the library looking opened. */
+				close(fd);
+				kfd_fd = -1;
+				kfd_open_count = 0;
+			}
+			else
+			{
+				/* Lock file for the PMC trace-access calls: created if absent, mode 0600. */
+				amd_hsa_thunk_lock_fd = open(tmp_file, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
+			}
+		}
+	}
+
+	pthread_mutex_unlock(&hsakmt_mutex);
+
+	return result;
+}
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtCloseKFD(void)
+{
+	HSAKMT_STATUS result;
+
+	pthread_mutex_lock(&hsakmt_mutex);
+
+	if (kfd_open_count > 0)
+	{
+		if (--kfd_open_count == 0)
+		{
+			close(kfd_fd);
+			if (amd_hsa_thunk_lock_fd > 0) {
+				close(amd_hsa_thunk_lock_fd);
+				unlink(tmp_file);
+			}
+			/* Forget the closed descriptor so the PMC access calls cannot lockf() on a stale or reused fd. */
+			amd_hsa_thunk_lock_fd = 0;
+		}
+
+		result = HSAKMT_STATUS_SUCCESS;
+	}
+	else
+	{
+		result = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED;
+	}
+
+	pthread_mutex_unlock(&hsakmt_mutex);
+
+	return result;
+}
diff --git a/hsakmt/perfctr.c b/hsakmt/perfctr.c
new file mode 100644
index 0000000..64ab168
--- /dev/null
+++ b/hsakmt/perfctr.c
@@ -0,0 +1,370 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include "libhsakmt.h"
+#include "pmc_table.h"
+#include "linux/kfd_ioctl.h"
+#include <unistd.h>
+
+#define BITS_PER_BYTE                   CHAR_BIT
+
+#define HSA_PERF_MAGIC4CC   0x54415348
+
+enum perf_trace_state {
+    PERF_TRACE_STATE__STOPPED = 0,  /* state at registration and after hsaKmtPmcStopTrace */
+    PERF_TRACE_STATE__STARTED       /* state after hsaKmtPmcStartTrace */
+};
+
+struct perf_trace {
+    uint32_t magic4cc;              /* HSA_PERF_MAGIC4CC; compared to validate a trace handle */
+    uint32_t  gpu_id;               /* gpu id the trace was registered with */
+    enum perf_trace_state state;    /* stopped/started bookkeeping */
+};
+
+extern int amd_hsa_thunk_lock_fd;
+
+static HsaCounterProperties *counter_props[MAX_NODES] = {NULL};
+
+static int blockid2uuid(enum perf_block_id block_id, HSA_UUID *uuid)
+{
+    /*
+     * Translate an internal block id into the UUID reported to callers.
+     * Returns 0 after storing the UUID in *uuid, or -1 (leaving *uuid
+     * untouched) for a block id that has no mapping here.
+     */
+    if (block_id != PERFCOUNTER_BLOCKID__SQ)
+        return -1;  /* reaching this means a bug in the caller */
+
+    /* SQ is the only block with a UUID mapping in this function. */
+    *uuid = HSA_PROFILEBLOCK_AMD_SQ;
+    return 0;
+}
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcGetCounterProperties(
+    HSAuint32                   NodeId,             //IN
+    HsaCounterProperties**      CounterProperties   //OUT
+    )
+{
+    HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
+    uint32_t gpu_id, i, block_id;
+    uint16_t dev_id;
+    uint32_t counter_props_size = 0;
+    uint32_t total_counters = 0;
+    uint32_t total_concurrent = 0;
+    struct perf_counter_block block = {0};
+    HsaCounterProperties *props;
+
+    if (CounterProperties == NULL)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    /* NodeId indexes counter_props[], which only has MAX_NODES slots. */
+    if (NodeId >= MAX_NODES || validate_nodeid(NodeId, &gpu_id) != 0)
+        return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+    /* The table is built once per node and cached; later calls return the cached pointer. */
+    if (counter_props[NodeId] == NULL) {
+        dev_id = get_device_id_by_node(NodeId);
+        for (i = 0; i < PERFCOUNTER_BLOCKID__MAX; i++) {
+            rc = get_block_properties(dev_id, i, &block);
+            if (rc != HSAKMT_STATUS_SUCCESS)
+                return rc;
+            total_concurrent += block.num_of_slots;
+            total_counters += block.num_of_counters;
+        }
+
+        /* Each struct already embeds one element of its trailing array, hence the "-1" terms. */
+        counter_props_size = sizeof(HsaCounterProperties) +
+                sizeof(HsaCounterBlockProperties)*(PERFCOUNTER_BLOCKID__MAX-1) +
+                sizeof(HsaCounter)*(total_counters-1);
+
+        props = malloc(counter_props_size);
+        if (props == NULL)
+            return HSAKMT_STATUS_NO_MEMORY;
+
+        props->NumBlocks = PERFCOUNTER_BLOCKID__MAX;
+        props->NumConcurrent = total_concurrent;
+
+        for (block_id = 0; block_id < PERFCOUNTER_BLOCKID__MAX; block_id++)
+        {
+            rc = get_block_properties(dev_id, block_id, &block);
+            if (rc != HSAKMT_STATUS_SUCCESS) {
+                /* Nothing has been cached yet, so freeing here leaves no stale pointer behind. */
+                free(props);
+                return rc;
+            }
+            /* Filling the SQ block */
+            blockid2uuid(block_id, &props->Blocks[block_id].BlockId);
+            props->Blocks[block_id].NumCounters = block.num_of_counters;
+            props->Blocks[block_id].NumConcurrent = block.num_of_slots;
+            for (i = 0; i < block.num_of_counters; i++) {
+                props->Blocks[block_id].Counters[i].BlockIndex = block_id;
+                props->Blocks[block_id].Counters[i].CounterId = block.counter_ids[i];
+                props->Blocks[block_id].Counters[i].CounterSizeInBits = block.counter_size_in_bits;
+                props->Blocks[block_id].Counters[i].CounterMask = block.counter_mask;
+                props->Blocks[block_id].Counters[i].Flags.ui32.Global = 1;
+                props->Blocks[block_id].Counters[i].Type = HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE;
+            }
+        }
+        counter_props[NodeId] = props;  /* cache only a fully initialized table */
+    }
+
+    *CounterProperties = counter_props[NodeId];
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/**
+  Registers a set of (HW) counters to be used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcRegisterTrace(
+    HSAuint32           NodeId,             //IN
+    HSAuint32           NumberOfCounters,   //IN
+    HsaCounter*         Counters,           //IN
+    HsaPmcTraceRoot*    TraceRoot           //OUT
+    )
+{
+    uint32_t gpu_id, i;
+    uint64_t min_buf_size = 0;
+    uint32_t concurrent_counters[PERFCOUNTER_BLOCKID__MAX] = {0};
+    struct perf_trace *trace = NULL;
+
+    if (Counters == NULL || TraceRoot == NULL || NumberOfCounters == 0)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    /* NodeId indexes counter_props[] further down, which only has MAX_NODES slots. */
+    if (NodeId >= MAX_NODES || validate_nodeid(NodeId, &gpu_id) != 0)
+        return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+    /* Calculating the minimum buffer size */
+    for (i = 0; i < NumberOfCounters; i++) {
+        if (Counters[i].BlockIndex >= PERFCOUNTER_BLOCKID__MAX)
+            return HSAKMT_STATUS_INVALID_PARAMETER;
+        min_buf_size += Counters[i].CounterSizeInBits/BITS_PER_BYTE;
+        concurrent_counters[Counters[i].BlockIndex]++;
+    }
+
+    /* The node's table must exist (hsaKmtPmcGetCounterProperties fills it) and have enough SQ slots. */
+    if (counter_props[NodeId] == NULL ||
+        concurrent_counters[PERFCOUNTER_BLOCKID__SQ] > counter_props[NodeId]->Blocks[PERFCOUNTER_BLOCKID__SQ].NumConcurrent)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    trace = malloc(sizeof(*trace));  /* the whole struct; sizeof(trace) is only the size of a pointer */
+    if (trace == NULL)
+        return HSAKMT_STATUS_NO_MEMORY;
+
+    trace->magic4cc = HSA_PERF_MAGIC4CC;
+    trace->gpu_id = gpu_id;
+    trace->state = PERF_TRACE_STATE__STOPPED;
+    TraceRoot->NumberOfPasses = 1;
+    TraceRoot->TraceBufferMinSizeBytes = PAGE_ALIGN_UP(min_buf_size);
+    TraceRoot->TraceId = PORT_VPTR_TO_UINT64(trace);
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/**
+  Unregisters a set of (HW) counters used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcUnregisterTrace(
+    HSAuint32   NodeId,     //IN
+    HSATraceId  TraceId     //IN
+    )
+{
+    struct perf_trace *handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+    HSAKMT_STATUS stop_status;
+    uint32_t node_gpu_id;
+
+    /* A zero id is rejected before the handle is ever dereferenced. */
+    if (TraceId == 0)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    if (validate_nodeid(NodeId, &node_gpu_id) != 0)
+        return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+    /* The handle must carry the magic tag and the gpu id of the node given. */
+    if (handle->magic4cc != HSA_PERF_MAGIC4CC)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+    if (handle->gpu_id != node_gpu_id)
+        return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+    /* A trace that is still started is stopped before its memory is released. */
+    if (handle->state == PERF_TRACE_STATE__STARTED) {
+        stop_status = hsaKmtPmcStopTrace(TraceId);
+        if (stop_status != HSAKMT_STATUS_SUCCESS)
+            return stop_status;
+    }
+
+    free(handle);
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+
+/**
+  Allows a user mode process to get exclusive access to the defined set of (HW) counters
+  used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcAcquireTraceAccess(
+    HSAuint32   NodeId,     //IN
+    HSATraceId  TraceId     //IN
+    )
+{
+    struct perf_trace *handle;
+
+    if (TraceId == 0)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+    if (handle->magic4cc != HSA_PERF_MAGIC4CC)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+
+    /* Without a usable lock-file descriptor there is nothing to lock. */
+    if (amd_hsa_thunk_lock_fd <= 0)
+        return HSAKMT_STATUS_ERROR;
+
+    /* F_TLOCK is lockf()'s test-and-lock request: it fails rather than
+       waiting when the lock cannot be taken immediately. */
+    if (lockf(amd_hsa_thunk_lock_fd, F_TLOCK, 0) != 0)
+        return HSAKMT_STATUS_ERROR;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+
+/**
+  Allows a user mode process to release exclusive access to the defined set of (HW) counters
+  used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcReleaseTraceAccess(
+    HSAuint32   NodeId,     //IN
+    HSATraceId  TraceId     //IN
+    )
+{
+    struct perf_trace *handle;
+
+    if (TraceId == 0)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+    if (handle->magic4cc != HSA_PERF_MAGIC4CC)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+
+    /* Without a usable lock-file descriptor there is nothing to unlock,
+       which is reported as an error. */
+    if (amd_hsa_thunk_lock_fd <= 0)
+        return HSAKMT_STATUS_ERROR;
+
+    /* F_ULOCK is lockf()'s unlock request; any failure is reported as a
+       generic error. */
+    if (lockf(amd_hsa_thunk_lock_fd, F_ULOCK, 0) != 0)
+        return HSAKMT_STATUS_ERROR;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+
+/**
+  Starts tracing operation on a previously established set of performance counters
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcStartTrace(
+    HSATraceId  TraceId,                //IN
+    void*       TraceBuffer,            //IN (page aligned)
+    HSAuint64   TraceBufferSizeBytes    //IN (page aligned)
+    )
+{
+    struct perf_trace *handle;
+
+    /* All three arguments are required; the buffer itself is not accessed here. */
+    if (!TraceId || !TraceBuffer || !TraceBufferSizeBytes)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+    if (handle->magic4cc != HSA_PERF_MAGIC4CC)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+    handle->state = PERF_TRACE_STATE__STARTED;  /* only the recorded state changes */
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+
+/**
+   Forces an update of all the counters that a previously started trace operation has registered
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcQueryTrace(
+    HSATraceId    TraceId   //IN
+    )
+{
+    struct perf_trace *handle;
+    if (TraceId == 0)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    /* Handle validation only: no counter is read or updated in this function. */
+    handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+    if (handle->magic4cc == HSA_PERF_MAGIC4CC)
+        return HSAKMT_STATUS_SUCCESS;
+    return HSAKMT_STATUS_INVALID_HANDLE;
+}
+
+
+/**
+  Stops tracing operation on a previously established set of performance counters
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcStopTrace(
+    HSATraceId  TraceId     //IN
+    )
+{
+    struct perf_trace *handle;
+    if (TraceId == 0)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    handle = (struct perf_trace *)PORT_UINT64_TO_VPTR(TraceId);
+    if (handle->magic4cc != HSA_PERF_MAGIC4CC)
+        return HSAKMT_STATUS_INVALID_HANDLE;
+
+    /* Only the recorded state changes; stopping an already stopped trace also succeeds. */
+    handle->state = PERF_TRACE_STATE__STOPPED;
+    return HSAKMT_STATUS_SUCCESS;
+}
diff --git a/hsakmt/pmc_table.c b/hsakmt/pmc_table.c
new file mode 100644
index 0000000..0390639
--- /dev/null
+++ b/hsakmt/pmc_table.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+#include "pmc_table.h"
+
+
+/*
+ * Counter IDs listed for the SQ block on Kaveri: 1..162 and 168..250.
+ * IDs 163..167 are not part of the list.
+ */
+static uint32_t kaveri_sq_counter_ids[] = {
+    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+    23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
+    43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+    63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82,
+    83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101,
+    102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
+    118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133,
+    134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+    150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 168, 169, 170,
+    171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186,
+    187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202,
+    203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218,
+    219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234,
+    235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250
+};
+
+/*
+ * Counter IDs listed for the SQ block on Carrizo: 1..162 and 168..250.
+ * IDs 163..167 are not part of the list. The contents currently match the
+ * Kaveri list.
+ */
+static uint32_t carrizo_sq_counter_ids[] = {
+    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+    23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
+    43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+    63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82,
+    83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101,
+    102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
+    118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133,
+    134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+    150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 168, 169, 170,
+    171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186,
+    187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202,
+    203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218,
+    219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234,
+    235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250
+};
+
+/*
+ * Per-block counter descriptions for Kaveri, indexed by enum perf_block_id.
+ * Only the SQ entry is initialized explicitly; the table has
+ * PERFCOUNTER_BLOCKID__MAX entries.
+ */
+static struct perf_counter_block kaveri_blocks[PERFCOUNTER_BLOCKID__MAX] = {
+    [PERFCOUNTER_BLOCKID__SQ] = {
+        .num_of_slots = 16,
+        /* Element count of the ID array above. */
+        .num_of_counters = sizeof(kaveri_sq_counter_ids)/sizeof(*kaveri_sq_counter_ids),
+        .counter_ids = kaveri_sq_counter_ids,
+        .counter_size_in_bits = 64,
+        /* NOTE(review): BITMASK is defined outside this file; presumably a mask of the low 64 bits - confirm. */
+        .counter_mask = BITMASK(64)
+    },
+};
+
+/*
+ * Per-block counter descriptions for Carrizo, indexed by enum perf_block_id.
+ * Only the SQ entry is initialized explicitly; the table has
+ * PERFCOUNTER_BLOCKID__MAX entries.
+ */
+static struct perf_counter_block carrizo_blocks[PERFCOUNTER_BLOCKID__MAX] = {
+    [PERFCOUNTER_BLOCKID__SQ] = {
+        .num_of_slots = 16,
+        /* Element count of the ID array above. */
+        .num_of_counters = sizeof(carrizo_sq_counter_ids)/sizeof(*carrizo_sq_counter_ids),
+        .counter_ids = carrizo_sq_counter_ids,
+        .counter_size_in_bits = 64,
+        /* NOTE(review): BITMASK is defined outside this file; presumably a mask of the low 64 bits - confirm. */
+        .counter_mask = BITMASK(64)
+    },
+};
+
+/*
+ * Copies the performance-counter block description for a device into *block.
+ *
+ * dev_id   - PCI device ID; only the Kaveri and Carrizo IDs listed below are known.
+ * block_id - index into the per-ASIC block table; must be in the range
+ *            PERFCOUNTER_BLOCKID__FIRST .. PERFCOUNTER_BLOCKID__MAX - 1.
+ * block    - output; receives a struct copy (counter_ids still points at the
+ *            static ID table in this file).
+ *
+ * Returns HSAKMT_STATUS_SUCCESS on success, or HSAKMT_STATUS_INVALID_PARAMETER
+ * when block is NULL, block_id is out of range, or dev_id is not recognized.
+ */
+HSAKMT_STATUS
+get_block_properties(uint16_t dev_id,
+                     enum perf_block_id block_id,
+                     struct perf_counter_block *block)
+{
+    HSAKMT_STATUS rc = HSAKMT_STATUS_SUCCESS;
+
+    if (block == NULL)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    /*
+     * The tables have exactly PERFCOUNTER_BLOCKID__MAX entries, so MAX itself
+     * is already one past the end and must be rejected (>=, not >).
+     */
+    if (block_id >= PERFCOUNTER_BLOCKID__MAX || block_id < PERFCOUNTER_BLOCKID__FIRST)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    switch(dev_id) {
+        /* Kaveri */
+        case 0x1304:
+        case 0x1305:
+        case 0x1306:
+        case 0x1307:
+        case 0x1309:
+        case 0x130A:
+        case 0x130B:
+        case 0x130C:
+        case 0x130D:
+        case 0x130E:
+        case 0x130F:
+        case 0x1310:
+        case 0x1311:
+        case 0x1312:
+        case 0x1313:
+        case 0x1315:
+        case 0x1316:
+        case 0x1317:
+        case 0x1318:
+        case 0x131B:
+        case 0x131C:
+        case 0x131D:
+            *block = kaveri_blocks[block_id];
+            break;
+
+        /* Carrizo */
+        case 0x9870:
+        case 0x9874:
+        case 0x9875:
+        case 0x9876:
+        case 0x9877:
+            *block = carrizo_blocks[block_id];
+            break;
+
+        default:
+            rc = HSAKMT_STATUS_INVALID_PARAMETER;
+            break;
+    }
+
+    return rc;
+}
+
+
diff --git a/hsakmt/pmc_table.h b/hsakmt/pmc_table.h
new file mode 100644
index 0000000..35ed07e
--- /dev/null
+++ b/hsakmt/pmc_table.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef PMC_TABLE_H
+#define PMC_TABLE_H
+
+#include "libhsakmt.h"
+
+/*
+ * Identifiers of the hardware blocks that expose performance counters.
+ * The values are used as indices into the per-ASIC block tables.
+ */
+enum perf_block_id {
+    PERFCOUNTER_BLOCKID__FIRST = 0,
+    PERFCOUNTER_BLOCKID__SQ = PERFCOUNTER_BLOCKID__FIRST,
+    /* Number of block IDs; not a valid block ID itself. */
+    PERFCOUNTER_BLOCKID__MAX
+};
+
+/* Description of one performance-counter block on a given ASIC. */
+struct perf_counter_block {
+    /* NOTE(review): presumably the number of counters that can be active at once - confirm. */
+    uint32_t    num_of_slots;
+    /* Number of entries in counter_ids. */
+    uint32_t    num_of_counters;
+    /* Array of num_of_counters counter IDs. */
+    uint32_t    *counter_ids;
+    /* Width of a single counter, in bits. */
+    uint32_t    counter_size_in_bits;
+    /* NOTE(review): presumably the mask of valid bits in a counter value - confirm. */
+    uint64_t    counter_mask;
+};
+
+/*
+ * Looks up the counter block description for the device with PCI ID dev_id
+ * and copies it into *block. Returns HSAKMT_STATUS_INVALID_PARAMETER for an
+ * unrecognized dev_id or an out-of-range block_id.
+ */
+HSAKMT_STATUS
+get_block_properties(uint16_t dev_id,
+                     enum perf_block_id block_id,
+                     struct perf_counter_block *block);
+
+#endif // PMC_TABLE_H
diff --git a/hsakmt/queues.c b/hsakmt/queues.c
new file mode 100644
index 0000000..2d7692f
--- /dev/null
+++ b/hsakmt/queues.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+#include "linux/kfd_ioctl.h"
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <math.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+/*
+ * Size in bytes of the doorbell mapping: 1024 doorbells, 4 bytes each.
+ * The expression is parenthesized so the macro keeps its value when it is
+ * used inside a larger expression (e.g. as a divisor).
+ */
+#define DOORBELLS_PAGE_SIZE	(1024 * 4)
+
+/*
+ * Per-ASIC sizes of the extra buffers allocated for each queue.
+ * A size of 0 means the corresponding buffer is not allocated.
+ */
+struct device_info
+{
+	uint32_t ctx_save_restore_size;	/* bytes */
+	uint32_t eop_buffer_size;	/* bytes */
+};
+
+/* Kaveri: no context save/restore area and no EOP buffer. */
+struct device_info kaveri_device_info = {
+	.ctx_save_restore_size = 0,
+	.eop_buffer_size = 0,
+};
+
+/* Carrizo: NOTE(review): the origin of the 2756608-byte figure is not shown here - confirm against hardware docs. */
+struct device_info carrizo_device_info = {
+	.ctx_save_restore_size = 2756608,
+	.eop_buffer_size = 4096,
+};
+
+/* Maps a PCI device ID to the device_info describing that ASIC. */
+struct device_id
+{
+	uint16_t dev_id;
+	struct device_info *dev_info;
+};
+
+/*
+ * Table of known devices, searched linearly by get_device_info_by_dev_id().
+ * The final { 0, NULL } entry is the end-of-table sentinel.
+ */
+struct device_id supported_devices[] = {
+	{ 0x1304, &kaveri_device_info },	/* Kaveri */
+	{ 0x1305, &kaveri_device_info },	/* Kaveri */
+	{ 0x1306, &kaveri_device_info },	/* Kaveri */
+	{ 0x1307, &kaveri_device_info },	/* Kaveri */
+	{ 0x1309, &kaveri_device_info },	/* Kaveri */
+	{ 0x130A, &kaveri_device_info },	/* Kaveri */
+	{ 0x130B, &kaveri_device_info },	/* Kaveri */
+	{ 0x130C, &kaveri_device_info },	/* Kaveri */
+	{ 0x130D, &kaveri_device_info },	/* Kaveri */
+	{ 0x130E, &kaveri_device_info },	/* Kaveri */
+	{ 0x130F, &kaveri_device_info },	/* Kaveri */
+	{ 0x1310, &kaveri_device_info },	/* Kaveri */
+	{ 0x1311, &kaveri_device_info },	/* Kaveri */
+	{ 0x1312, &kaveri_device_info },	/* Kaveri */
+	{ 0x1313, &kaveri_device_info },	/* Kaveri */
+	{ 0x1315, &kaveri_device_info },	/* Kaveri */
+	{ 0x1316, &kaveri_device_info },	/* Kaveri */
+	{ 0x1317, &kaveri_device_info },	/* Kaveri */
+	{ 0x1318, &kaveri_device_info },	/* Kaveri */
+	{ 0x131B, &kaveri_device_info },	/* Kaveri */
+	{ 0x131C, &kaveri_device_info },	/* Kaveri */
+	{ 0x131D, &kaveri_device_info },	/* Kaveri */
+	{ 0x9870, &carrizo_device_info },	/* Carrizo */
+	{ 0x9874, &carrizo_device_info },	/* Carrizo */
+	{ 0x9875, &carrizo_device_info },	/* Carrizo */
+	{ 0x9876, &carrizo_device_info },	/* Carrizo */
+	{ 0x9877, &carrizo_device_info },	/* Carrizo */
+	{ 0, NULL }
+};
+
+/*
+ * User-mode bookkeeping for one queue. A pointer to this structure is what
+ * hsaKmtCreateQueue() hands back as the queue ID.
+ */
+struct queue
+{
+	uint32_t queue_id;		/* ID returned by the create-queue ioctl */
+	uint32_t wptr;			/* write pointer storage used for non-AQL queues */
+	uint32_t rptr;			/* read pointer storage used for non-AQL queues */
+	void *eop_buffer;		/* NULL when the ASIC needs no EOP buffer */
+	void *ctx_save_restore;		/* NULL when the ASIC needs no save/restore area */
+};
+
+/* Doorbell mapping state, one entry per node index. */
+struct process_doorbells
+{
+	bool need_mmap;			/* true until the doorbells have been mapped */
+	void* doorbells;		/* base of the mapping, NULL before mmap */
+	pthread_mutex_t doorbells_mutex;	/* taken around the need_mmap check and the mmap */
+};
+
+/* Every entry starts unmapped; the array has NUM_OF_SUPPORTED_GPUS entries and is indexed by NodeId. */
+struct process_doorbells doorbells[] = {[0 ... (NUM_OF_SUPPORTED_GPUS-1)] = {.need_mmap = true, .doorbells = NULL, .doorbells_mutex = PTHREAD_MUTEX_INITIALIZER}};
+
+/*
+ * Returns the device_info registered for dev_id in supported_devices,
+ * or NULL when the ID is not in the table.
+ */
+static struct device_info *get_device_info_by_dev_id(uint16_t dev_id)
+{
+	const struct device_id *entry;
+
+	/* Walk the table until the { 0, NULL } sentinel. */
+	for (entry = supported_devices; entry->dev_id != 0; entry++)
+		if (entry->dev_id == dev_id)
+			return entry->dev_info;
+
+	return NULL;
+}
+
+/*
+ * Releases a queue object together with its optional EOP and
+ * context save/restore buffers. q itself must not be NULL.
+ */
+static void free_queue(struct queue *q)
+{
+	/* free(NULL) is a no-op, so buffers that were never allocated need no guard. */
+	free(q->ctx_save_restore);
+	free(q->eop_buffer);
+	free(q);
+}
+
+/*
+ * Allocates size bytes aligned to align, makes the range readable, writable
+ * and executable, and zero-fills it.
+ *
+ * Returns the buffer (release it with free()), or NULL if either the
+ * allocation or the protection change fails.
+ */
+static void* allocate_exec_aligned_memory(uint32_t size, uint32_t align)
+{
+	void *mem;
+
+	if (posix_memalign(&mem, align, size) != 0)
+		return NULL;
+
+	if (mprotect(mem, size, PROT_READ | PROT_WRITE | PROT_EXEC) == 0) {
+		memset(mem, 0, size);
+		return mem;
+	}
+
+	/* Could not change the protection: give the memory back. */
+	free(mem);
+	return NULL;
+}
+
+/*
+ * Allocates the ASIC-specific per-queue buffers described by dev_info and
+ * records their addresses and sizes in the create-queue ioctl arguments.
+ *
+ * dev_info may be NULL (unknown device), in which case nothing is allocated.
+ * On failure the buffers already attached to q are left in place; the caller
+ * releases them through free_queue().
+ *
+ * Returns HSAKMT_STATUS_SUCCESS or HSAKMT_STATUS_NO_MEMORY.
+ */
+static int handle_concrete_asic(struct device_info *dev_info, struct queue *q,
+								struct kfd_ioctl_create_queue_args *args)
+{
+	uint32_t eop_size, ctx_size;
+
+	if (dev_info == NULL)
+		return HSAKMT_STATUS_SUCCESS;
+
+	eop_size = dev_info->eop_buffer_size;
+	if (eop_size != 0) {
+		q->eop_buffer = allocate_exec_aligned_memory(eop_size, PAGE_SIZE);
+		if (!q->eop_buffer)
+			return HSAKMT_STATUS_NO_MEMORY;
+
+		args->eop_buffer_address = (uintptr_t)q->eop_buffer;
+		args->eop_buffer_size = eop_size;
+	}
+
+	ctx_size = dev_info->ctx_save_restore_size;
+	if (ctx_size != 0) {
+		/* The size is recorded before the allocation is attempted. */
+		args->ctx_save_restore_size = ctx_size;
+		q->ctx_save_restore = allocate_exec_aligned_memory(ctx_size, PAGE_SIZE);
+		if (!q->ctx_save_restore)
+			return HSAKMT_STATUS_NO_MEMORY;
+
+		args->ctx_save_restore_address = (uintptr_t)q->ctx_save_restore;
+	}
+
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Creates a hardware queue on the given node.
+ *
+ * NodeId           - node to create the queue on; validated via validate_nodeid().
+ * Type             - HSA_QUEUE_COMPUTE or HSA_QUEUE_COMPUTE_AQL; HSA_QUEUE_SDMA
+ *                    returns HSAKMT_STATUS_NOT_IMPLEMENTED.
+ * QueuePercentage, Priority, QueueAddress, QueueSizeInBytes
+ *                  - forwarded unchanged to the create-queue ioctl.
+ * Event            - not used by this implementation.
+ * QueueResource    - output. For AQL queues the caller-provided
+ *                    QueueRptrValue/QueueWptrValue are passed to the kernel;
+ *                    for other types they are pointed at storage inside the
+ *                    library's queue object. On success QueueId and
+ *                    Queue_DoorBell are filled in.
+ *
+ * Returns HSAKMT_STATUS_SUCCESS, HSAKMT_STATUS_INVALID_PARAMETER,
+ * HSAKMT_STATUS_NO_MEMORY, HSAKMT_STATUS_NOT_IMPLEMENTED, or
+ * HSAKMT_STATUS_ERROR when the ioctl or the doorbell mmap fails.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtCreateQueue(
+    HSAuint32           NodeId,           //IN
+    HSA_QUEUE_TYPE      Type,             //IN
+    HSAuint32           QueuePercentage,  //IN
+    HSA_QUEUE_PRIORITY  Priority,         //IN
+    void*               QueueAddress,     //IN
+    HSAuint64           QueueSizeInBytes, //IN
+    HsaEvent*           Event,            //IN
+    HsaQueueResource*   QueueResource     //OUT
+    )
+{
+	HSAKMT_STATUS result;
+	uint32_t gpu_id;
+	uint16_t dev_id;
+	struct device_info *dev_info;
+	int err;
+	void* ptr;
+	CHECK_KFD_OPEN();
+
+	/* QueueResource is read and written unconditionally below. */
+	if (QueueResource == NULL)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	result = validate_nodeid(NodeId, &gpu_id);
+	if (result != HSAKMT_STATUS_SUCCESS)
+		return result;
+
+	/* doorbells[] is indexed by NodeId; never index past its end. */
+	if (NodeId >= sizeof(doorbells) / sizeof(doorbells[0]))
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	struct queue *q = malloc(sizeof(struct queue));
+	if (q == NULL)
+		return HSAKMT_STATUS_NO_MEMORY;
+	memset(q, 0, sizeof(*q));
+
+	struct kfd_ioctl_create_queue_args args;
+	memset(&args, 0, sizeof(args));
+
+	dev_id = get_device_id_by_node(NodeId);
+	dev_info = get_device_info_by_dev_id(dev_id);
+	args.gpu_id = gpu_id;
+
+	err = handle_concrete_asic(dev_info, q, &args);
+	if (err != HSAKMT_STATUS_SUCCESS) {
+		free_queue(q);
+		return err;
+	}
+
+	/*
+	 * Every exit below must use free_queue(), not free(): by now q may own
+	 * the EOP and context save/restore buffers.
+	 */
+	switch (Type)
+	{
+	case HSA_QUEUE_COMPUTE: args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE; break;
+	case HSA_QUEUE_SDMA: free_queue(q); return HSAKMT_STATUS_NOT_IMPLEMENTED;
+	case HSA_QUEUE_COMPUTE_AQL: args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; break;
+	default: free_queue(q); return HSAKMT_STATUS_INVALID_PARAMETER;
+	}
+
+	if (Type != HSA_QUEUE_COMPUTE_AQL)
+	{
+		QueueResource->QueueRptrValue = (uintptr_t)&q->rptr;
+		QueueResource->QueueWptrValue = (uintptr_t)&q->wptr;
+	}
+
+	args.read_pointer_address = QueueResource->QueueRptrValue;
+	args.write_pointer_address = QueueResource->QueueWptrValue;
+	args.ring_base_address = (uintptr_t)QueueAddress;
+	args.ring_size = QueueSizeInBytes;
+	args.queue_percentage = QueuePercentage;
+	args.queue_priority = Priority;
+
+	err = kmtIoctl(kfd_fd, AMDKFD_IOC_CREATE_QUEUE, &args);
+
+	if (err == -1)
+	{
+		free_queue(q);
+		return HSAKMT_STATUS_ERROR;
+	}
+
+	q->queue_id = args.queue_id;
+
+	pthread_mutex_lock(&doorbells[NodeId].doorbells_mutex);
+
+	if (doorbells[NodeId].need_mmap) {
+		ptr = mmap(0, DOORBELLS_PAGE_SIZE, PROT_READ|PROT_WRITE,
+				MAP_SHARED, kfd_fd, args.doorbell_offset);
+
+		if (ptr == MAP_FAILED) {
+			struct kfd_ioctl_destroy_queue_args destroy_args;
+
+			pthread_mutex_unlock(&doorbells[NodeId].doorbells_mutex);
+
+			/*
+			 * Tear the kernel queue down directly. hsaKmtDestroyQueue()
+			 * expects the pointer-based HSA_QUEUEID, not the kernel
+			 * queue id, and on success frees q itself. Best effort:
+			 * the error is reported to the caller either way.
+			 */
+			memset(&destroy_args, 0, sizeof(destroy_args));
+			destroy_args.queue_id = q->queue_id;
+			kmtIoctl(kfd_fd, AMDKFD_IOC_DESTROY_QUEUE, &destroy_args);
+
+			free_queue(q);
+			return HSAKMT_STATUS_ERROR;
+		}
+
+		doorbells[NodeId].need_mmap = false;
+		doorbells[NodeId].doorbells = ptr;
+	}
+
+	pthread_mutex_unlock(&doorbells[NodeId].doorbells_mutex);
+
+	QueueResource->QueueId = PORT_VPTR_TO_UINT64(q);
+	QueueResource->Queue_DoorBell = VOID_PTR_ADD32(doorbells[NodeId].doorbells, q->queue_id);
+
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+
+/*
+ * Updates the ring buffer, percentage and priority of an existing queue.
+ *
+ * QueueId is the handle returned in HsaQueueResource.QueueId by
+ * hsaKmtCreateQueue(). Event is not used by this implementation.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER for a zero QueueId,
+ * HSAKMT_STATUS_ERROR when the ioctl fails, HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtUpdateQueue(
+    HSA_QUEUEID         QueueId,        //IN
+    HSAuint32           QueuePercentage,//IN
+    HSA_QUEUE_PRIORITY  Priority,       //IN
+    void*               QueueAddress,   //IN
+    HSAuint64           QueueSize,      //IN
+    HsaEvent*           Event           //IN
+    )
+{
+	struct kfd_ioctl_update_queue_args arg;
+	struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
+
+	CHECK_KFD_OPEN();
+
+	if (q == NULL)
+		return (HSAKMT_STATUS_INVALID_PARAMETER);
+
+	/*
+	 * Zero the argument block first, as the create and destroy paths do, so
+	 * no uninitialized stack bytes are handed to the kernel.
+	 */
+	memset(&arg, 0, sizeof(arg));
+
+	arg.queue_id = (HSAuint32)q->queue_id;
+	arg.ring_base_address = (uintptr_t)QueueAddress;
+	arg.ring_size = QueueSize;
+	arg.queue_percentage = QueuePercentage;
+	arg.queue_priority = Priority;
+
+	int err = kmtIoctl(kfd_fd, AMDKFD_IOC_UPDATE_QUEUE, &arg);
+	if (err == -1)
+	{
+		return HSAKMT_STATUS_ERROR;
+	}
+
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Destroys a queue created by hsaKmtCreateQueue().
+ *
+ * The library-side queue object is released only after the destroy ioctl
+ * succeeds; on ioctl failure it is left untouched.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER for a zero QueueId,
+ * HSAKMT_STATUS_ERROR when the ioctl fails, HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDestroyQueue(
+    HSA_QUEUEID         QueueId         //IN
+    )
+{
+	CHECK_KFD_OPEN();
+
+	struct kfd_ioctl_destroy_queue_args args;
+	struct queue *q = PORT_UINT64_TO_VPTR(QueueId);
+
+	if (!q)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	memset(&args, 0, sizeof(args));
+	args.queue_id = q->queue_id;
+
+	if (kmtIoctl(kfd_fd, AMDKFD_IOC_DESTROY_QUEUE, &args) == -1)
+		return HSAKMT_STATUS_ERROR;
+
+	free_queue(q);
+	return HSAKMT_STATUS_SUCCESS;
+}
diff --git a/hsakmt/time.c b/hsakmt/time.c
new file mode 100644
index 0000000..45709f9
--- /dev/null
+++ b/hsakmt/time.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+#include "linux/kfd_ioctl.h"
+
+/*
+ * Reads the GPU, CPU and system clock counters for a node.
+ *
+ * NodeId   - node to query; validated via validate_nodeid().
+ * Counters - output; written only when the ioctl succeeds.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER when Counters is NULL, the status
+ * from validate_nodeid() when the node is rejected, HSAKMT_STATUS_ERROR when
+ * the ioctl fails, and HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetClockCounters(
+    HSAuint32         NodeId,  //IN
+    HsaClockCounters* Counters //OUT
+    )
+{
+	HSAKMT_STATUS result;
+	uint32_t gpu_id;
+	struct kfd_ioctl_get_clock_counters_args args;
+	int err;
+
+	CHECK_KFD_OPEN();
+
+	/* The output structure is dereferenced below; reject NULL up front. */
+	if (Counters == NULL)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	result = validate_nodeid(NodeId, &gpu_id);
+	if (result != HSAKMT_STATUS_SUCCESS)
+		return result;
+
+	args.gpu_id = gpu_id;
+
+	err = kmtIoctl(kfd_fd, AMDKFD_IOC_GET_CLOCK_COUNTERS, &args);
+	if (err < 0) {
+		result = HSAKMT_STATUS_ERROR;
+	} else {
+		/* At this point the result is already HSAKMT_STATUS_SUCCESS */
+		Counters->GPUClockCounter = args.gpu_clock_counter;
+		Counters->CPUClockCounter = args.cpu_clock_counter;
+		Counters->SystemClockCounter = args.system_clock_counter;
+		Counters->SystemClockFrequencyHz = args.system_clock_freq;
+	}
+
+	return result;
+}
diff --git a/hsakmt/topology.c b/hsakmt/topology.c
new file mode 100644
index 0000000..903b6f7
--- /dev/null
+++ b/hsakmt/topology.c
@@ -0,0 +1,991 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <dirent.h>
+#include <malloc.h>
+#include <string.h>
+
+#include "libhsakmt.h"
+#include "fmm.h"
+/* Size of the heap buffer used in this file to read one sysfs file. */
+#define PAGE_SIZE 4096
+/* Smaller of X and Y; each argument may be evaluated twice. */
+#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
+#define NUM_OF_HEAPS 2
+/* SYSFS related */
+#define KFD_SYSFS_PATH_GENERATION_ID "/sys/devices/virtual/kfd/kfd/topology/generation_id"
+#define KFD_SYSFS_PATH_SYSTEM_PROPERTIES "/sys/devices/virtual/kfd/kfd/topology/system_properties"
+/* Directory with one numbered sub-directory per topology node. */
+#define KFD_SYSFS_PATH_NODES "/sys/devices/virtual/kfd/kfd/topology/nodes"
+
+/* One topology node: its properties plus the arrays released by free_node(). */
+typedef struct {
+	uint32_t gpu_id;
+	HsaNodeProperties node;
+	HsaMemoryProperties *mem;     /* node->NumBanks elements */
+	HsaCacheProperties *cache;    /* NOTE(review): presumably node.NumCaches elements - confirm */
+	HsaIoLinkProperties *link;    /* NOTE(review): presumably node.NumIOLinks elements - confirm */
+} node_t;
+
+/*
+ * File-local topology state; both pointers start out NULL.
+ * NOTE(review): presumably populated by topology_take_snapshot() and released
+ * by topology_drop_snapshot() - their bodies are not visible here, confirm.
+ */
+static HsaSystemProperties *system = NULL;
+static node_t *node = NULL;
+
+/* Forward declarations of file-local helpers used before their definitions. */
+static HSAKMT_STATUS topology_take_snapshot(void);
+static HSAKMT_STATUS topology_drop_snapshot(void);
+static int get_cpu_stepping(uint16_t* stepping);
+
+/*
+ * Releases the mem, cache and link arrays owned by a node.
+ * The node structure itself is not freed and its pointers are not reset.
+ */
+static void
+free_node(node_t *n)
+{
+	assert(n);
+
+	/* Still guard against NULL for builds where assert() is compiled out. */
+	if (!n)
+		return;
+
+	/* free(NULL) is a no-op, so unset arrays need no separate check. */
+	free(n->mem);
+	free(n->cache);
+	free(n->link);
+}
+
+/*
+ * Reads the topology generation ID from sysfs into *gen.
+ *
+ * Returns HSAKMT_STATUS_ERROR when the file cannot be opened or does not
+ * start with an unsigned number, HSAKMT_STATUS_SUCCESS otherwise.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_generation(uint32_t *gen) {
+	FILE *fd;
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+	assert(gen);
+	fd = fopen(KFD_SYSFS_PATH_GENERATION_ID, "r");
+	if (!fd)
+		return HSAKMT_STATUS_ERROR;
+
+	/* "%u", not "%ul": the latter is %u followed by a literal 'l' to match. */
+	if (fscanf(fd, "%u", gen) != 1)
+		ret = HSAKMT_STATUS_ERROR;
+
+	fclose(fd);
+	return ret;
+}
+
+/*
+ * Reads the system-wide topology properties from sysfs.
+ *
+ * Fills PlatformOem, PlatformId and PlatformRev from the "name value" lines of
+ * the system_properties file, and sets NumNodes to the number of entries
+ * (other than "." and "..") in the nodes directory; NumNodes is 0 when that
+ * directory cannot be opened.
+ *
+ * Returns HSAKMT_STATUS_ERROR when the properties file cannot be opened or
+ * read, HSAKMT_STATUS_NO_MEMORY when the read buffer cannot be allocated,
+ * HSAKMT_STATUS_SUCCESS otherwise.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_system_props(HsaSystemProperties *props) {
+	FILE *fd;
+	DIR *dirp;
+	char *read_buf, *p;
+	char prop_name[256];
+	long long unsigned int prop_val;
+	uint32_t node_count;
+	int prog;	/* %n stores through an int pointer */
+	struct dirent *dir;
+	int read_size;
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+
+	assert(props);
+	fd = fopen(KFD_SYSFS_PATH_SYSTEM_PROPERTIES, "r");
+	if (!fd)
+		return HSAKMT_STATUS_ERROR;
+
+	read_buf = malloc(PAGE_SIZE);
+	if (!read_buf) {
+		ret = HSAKMT_STATUS_NO_MEMORY;
+		goto err1;
+	}
+
+	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
+	if (read_size <= 0) {
+		ret = HSAKMT_STATUS_ERROR;
+		goto err2;
+	}
+
+	/* Since we're using the buffer as a string, we make sure the string terminates */
+	if(read_size >= PAGE_SIZE)
+		read_size = PAGE_SIZE-1;
+	read_buf[read_size] = 0;
+
+	/*
+	 * Read the system properties.
+	 * %255s bounds the token to prop_name's 256 bytes; the buffer being
+	 * parsed can hold a much longer run of non-space characters.
+	 */
+	prog = 0;
+	p = read_buf;
+	while(sscanf(p+=prog, "%255s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
+		if (strcmp(prop_name,"platform_oem") == 0)
+			props->PlatformOem = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"platform_id") == 0)
+			props->PlatformId = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"platform_rev") == 0)
+			props->PlatformRev = (uint32_t)prop_val;
+	}
+
+	/*
+	 * Discover the number of nodes
+	 */
+	node_count = 0;
+	dirp = opendir(KFD_SYSFS_PATH_NODES);
+	if(dirp) {
+		/*
+		 * Assuming that inside nodes folder there are only folders
+		 * which represent the node numbers
+		 */
+		while ((dir = readdir(dirp)) != 0) {
+			if ((strcmp(dir->d_name, ".") == 0) ||
+					(strcmp(dir->d_name, "..") == 0))
+				continue;
+			node_count++;
+		}
+		closedir(dirp);
+	}
+	props->NumNodes = node_count;
+
+
+err2:
+	free(read_buf);
+err1:
+	fclose(fd);
+	return ret;
+}
+
+/*
+ * Reads the gpu_id file of topology node node_id into *gpu_id.
+ *
+ * Returns HSAKMT_STATUS_ERROR when the file cannot be opened or does not
+ * start with an unsigned number, HSAKMT_STATUS_SUCCESS otherwise.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_gpu_id(uint32_t node_id, uint32_t *gpu_id) {
+	FILE *fd;
+	char path[256];
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+	assert(gpu_id);
+	/* node_id is unsigned, so format it with %u. */
+	snprintf(path, sizeof(path), "%s/%u/gpu_id", KFD_SYSFS_PATH_NODES, node_id);
+	fd = fopen(path, "r");
+	if (!fd)
+		return HSAKMT_STATUS_ERROR;
+	/* "%u", not "%ul": the latter is %u followed by a literal 'l' to match. */
+	if (fscanf(fd, "%u", gpu_id) != 1) {
+		ret = HSAKMT_STATUS_ERROR;
+	}
+	fclose(fd);
+
+	return ret;
+}
+
+/*
+ * Reads the properties of topology node node_id from sysfs.
+ *
+ * Three files are consulted under the node's directory:
+ *   gpu_id     - stored in *gpu_id;
+ *   name       - first line copied, one element per byte, into MarketingName;
+ *   properties - "name value" lines mapped onto the HsaNodeProperties fields.
+ * EngineId is composed from the CPU stepping (upper 16 bits) and the
+ * fw_version property (lower 16 bits).
+ *
+ * A failure to read gpu_id does not stop processing, but its status is what
+ * the function returns unless a later step fails.
+ *
+ * Returns HSAKMT_STATUS_SUCCESS, HSAKMT_STATUS_NO_MEMORY, or
+ * HSAKMT_STATUS_ERROR (file missing/unreadable, or name line missing or too
+ * long).
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_node_props(uint32_t node_id, HsaNodeProperties *props, uint32_t *gpu_id) {
+	FILE *fd;
+	char *read_buf, *p;
+	char prop_name[256];
+	char path[256];
+	long long unsigned int  prop_val;
+	uint32_t i;
+	int prog;	/* %n stores through an int pointer */
+	uint16_t stepping = 0, fw_version = 0;
+	int read_size;
+
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+	assert(props);
+	assert(gpu_id);
+	/* Retrieve the GPU ID */
+	ret = topology_sysfs_get_gpu_id(node_id, gpu_id);
+
+	/* Retrieve the marketing name of the node */
+	snprintf(path, sizeof(path), "%s/%u/name", KFD_SYSFS_PATH_NODES, node_id);
+	fd = fopen(path, "r");
+	if (!fd)
+		return HSAKMT_STATUS_ERROR;
+
+	read_buf = malloc(PAGE_SIZE);
+	if (!read_buf) {
+		ret = HSAKMT_STATUS_NO_MEMORY;
+		goto err1;
+	}
+
+	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
+	if (read_size <= 0) {
+		ret = HSAKMT_STATUS_ERROR;
+		goto err2;
+	}
+	p = memchr(read_buf, '\n', read_size);
+	/*
+	 * The name plus its terminator must fit: a name of exactly
+	 * HSA_PUBLIC_NAME_SIZE characters would put the terminator one element
+	 * past the bound used by the copy loop below, hence >=.
+	 * NOTE(review): assumes MarketingName has HSA_PUBLIC_NAME_SIZE elements - confirm.
+	 */
+	if ((!p) || ((p-read_buf) >= HSA_PUBLIC_NAME_SIZE)) {
+		ret = HSAKMT_STATUS_ERROR;
+		goto err2;
+	}
+	/*
+	 * Convert UTF8 to UTF16 (each byte is copied to one output element)
+	 */
+	for (i = 0; (i < HSA_PUBLIC_NAME_SIZE) && (read_buf[i] != '\n'); i++)
+		props->MarketingName[i] = read_buf[i];
+	props->MarketingName[i] = 0;
+	fclose(fd);
+
+	/* Retrieve the node properties */
+	snprintf(path, sizeof(path), "%s/%u/properties", KFD_SYSFS_PATH_NODES, node_id);
+	fd = fopen(path, "r");
+	if (!fd) {
+		free(read_buf);
+		return HSAKMT_STATUS_ERROR;
+	}
+
+	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
+	if (read_size <= 0) {
+		ret = HSAKMT_STATUS_ERROR;
+		goto err2;
+	}
+
+	/* Since we're using the buffer as a string, we make sure the string terminates */
+	if(read_size >= PAGE_SIZE)
+		read_size = PAGE_SIZE-1;
+	read_buf[read_size] = 0;
+
+	/*
+	 * Read the node properties.
+	 * %255s bounds the token to prop_name's 256 bytes.
+	 */
+	prog = 0;
+	p = read_buf;
+	while(sscanf(p+=prog, "%255s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
+		if (strcmp(prop_name,"cpu_cores_count") == 0)
+			props->NumCPUCores = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"simd_count") == 0)
+			props->NumFComputeCores = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"mem_banks_count") == 0)
+			props->NumMemoryBanks = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"caches_count") == 0)
+			props->NumCaches = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"io_links_count") == 0)
+			props->NumIOLinks = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"cpu_core_id_base") == 0)
+			props->CComputeIdLo = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"simd_id_base") == 0)
+			props->FComputeIdLo = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"capability") == 0)
+			props->Capability.Value = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"max_waves_per_simd") == 0)
+			props->MaxWavesPerSIMD = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"lds_size_in_kb") == 0)
+			props->LDSSizeInKB = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"gds_size_in_kb") == 0)
+			props->GDSSizeInKB = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"wave_front_size") == 0)
+			props->WaveFrontSize = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"array_count") == 0)
+			props->NumShaderBanks = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"simd_arrays_per_engine") == 0)
+			props->NumArrays = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"cu_per_simd_array") == 0)
+			props->NumCUPerArray = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"simd_per_cu") == 0)
+			props->NumSIMDPerCU = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"max_slots_scratch_cu") == 0)
+			props->MaxSlotsScratchCU = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"fw_version") == 0)
+			fw_version = (uint16_t)prop_val;
+		else if (strcmp(prop_name,"vendor_id") == 0)
+			props->VendorId = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"device_id") == 0)
+			props->DeviceId = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"location_id") == 0)
+			props->LocationId = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"max_engine_clk_fcompute") == 0)
+			props->MaxEngineClockMhzFCompute = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"max_engine_clk_ccompute") == 0)
+			props->MaxEngineClockMhzCCompute = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"local_mem_size") == 0)
+			/* NOTE(review): value is truncated to 32 bits here - confirm LocalMemSize's width. */
+			props->LocalMemSize = (uint32_t)prop_val;
+
+	}
+
+	get_cpu_stepping(&stepping);
+	props->EngineId = ((stepping << 16) | fw_version);
+
+err2:
+	free(read_buf);
+err1:
+	fclose(fd);
+	return ret;
+}
+
+/*
+ * Reads the properties of memory bank mem_id on node node_id from sysfs and
+ * stores the recognized "name value" pairs in *props. Unrecognized property
+ * names are ignored.
+ *
+ * Returns HSAKMT_STATUS_ERROR when the file cannot be opened or read,
+ * HSAKMT_STATUS_NO_MEMORY when the read buffer cannot be allocated,
+ * HSAKMT_STATUS_SUCCESS otherwise.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_mem_props(uint32_t node_id, uint32_t mem_id, HsaMemoryProperties *props) {
+	FILE *fd;
+	char *read_buf, *p;
+	char prop_name[256];
+	char path[256];
+	long long unsigned int  prop_val;
+	int prog;	/* %n stores through an int pointer */
+	int read_size;
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+	assert(props);
+	snprintf(path, sizeof(path), "%s/%u/mem_banks/%u/properties", KFD_SYSFS_PATH_NODES, node_id, mem_id);
+	fd = fopen(path, "r");
+	if (!fd) {
+		return HSAKMT_STATUS_ERROR;
+	}
+	read_buf = malloc(PAGE_SIZE);
+	if (!read_buf) {
+		ret = HSAKMT_STATUS_NO_MEMORY;
+		goto err1;
+	}
+
+	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
+	if (read_size <= 0) {
+		ret = HSAKMT_STATUS_ERROR;
+		goto err2;
+	}
+
+	/* Since we're using the buffer as a string, we make sure the string terminates */
+	if(read_size >= PAGE_SIZE)
+		read_size = PAGE_SIZE-1;
+	read_buf[read_size] = 0;
+
+	/* %255s bounds the token to prop_name's 256 bytes. */
+	prog = 0;
+	p = read_buf;
+	while(sscanf(p+=prog, "%255s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
+		if (strcmp(prop_name,"heap_type") == 0)
+			props->HeapType = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"size_in_bytes") == 0)
+			props->SizeInBytes = prop_val;
+		else if (strcmp(prop_name,"flags") == 0)
+			props->Flags.MemoryProperty = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"width") == 0)
+			props->Width = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"mem_clk_max") == 0)
+			props->MemoryClockMax = (uint32_t)prop_val;
+	}
+
+err2:
+	free(read_buf);
+err1:
+	fclose(fd);
+	return ret;
+}
+
+/*
+ * Reads the properties of cache cache_id on node node_id from sysfs.
+ *
+ * Scalar "name value" lines are stored in the matching *props fields. Parsing
+ * of scalar lines stops at the "sibling_map" line, whose comma-separated
+ * values are then stored in props->SiblingMap (at most HSA_CPU_SIBLINGS).
+ *
+ * Returns HSAKMT_STATUS_ERROR when the file cannot be opened or read,
+ * HSAKMT_STATUS_NO_MEMORY when the read buffer cannot be allocated,
+ * HSAKMT_STATUS_SUCCESS otherwise.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_cache_props(uint32_t node_id, uint32_t cache_id, HsaCacheProperties *props) {
+	FILE *fd;
+	char *read_buf, *p;
+	char prop_name[256];
+	char path[256];
+	long long unsigned int  prop_val;
+	uint32_t i;
+	int prog;	/* %n stores through an int pointer */
+	int read_size;
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+	assert(props);
+	snprintf(path, sizeof(path), "%s/%u/caches/%u/properties", KFD_SYSFS_PATH_NODES, node_id, cache_id);
+	fd = fopen(path, "r");
+	if (!fd) {
+		return HSAKMT_STATUS_ERROR;
+	}
+	read_buf = malloc(PAGE_SIZE);
+	if (!read_buf) {
+		ret = HSAKMT_STATUS_NO_MEMORY;
+		goto err1;
+	}
+
+	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
+	if (read_size <= 0) {
+		ret = HSAKMT_STATUS_ERROR;
+		goto err2;
+	}
+
+	/* Since we're using the buffer as a string, we make sure the string terminates */
+	if(read_size >= PAGE_SIZE)
+		read_size = PAGE_SIZE-1;
+	read_buf[read_size] = 0;
+
+	/* %255s bounds the token to prop_name's 256 bytes. */
+	prog = 0;
+	p = read_buf;
+	while(sscanf(p+=prog, "%255s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
+		if (strcmp(prop_name,"processor_id_low") == 0)
+			props->ProcessorIdLow = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"level") == 0)
+			props->CacheLevel = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"size") == 0)
+			props->CacheSize = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"cache_line_size") == 0)
+			props->CacheLineSize = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"cache_lines_per_tag") == 0)
+			props->CacheLinesPerTag = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"association") == 0)
+			props->CacheAssociativity = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"latency") == 0)
+			props->CacheLatency = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"type") == 0)
+			props->CacheType.Value = (uint32_t)prop_val;
+		else if (strcmp(prop_name, "sibling_map") == 0)
+			/* p still points at the start of the sibling_map line. */
+			break;
+	}
+
+	/*
+	 * Skip the "sibling_map " label: no conversions are assigned, so sscanf
+	 * returns 0, and prog is non-zero only if the label matched.
+	 */
+	prog = 0;
+	if ((sscanf(p, "sibling_map %n", &prog)) == 0 && prog) {
+		for (i = 0; i < HSA_CPU_SIBLINGS; i++) {
+			p += prog;
+			/*
+			 * Reset the offset before every value: when the last value
+			 * has no trailing ',' or newline, %n is never reached, and a
+			 * stale offset would move p past the end of the string.
+			 */
+			prog = 0;
+			if (sscanf(p, "%u%*[,\n]%n", &props->SiblingMap[i], &prog) != 1 || prog == 0)
+				break;
+		}
+	}
+
+err2:
+	free(read_buf);
+err1:
+	fclose(fd);
+	return ret;
+}
+
+/*
+ * Read the properties of one IO link of a node from sysfs.
+ *
+ * Opens "<KFD_SYSFS_PATH_NODES>/<node_id>/io_link/<iolink_id>/properties",
+ * reads at most PAGE_SIZE-1 bytes of it and parses the "name value" lines
+ * into *props. Names that are not recognised are ignored.
+ *
+ * Returns HSAKMT_STATUS_SUCCESS on success, HSAKMT_STATUS_ERROR if the
+ * file cannot be opened or nothing could be read from it, and
+ * HSAKMT_STATUS_NO_MEMORY if the read buffer cannot be allocated.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_iolink_props(uint32_t node_id, uint32_t iolink_id, HsaIoLinkProperties *props) {
+	FILE *fd;
+	char *read_buf, *p;
+	char prop_name[256];
+	char path[256];
+	long long unsigned int  prop_val;
+	int prog;	/* sscanf's %n stores through an int pointer */
+	int read_size;
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+	assert(props);
+	snprintf(path, 256, "%s/%d/io_link/%d/properties", KFD_SYSFS_PATH_NODES, node_id, iolink_id);
+	fd = fopen(path, "r");
+	if (!fd) {
+		return HSAKMT_STATUS_ERROR;
+	}
+	read_buf = malloc(PAGE_SIZE);
+	if (!read_buf) {
+		ret = HSAKMT_STATUS_NO_MEMORY;
+		goto err1;
+	}
+
+	read_size = fread(read_buf, 1, PAGE_SIZE, fd);
+	if (read_size <= 0) {
+		ret = HSAKMT_STATUS_ERROR;
+		goto err2;
+	}
+
+	/* Since we're using the buffer as a string, we make sure the string terminates */
+	if (read_size >= PAGE_SIZE)
+		read_size = PAGE_SIZE-1;
+	read_buf[read_size] = 0;
+
+	/*
+	 * Walk the buffer one "name value" pair at a time; prog is the number
+	 * of characters consumed by the previous pair. The name is limited to
+	 * 255 characters so that it always fits in prop_name.
+	 */
+	prog = 0;
+	p = read_buf;
+	while (sscanf(p += prog, "%255s %llu\n%n", prop_name, &prop_val, &prog) == 2) {
+		if (strcmp(prop_name,"type") == 0)
+			props->IoLinkType = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"version_major") == 0)
+			props->VersionMajor = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"version_minor") == 0)
+			props->VersionMinor = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"node_from") == 0)
+			props->NodeFrom = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"node_to") == 0)
+			props->NodeTo = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"weight") == 0)
+			props->Weight = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"min_latency") == 0)
+			props->MinimumLatency = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"max_latency") == 0)
+			props->MaximumLatency = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"min_bandwidth") == 0)
+			props->MinimumBandwidth = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"max_bandwidth") == 0)
+			props->MaximumBandwidth = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"recommended_transfer_size") == 0)
+			props->RecTransferSize = (uint32_t)prop_val;
+		else if (strcmp(prop_name,"flags") == 0)
+			props->Flags.LinkProperty = (uint32_t)prop_val;
+	}
+
+err2:
+	free(read_buf);
+err1:
+	fclose(fd);
+	return ret;
+}
+
+/*
+ * Take a snapshot of the HSA topology from sysfs and store it in the
+ * file-level `system` / `node` state.
+ *
+ * The topology generation is read before and after the scan; when the
+ * two values differ the partial result is discarded and the scan is
+ * repeated. For every node the node properties are read first, followed
+ * by its memory banks, caches and IO links.
+ *
+ * On success any snapshot that was already stored is released (including
+ * the per-node arrays of the old node table) and replaced by the new one.
+ * On failure the stored snapshot is left as it was and everything
+ * allocated by this call is freed.
+ *
+ * Returns HSAKMT_STATUS_SUCCESS, HSAKMT_STATUS_NO_MEMORY on allocation
+ * failure, or the status reported by the failing sysfs reader.
+ */
+HSAKMT_STATUS
+topology_take_snapshot(void)
+{
+	uint32_t gen_start, gen_end, i, j, mem_id, cache_id, link_id;
+	uint32_t num_filled;	/* leading temp_nodes entries passed to free_node() on cleanup */
+	HsaSystemProperties sys_props;
+	HsaSystemProperties *new_system;
+	node_t *temp_nodes = 0;
+	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+retry:
+	num_filled = 0;
+	ret = topology_sysfs_get_generation(&gen_start);
+	if (ret != HSAKMT_STATUS_SUCCESS)
+		return ret;
+	ret = topology_sysfs_get_system_props(&sys_props);
+	if (ret != HSAKMT_STATUS_SUCCESS)
+		return ret;
+	if (sys_props.NumNodes > 0) {
+		/* calloc(count, size) lets the allocator reject an overflowing product */
+		temp_nodes = calloc(sys_props.NumNodes, sizeof(node_t));
+		if (!temp_nodes)
+			return HSAKMT_STATUS_NO_MEMORY;
+		for (i = 0; i < sys_props.NumNodes; i++) {
+			ret = topology_sysfs_get_node_props(i,
+					&temp_nodes[i].node,
+					&temp_nodes[i].gpu_id);
+			if (ret != HSAKMT_STATUS_SUCCESS)
+				goto err_free;
+
+			/* From here on node i may own arrays, so cleanup must cover it */
+			num_filled = i + 1;
+
+			if (temp_nodes[i].node.NumMemoryBanks) {
+				temp_nodes[i].mem = calloc(temp_nodes[i].node.NumMemoryBanks, sizeof(HsaMemoryProperties));
+				if (!temp_nodes[i].mem) {
+					ret = HSAKMT_STATUS_NO_MEMORY;
+					goto err_free;
+				}
+				for (mem_id = 0; mem_id < temp_nodes[i].node.NumMemoryBanks; mem_id++) {
+					ret = topology_sysfs_get_mem_props(i, mem_id, &temp_nodes[i].mem[mem_id]);
+					if (ret != HSAKMT_STATUS_SUCCESS)
+						goto err_free;
+				}
+			}
+
+			if (temp_nodes[i].node.NumCaches) {
+				temp_nodes[i].cache = calloc(temp_nodes[i].node.NumCaches, sizeof(HsaCacheProperties));
+				if (!temp_nodes[i].cache) {
+					ret = HSAKMT_STATUS_NO_MEMORY;
+					goto err_free;
+				}
+				for (cache_id = 0; cache_id < temp_nodes[i].node.NumCaches; cache_id++) {
+					ret = topology_sysfs_get_cache_props(i, cache_id, &temp_nodes[i].cache[cache_id]);
+					if (ret != HSAKMT_STATUS_SUCCESS)
+						goto err_free;
+				}
+			}
+
+			if (temp_nodes[i].node.NumIOLinks) {
+				temp_nodes[i].link = calloc(temp_nodes[i].node.NumIOLinks, sizeof(HsaIoLinkProperties));
+				if (!temp_nodes[i].link) {
+					ret = HSAKMT_STATUS_NO_MEMORY;
+					goto err_free;
+				}
+				for (link_id = 0; link_id < temp_nodes[i].node.NumIOLinks; link_id++) {
+					ret = topology_sysfs_get_iolink_props(i, link_id, &temp_nodes[i].link[link_id]);
+					if (ret != HSAKMT_STATUS_SUCCESS)
+						goto err_free;
+				}
+			}
+		}
+	}
+
+	ret = topology_sysfs_get_generation(&gen_end);
+	if (ret != HSAKMT_STATUS_SUCCESS)
+		goto err_free;
+
+	/* Topology changed while it was being read: throw the result away and rescan */
+	if (gen_start != gen_end) {
+		if (temp_nodes) {
+			for (j = 0; j < num_filled; j++)
+				free_node(&temp_nodes[j]);
+			free(temp_nodes);
+			temp_nodes = 0;
+		}
+		goto retry;
+	}
+
+	/* Reuse the existing system structure if there is one */
+	new_system = system;
+	if (!new_system) {
+		new_system = malloc(sizeof(HsaSystemProperties));
+		if (!new_system) {
+			ret = HSAKMT_STATUS_NO_MEMORY;
+			goto err_free;
+		}
+	}
+
+	/*
+	 * Release the previous node table. Its length is the NumNodes of the
+	 * previous snapshot, so this has to happen before *system is
+	 * overwritten; freeing only the table itself would leak the per-node
+	 * arrays.
+	 */
+	if (node) {
+		if (system) {
+			for (j = 0; j < system->NumNodes; j++)
+				free_node(&node[j]);
+		}
+		free(node);
+	}
+
+	*new_system = sys_props;
+	system = new_system;
+	node = temp_nodes;
+
+	return ret;
+
+err_free:
+	if (temp_nodes) {
+		for (j = 0; j < num_filled; j++)
+			free_node(&temp_nodes[j]);
+		free(temp_nodes);
+	}
+
+	return ret;
+}
+
+/*
+ * Drop the snapshot of the HSA topology information.
+ * Assume lock is held.
+ *
+ * Frees every node of the node table, the table itself and the system
+ * properties, and resets both file-level pointers to NULL.
+ *
+ * Always returns HSAKMT_STATUS_SUCCESS.
+ */
+HSAKMT_STATUS
+topology_drop_snapshot(void)
+{
+	uint64_t nodeid;
+
+	/*
+	 * A snapshot of a topology without nodes has `system` set and `node`
+	 * NULL; that is a valid state and has to be released as well. Any
+	 * other combination with only one of the two pointers set is
+	 * unexpected: report it and leave the state untouched.
+	 */
+	if (!!system != !!node && !(system && system->NumNodes == 0)) {
+		printf("Probable inconsistency?\n");
+		return HSAKMT_STATUS_SUCCESS;
+	}
+
+	if (node) {
+		/* Remove state */
+		for (nodeid = 0; nodeid < system->NumNodes; nodeid++)
+			free_node(&node[nodeid]);
+
+		free(node);
+		node = NULL;
+	}
+
+	free(system);
+	system = NULL;
+
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Check that nodeid refers to a node of the current topology snapshot.
+ *
+ * Returns HSAKMT_STATUS_INVALID_NODE_UNIT when nodeid is not below
+ * MAX_NODES, when no snapshot is stored, or when nodeid is not below the
+ * snapshot's NumNodes. Otherwise returns HSAKMT_STATUS_SUCCESS and, if
+ * gpu_id is not NULL, stores the node's gpu_id in *gpu_id.
+ */
+HSAKMT_STATUS
+validate_nodeid(uint32_t nodeid, uint32_t *gpu_id)
+{
+	if (nodeid >= MAX_NODES)
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+	/* No snapshot stored */
+	if (node == NULL || system == NULL)
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+	if (nodeid >= system->NumNodes)
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+	if (gpu_id != NULL)
+		*gpu_id = node[nodeid].gpu_id;
+
+	return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Find the node whose gpu_id equals the given one.
+ *
+ * On a match stores the node index in *node_id and returns
+ * HSAKMT_STATUS_SUCCESS. Returns HSAKMT_STATUS_INVALID_NODE_UNIT when no
+ * node matches or when no topology snapshot is stored.
+ */
+HSAKMT_STATUS
+gpuid_to_nodeid(uint32_t gpu_id, uint32_t* node_id){
+	uint64_t node_idx;
+
+	/* Same guard as validate_nodeid(): without a snapshot there is nothing to search */
+	if (!node || !system)
+		return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+	for(node_idx = 0; node_idx < system->NumNodes; node_idx++){
+		if (node[node_idx].gpu_id == gpu_id){
+			*node_id = node_idx;
+			return HSAKMT_STATUS_SUCCESS;
+		}
+	}
+
+	return HSAKMT_STATUS_INVALID_NODE_UNIT;
+
+}
+
+/*
+ * Take a fresh topology snapshot and copy its system properties to the
+ * caller.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER when SystemProperties is NULL,
+ * otherwise the status of topology_take_snapshot(). The snapshot is taken
+ * with hsakmt_mutex held.
+ * NOTE(review): CHECK_KFD_OPEN() is defined elsewhere; presumably it
+ * returns early when the KFD device has not been opened - confirm.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtAcquireSystemProperties(
+    HsaSystemProperties*  SystemProperties    //OUT
+    )
+{
+	HSAKMT_STATUS status;
+
+	CHECK_KFD_OPEN();
+
+	if (SystemProperties == NULL)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	pthread_mutex_lock(&hsakmt_mutex);
+
+	status = topology_take_snapshot();
+	if (status == HSAKMT_STATUS_SUCCESS) {
+		/* A successful snapshot always leaves the system properties set */
+		assert(system);
+		*SystemProperties = *system;
+	}
+
+	pthread_mutex_unlock(&hsakmt_mutex);
+
+	return status;
+}
+
+/*
+ * Release the topology snapshot taken by hsaKmtAcquireSystemProperties().
+ *
+ * Calls topology_drop_snapshot() with hsakmt_mutex held and returns its
+ * status.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtReleaseSystemProperties(void)
+{
+	HSAKMT_STATUS status;
+
+	CHECK_KFD_OPEN();
+
+	pthread_mutex_lock(&hsakmt_mutex);
+	status = topology_drop_snapshot();
+	pthread_mutex_unlock(&hsakmt_mutex);
+
+	return status;
+}
+
+/*
+ * Copy the properties of one node of the current snapshot to the caller.
+ *
+ * The reported NumMemoryBanks is the snapshot value plus NUM_OF_HEAPS.
+ * NOTE(review): presumably this accounts for the heap entries appended by
+ * hsaKmtGetNodeMemoryProperties() - confirm.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER when NodeProperties is NULL or
+ * NodeId is not below the snapshot's NumNodes,
+ * HSAKMT_STATUS_INVALID_NODE_UNIT when no snapshot has been acquired or
+ * validate_nodeid() rejects NodeId, HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeProperties(
+    HSAuint32               NodeId,            //IN
+    HsaNodeProperties*      NodeProperties     //OUT
+    )
+{
+	HSAKMT_STATUS err;
+	uint32_t gpu_id;
+
+	if (!NodeProperties)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	CHECK_KFD_OPEN();
+	pthread_mutex_lock(&hsakmt_mutex);
+
+	/* KFD ADD page 18, snapshot protocol violation */
+	if (system == NULL) {
+		err = HSAKMT_STATUS_INVALID_NODE_UNIT;
+		assert(system);
+		goto out;
+	}
+
+	if (NodeId >= system->NumNodes) {
+		err = HSAKMT_STATUS_INVALID_PARAMETER;
+		goto out;
+	}
+
+	/* Leave through "out" on failure so that hsakmt_mutex is released */
+	err = validate_nodeid(NodeId, &gpu_id);
+	if (err != HSAKMT_STATUS_SUCCESS)
+		goto out;
+
+	*NodeProperties = node[NodeId].node;
+	NodeProperties->NumMemoryBanks += NUM_OF_HEAPS;
+
+	err = HSAKMT_STATUS_SUCCESS;
+
+out:
+	pthread_mutex_unlock(&hsakmt_mutex);
+	return err;
+}
+
+/*
+ * Copy the memory bank properties of one node to the caller.
+ *
+ * At most NumBanks entries of MemoryProperties are written: first the
+ * banks recorded in the snapshot, then, while room remains, an LDS heap
+ * entry and (only if the node reports a non-zero LocalMemSize) a private
+ * frame buffer entry. For the two appended entries only HeapType,
+ * SizeInBytes and VirtualBaseAddress are assigned.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER when MemoryProperties is NULL
+ * or NodeId is not below the snapshot's NumNodes,
+ * HSAKMT_STATUS_INVALID_NODE_UNIT when no snapshot has been acquired or
+ * validate_nodeid() rejects NodeId, HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeMemoryProperties(
+    HSAuint32             NodeId,             //IN
+    HSAuint32             NumBanks,           //IN
+    HsaMemoryProperties*  MemoryProperties    //OUT
+    )
+{
+	HSAKMT_STATUS err;
+	uint32_t i, gpu_id;
+
+	if (!MemoryProperties)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	CHECK_KFD_OPEN();
+	pthread_mutex_lock(&hsakmt_mutex);
+
+	/* KFD ADD page 18, snapshot protocol violation */
+	if (system == NULL) {
+		err = HSAKMT_STATUS_INVALID_NODE_UNIT;
+		assert(system);
+		goto out;
+	}
+
+	/* Check still necessary */
+	if (NodeId >= system->NumNodes ) {
+		err = HSAKMT_STATUS_INVALID_PARAMETER;
+		goto out;
+	}
+
+	/* Leave through "out" on failure so that hsakmt_mutex is released */
+	err = validate_nodeid(NodeId, &gpu_id);
+	if (err != HSAKMT_STATUS_SUCCESS)
+		goto out;
+
+	for (i = 0; i < MIN(node[NodeId].node.NumMemoryBanks, NumBanks); i++) {
+		assert(node[NodeId].mem);
+		MemoryProperties[i] = node[NodeId].mem[i];
+	}
+
+	/*Add LDS*/
+	if (i < NumBanks){
+		MemoryProperties[i].HeapType = HSA_HEAPTYPE_GPU_LDS;
+		MemoryProperties[i].SizeInBytes = node[NodeId].node.LDSSizeInKB * 1024;
+		MemoryProperties[i].VirtualBaseAddress = fmm_get_aperture_base(FMM_LDS, gpu_id);
+		i++;
+	}
+
+	/*Add Local memory - HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE*/
+	if ((i < NumBanks) && (node[NodeId].node.LocalMemSize > 0)) {
+		MemoryProperties[i].HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE;
+		MemoryProperties[i].SizeInBytes = node[NodeId].node.LocalMemSize;
+		MemoryProperties[i].VirtualBaseAddress = fmm_get_aperture_base(FMM_GPUVM, gpu_id);
+		i++;
+	}
+
+	err = HSAKMT_STATUS_SUCCESS;
+
+out:
+	pthread_mutex_unlock(&hsakmt_mutex);
+	return err;
+}
+
+/*
+ * Copy the cache properties of one node of the current snapshot to the
+ * caller.
+ *
+ * ProcessorId is not used by this implementation.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER when CacheProperties is NULL,
+ * NodeId is not below the snapshot's NumNodes, or NumCaches exceeds the
+ * node's NumCaches; HSAKMT_STATUS_INVALID_NODE_UNIT when no snapshot has
+ * been acquired; HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeCacheProperties(
+    HSAuint32           NodeId,         //IN
+    HSAuint32           ProcessorId,    //IN
+    HSAuint32           NumCaches,      //IN
+    HsaCacheProperties* CacheProperties //OUT
+    )
+{
+	HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;
+	uint32_t idx, count;
+
+	if (CacheProperties == NULL)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	CHECK_KFD_OPEN();
+	pthread_mutex_lock(&hsakmt_mutex);
+
+	/* KFD ADD page 18, snapshot protocol violation */
+	if (system == NULL) {
+		status = HSAKMT_STATUS_INVALID_NODE_UNIT;
+		assert(system);
+	} else if (NodeId >= system->NumNodes) {
+		status = HSAKMT_STATUS_INVALID_PARAMETER;
+	} else if (NumCaches > node[NodeId].node.NumCaches) {
+		status = HSAKMT_STATUS_INVALID_PARAMETER;
+	} else {
+		count = MIN(node[NodeId].node.NumCaches, NumCaches);
+		for (idx = 0; idx < count; idx++) {
+			assert(node[NodeId].cache);
+			CacheProperties[idx] = node[NodeId].cache[idx];
+		}
+	}
+
+	pthread_mutex_unlock(&hsakmt_mutex);
+	return status;
+}
+
+/*
+ * Copy the IO link properties of one node of the current snapshot to the
+ * caller.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER when IoLinkProperties is NULL,
+ * NodeId is not below the snapshot's NumNodes, or NumIoLinks exceeds the
+ * node's NumIOLinks; HSAKMT_STATUS_INVALID_NODE_UNIT when no snapshot has
+ * been acquired; HSAKMT_STATUS_SUCCESS otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeIoLinkProperties(
+    HSAuint32            NodeId,            //IN
+    HSAuint32            NumIoLinks,        //IN
+    HsaIoLinkProperties* IoLinkProperties  //OUT
+    )
+{
+	HSAKMT_STATUS status = HSAKMT_STATUS_SUCCESS;
+	uint32_t idx, count;
+
+	if (IoLinkProperties == NULL)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	CHECK_KFD_OPEN();
+
+	pthread_mutex_lock(&hsakmt_mutex);
+
+	/* KFD ADD page 18, snapshot protocol violation */
+	if (system == NULL) {
+		status = HSAKMT_STATUS_INVALID_NODE_UNIT;
+		assert(system);
+	} else if (NodeId >= system->NumNodes) {
+		status = HSAKMT_STATUS_INVALID_PARAMETER;
+	} else if (NumIoLinks > node[NodeId].node.NumIOLinks) {
+		status = HSAKMT_STATUS_INVALID_PARAMETER;
+	} else {
+		count = MIN(node[NodeId].node.NumIOLinks, NumIoLinks);
+		for (idx = 0; idx < count; idx++) {
+			assert(node[NodeId].link);
+			IoLinkProperties[idx] = node[NodeId].link[idx];
+		}
+	}
+
+	pthread_mutex_unlock(&hsakmt_mutex);
+	return status;
+}
+
+/*
+ * Return the DeviceId recorded for node_id in the current snapshot, or 0
+ * when no snapshot is stored or node_id is not below its NumNodes.
+ */
+uint16_t get_device_id_by_node(HSAuint32 node_id)
+{
+	if (node == NULL || system == NULL)
+		return 0;
+
+	if (node_id >= system->NumNodes)
+		return 0;
+
+	return node[node_id].node.DeviceId;
+}
+
+/*
+ * Read the CPU stepping from /proc/cpuinfo.
+ *
+ * Reads at most PAGE_SIZE-1 bytes of the file and parses the first
+ * "stepping" line found in them. *stepping is set to 0 when no such line
+ * is found.
+ *
+ * Returns 0 on success, -1 if the file cannot be opened or the read
+ * buffer cannot be allocated, -2 if nothing could be read. *stepping is
+ * not written on failure.
+ */
+static int get_cpu_stepping(uint16_t* stepping)
+{
+	/* Must start at 0: the success path falls through to the cleanup labels */
+	int ret = 0;
+	FILE* fd = fopen("/proc/cpuinfo", "r");
+	if (!fd)
+		return -1;
+
+	char* read_buf = malloc(PAGE_SIZE);
+	if (!read_buf) {
+		ret = -1;
+		goto err1;
+	}
+
+	int read_size = fread(read_buf, 1, PAGE_SIZE, fd);
+	if (read_size <= 0) {
+		ret = -2;
+		goto err2;
+	}
+
+	/* Since we're using the buffer as a string, we make sure the string terminates */
+	if(read_size >= PAGE_SIZE)
+		read_size = PAGE_SIZE-1;
+	read_buf[read_size] = 0;
+
+	*stepping = 0;
+
+	char* p = strstr(read_buf, "stepping");
+	if (p)
+		sscanf(p , "stepping\t: %hu\n", stepping);
+
+err2:
+	free(read_buf);
+err1:
+	fclose(fd);
+
+	return ret;
+}
diff --git a/hsakmt/version.c b/hsakmt/version.c
new file mode 100644
index 0000000..95bfec6
--- /dev/null
+++ b/hsakmt/version.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "libhsakmt.h"
+#include <stdlib.h>
+#include <string.h>
+#include "linux/kfd_ioctl.h"
+
+/*
+ * Query the kernel interface version through the AMDKFD_IOC_GET_VERSION
+ * ioctl and store the major/minor numbers in *VersionInfo.
+ *
+ * Returns HSAKMT_STATUS_INVALID_PARAMETER when VersionInfo is NULL,
+ * HSAKMT_STATUS_ERROR when the ioctl fails, HSAKMT_STATUS_SUCCESS
+ * otherwise.
+ */
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetVersion(
+    HsaVersionInfo*  VersionInfo    //OUT
+    )
+{
+	CHECK_KFD_OPEN();
+
+	/* Reject a NULL output pointer before it is dereferenced below */
+	if (!VersionInfo)
+		return HSAKMT_STATUS_INVALID_PARAMETER;
+
+	struct kfd_ioctl_get_version_args args;
+	memset(&args, 0, sizeof(args));
+
+	if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_VERSION, &args) == -1)
+		return HSAKMT_STATUS_ERROR;
+
+	VersionInfo->KernelInterfaceMajorVersion = args.major_version;
+	VersionInfo->KernelInterfaceMinorVersion = args.minor_version;
+
+	return HSAKMT_STATUS_SUCCESS;
+}
author	Oded Gabbay <oded.gabbay@gmail.com>	2015-09-30 11:42:21 +0300
committer	Oded Gabbay <oded.gabbay@gmail.com>	2015-09-30 11:43:59 +0300
commit	27675a5f87f0c11ab8a59f119518f627598c4caf (patch)
tree	62dbe253bbd7df7b2e85d84668b89ce7adda6e86 /hsakmt
parent	bbdfa9eeb6dd015f22479368d2440d62785a4bb8 (diff)