summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOded Gabbay <oded.gabbay@amd.com>2014-07-29 10:55:52 +0300
committerOded Gabbay <oded.gabbay@amd.com>2014-07-29 10:55:52 +0300
commit813af2b89aa6d0077bb50d8e3a98d637f219040f (patch)
treeda24fd7777504a4cea0d07e054d045e1762637d0
parentb0b875964fef7d1d40ee20c203f73b8ae2d02c5c (diff)
Add definitions of HSA functions and types
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
-rw-r--r--include/hsakmt.h565
-rw-r--r--include/hsakmttypes.h851
2 files changed, 1416 insertions, 0 deletions
diff --git a/include/hsakmt.h b/include/hsakmt.h
new file mode 100644
index 0000000..3983aa0
--- /dev/null
+++ b/include/hsakmt.h
@@ -0,0 +1,565 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _HSAKMT_H_
+#define _HSAKMT_H_
+
+#include "hsakmttypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ "Opens" the HSA kernel driver for user-kernel mode communication.
+
+ On Windows, this function gets a handle to the KFD's AMDKFDIO device object that
+ is responsible for user-kernel communication, this handle is used internally by
+ the thunk library to send device I/O control to the HSA kernel driver.
+ No other thunk library function may be called unless the user-kernel communication
+ channel is opened first.
+
+ On Linux this call opens the "/dev/kfd" device file to establish a communication
+ path to the kernel.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtOpenKFD( void );
+
+/**
+ "Closes" the user-kernel communication path.
+
+ On Windows, the handle obtained by the hsaKmtOpenKFD() function is closed;
+ no other communication with the kernel driver is possible after the successful
+ execution of the saKmdCloseKFD() function. Depending on the failure reason,
+ the user-kernel communication path may or may not be still active.
+
+ On Linux the function closes the "dev/kfd" device file.
+ No further communication to the kernel driver is allowed until hsaKmtOpenKFD()
+ function is called again.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtCloseKFD( void );
+
+
+/**
+ Returns the user-kernel interface version supported by KFD.
+ Higher major numbers usually add new features to KFD and may break user-kernel
+ compatibility; higher minor numbers define additional functionality associated
+ within a major number.
+ The calling software should validate that it meets the minimum interface version
+ as described in the API specification.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetVersion(
+ HsaVersionInfo* VersionInfo //OUT
+ );
+
+/**
+ The function takes a "snapshot" of the topology information within the KFD
+ to avoid any changes during the enumeration process.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtAcquireSystemProperties(
+ HsaSystemProperties* SystemProperties //OUT
+ );
+
+/**
+ Releases the topology "snapshot" taken by hsaKmtAcquireSystemProperties()
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtReleaseSystemProperties( void ) ;
+
+/**
+ Retrieves the discoverable sub-properties for a given HSA
+ node. The parameters returned allow the application or runtime to size the
+ management structures necessary to store the information.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeProperties(
+ HSAuint32 NodeId, //IN
+ HsaNodeProperties* NodeProperties //OUT
+ );
+
+/**
+ Retrieves the memory properties of a specific HSA node.
+ the memory pointer passed as MemoryProperties is sized as
+ NumBanks * sizeof(HsaMemoryProperties). NumBanks is retrieved with the
+ hsaKmtGetNodeProperties() call.
+
+ Some of the data returned is optional. Not all implementations may return all
+ parameters in the hsaMemoryProperties.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeMemoryProperties(
+ HSAuint32 NodeId, //IN
+ HSAuint32 NumBanks, //IN
+ HsaMemoryProperties* MemoryProperties //OUT
+ );
+
+/**
+ Retrieves the cache properties of a specific HSA node and processor ID.
+ ProcessorID refers to either a CPU core or a SIMD unit as enumerated earlier
+ via the hsaKmtGetNodeProperties() call.
+ The memory pointer passed as CacheProperties is sized as
+ NumCaches * sizeof(HsaCacheProperties). NumCaches is retrieved with the
+ hsaKmtGetNodeProperties() call.
+
+ The data returned is optional. Not all implementations may return all
+ parameters in the CacheProperties.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeCacheProperties(
+ HSAuint32 NodeId, //IN
+ HSAuint32 ProcessorId, //IN
+ HSAuint32 NumCaches, //IN
+ HsaCacheProperties* CacheProperties //OUT
+ );
+
+/**
+ Retrieves the HSA IO affinity properties of a specific HSA node.
+ the memory pointer passed as Properties is sized as
+ NumIoLinks * sizeof(HsaIoLinkProperties). NumIoLinks is retrieved with the
+ hsaKmtGetNodeProperties() call.
+
+ The data returned is optional. Not all implementations may return all
+ parameters in the IoLinkProperties.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetNodeIoLinkProperties(
+ HSAuint32 NodeId, //IN
+ HSAuint32 NumIoLinks, //IN
+ HsaIoLinkProperties* IoLinkProperties //OUT
+ );
+
+
+
+/**
+ Creates an operating system event associated with a HSA event ID
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtCreateEvent(
+ HsaEventDescriptor* EventDesc, //IN
+ bool ManualReset, //IN
+ bool IsSignaled, //IN
+ HsaEvent** Event //OUT
+ );
+
+/**
+ Destroys an operating system event associated with a HSA event ID
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDestroyEvent(
+ HsaEvent* Event //IN
+ );
+
+/**
+ Sets the specified event object to the signaled state
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtSetEvent(
+ HsaEvent* Event //IN
+ );
+
+/**
+ Sets the specified event object to the non-signaled state
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtResetEvent(
+ HsaEvent* Event //IN
+ );
+
+/**
+ Queries the state of the specified event object
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtQueryEventState(
+ HsaEvent* Event //IN
+ );
+
+/**
+ Checks the current state of the event object. If the object's state is
+ nonsignaled, the calling thread enters the wait state.
+
+ The function returns when one of the following occurs:
+- The specified event object is in the signaled state.
+- The time-out interval elapses.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtWaitOnEvent(
+ HsaEvent* Event, //IN
+ HSAuint32 Milliseconds //IN
+ );
+
+/**
+ Checks the current state of multiple event objects.
+
+ The function returns when one of the following occurs:
+- Either any one or all of the specified objects are in the signaled state
+ - if "WaitOnAll" is "true" the function returns when the state of all
+ objects in array is signaled
+ - if "WaitOnAll" is "false" the function returns when the state of any
+ one of the objects is set to signaled
+- The time-out interval elapses.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtWaitOnMultipleEvents(
+ HsaEvent* Events[], //IN
+ HSAuint32 NumEvents, //IN
+ bool WaitOnAll, //IN
+ HSAuint32 Milliseconds //IN
+ );
+
+/**
+ new TEMPORARY function definition - to be used only on "Triniti + Southern Islands" platform
+ If used on other platforms the function will return HSAKMT_STATUS_ERROR
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtReportQueue(
+ HSA_QUEUEID QueueId, //IN
+ HsaQueueReport* QueueReport //OUT
+ );
+
+/**
+ Creates a GPU queue with user-mode access rights
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtCreateQueue(
+ HSAuint32 NodeId, //IN
+ HSA_QUEUE_TYPE Type, //IN
+ HSAuint32 QueuePercentage, //IN
+ HSA_QUEUE_PRIORITY Priority, //IN
+ void* QueueAddress, //IN
+ HSAuint64 QueueSizeInBytes, //IN
+ HsaEvent* Event, //IN
+ HsaQueueResource* QueueResource //OUT
+ );
+
+/**
+ Updates a queue
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtUpdateQueue(
+ HSA_QUEUEID QueueId, //IN
+ HSAuint32 QueuePercentage,//IN
+ HSA_QUEUE_PRIORITY Priority, //IN
+ void* QueueAddress, //IN
+ HSAuint64 QueueSize, //IN
+ HsaEvent* Event //IN
+ );
+
+/**
+ Destroys a queue
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDestroyQueue(
+ HSA_QUEUEID QueueId //IN
+ );
+
+/**
+ Allows an HSA process to set/change the default and alternate memory coherency, before starting to dispatch.
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtSetMemoryPolicy(
+ HSAuint32 Node, //IN
+ HSAuint32 DefaultPolicy, //IN
+ HSAuint32 AlternatePolicy, //IN
+ void* MemoryAddressAlternate, //IN (page-aligned)
+ HSAuint64 MemorySizeInBytes //IN (page-aligned)
+ );
+/**
+ Allocates a memory buffer that may be accessed by the GPU
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtAllocMemory(
+ HSAuint32 PreferredNode, //IN
+ HSAuint64 SizeInBytes, //IN (multiple of page size)
+ HsaMemFlags MemFlags, //IN
+ void** MemoryAddress //OUT (page-aligned)
+ );
+
+/**
+ Frees a memory buffer
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtFreeMemory(
+ void* MemoryAddress, //IN (page-aligned)
+ HSAuint64 SizeInBytes //IN
+ );
+
+/**
+ Registers with KFD a memory buffer that may be accessed by the GPU
+ This function will never be required for Linux
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtRegisterMemory(
+ void* MemoryAddress, //IN (page-aligned)
+ HSAuint64 MemorySizeInBytes //IN (page-aligned)
+ );
+
+
+/**
+ Unregisters with KFD a memory buffer
+ This function will never be required for Linux
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDeregisterMemory(
+ void* MemoryAddress //IN
+ );
+
+
+/**
+ Ensures that the memory is resident and can be accessed by GPU
+ Not implemented yet
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtMapMemoryToGPU(
+ void* MemoryAddress, //IN (page-aligned)
+ HSAuint64 MemorySizeInBytes, //IN (page-aligned)
+ HSAuint64* AlternateVAGPU //OUT (page-aligned)
+ );
+
+/**
+ Releases the residency of the memory
+ Not implemented yet
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtUnmapMemoryToGPU(
+ void* MemoryAddress //IN (page-aligned)
+ );
+
+
+/**
+ Notifies the kernel driver that a process wants to use GPU debugging facilities
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgRegister(
+ HSAuint32 NodeId //IN
+ );
+
+/**
+ Detaches the debugger process from the HW debug established by hsaKmtDbgRegister() API
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgUnregister(
+ HSAuint32 NodeId //IN
+ );
+
+/**
+ Controls a wavefront
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgWavefrontControl(
+ HSAuint32 NodeId, //IN
+ HSA_DBG_WAVEOP Operand, //IN
+ HSA_DBG_WAVEMODE Mode, //IN
+ HSAuint32 TrapId, //IN
+ HsaDbgWaveMessage* DbgWaveMsgRing //IN
+ );
+
+/**
+ Sets watch points on memory address ranges to generate exception events when the
+ watched addresses are accessed
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtDbgAddressWatch(
+ HSAuint32 NodeId, //IN
+ HSAuint32 NumWatchPoints, //IN
+ HSA_DBG_WATCH_MODE WatchMode[], //IN
+ void* WatchAddress[], //IN
+ HSAuint64 WatchMask[], //IN, optional
+ HsaEvent* WatchEvent[] //IN, optional
+ );
+
+/**
+ Gets GPU and CPU clock counters for particular Node
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtGetClockCounters(
+ HSAuint32 NodeId, //IN
+ HsaClockCounters* Counters //OUT
+ );
+
+/**
+ Retrieves information on the available HSA counters
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcGetCounterProperties(
+ HSAuint32 NodeId, //IN
+ HsaCounterProperties** CounterProperties //OUT
+ );
+
+/**
+ Registers a set of (HW) counters to be used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcRegisterTrace(
+ HSAuint32 NodeId, //IN
+ HSAuint32 NumberOfCounters, //IN
+ HsaCounter* Counters, //IN
+ HsaPmcTraceRoot* TraceRoot //OUT
+ );
+
+/**
+ Unregisters a set of (HW) counters used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcUnregisterTrace(
+ HSAuint32 NodeId, //IN
+ HSATraceId TraceId //IN
+ );
+
+/**
+ Allows a user mode process to get exclusive access to the defined set of (HW) counters
+ used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcAcquireTraceAccess(
+ HSAuint32 NodeId, //IN
+ HSATraceId TraceId //IN
+ );
+
+/**
+ Allows a user mode process to release exclusive access to the defined set of (HW) counters
+ used for tracing/profiling
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcReleaseTraceAccess(
+ HSAuint32 NodeId, //IN
+ HSATraceId TraceId //IN
+ );
+
+/**
+ Starts tracing operation on a previously established set of performance counters
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcStartTrace(
+ HSATraceId TraceId, //IN
+ void* TraceBuffer, //IN (page aligned)
+ HSAuint64 TraceBufferSizeBytes //IN (page aligned)
+ );
+
+/**
+ Forces an update of all the counters that a previously started trace operation has registered
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcQueryTrace(
+ HSATraceId TraceId //IN
+ );
+
+/**
+ Stops tracing operation on a previously established set of performance counters
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtPmcStopTrace(
+ HSATraceId TraceId //IN
+ );
+
+#ifdef __cplusplus
+} //extern "C"
+#endif
+
+#endif //_HSAKMT_H_
+
diff --git a/include/hsakmttypes.h b/include/hsakmttypes.h
new file mode 100644
index 0000000..178a86d
--- /dev/null
+++ b/include/hsakmttypes.h
@@ -0,0 +1,851 @@
+/*
+ * Copyright © 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including
+ * the next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _HSAKMTTYPES_H_
+#define _HSAKMTTYPES_H_
+
+//the definitions and THUNK API are version specific - define the version numbers here
+#define HSAKMT_VERSION_MAJOR 0
+#define HSAKMT_VERSION_MINOR 99
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(_WIN64) || defined(_WINDOWS) || defined(_WIN32)
+
+ #if defined(_WIN32)
+ #define HSAKMTAPI __stdcall
+ #else
+ #define HSAKMTAPI
+ #endif
+
+ typedef unsigned char HSAuint8;
+ typedef char HSAint8;
+ typedef unsigned short HSAuint16;
+ typedef signed short HSAint16;
+ typedef unsigned __int32 HSAuint32;
+ typedef signed __int64 HSAint64;
+ typedef unsigned __int64 HSAuint64;
+
+#elif defined(__linux__)
+
+#include <stdbool.h>
+#include <stdint.h>
+
+ #define HSAKMTAPI
+
+ typedef uint8_t HSAuint8;
+ typedef int8_t HSAint8;
+ typedef uint16_t HSAuint16;
+ typedef int16_t HSAint16;
+ typedef uint32_t HSAuint32;
+ typedef int64_t HSAint64;
+ typedef uint64_t HSAuint64;
+
+#endif
+
+typedef void* HSA_HANDLE;
+typedef HSAuint64 HSA_QUEUEID;
+
+// This is included in order to force the alignments to be 4 bytes so that
+// it avoids extra padding added by the compiler when a 64-bit binary is generated.
+#pragma pack(push, hsakmttypes_h, 4)
+
+//
+// HSA STATUS codes returned by the KFD Interfaces
+//
+
+typedef enum _HSAKMT_STATUS
+{
+ HSAKMT_STATUS_SUCCESS = 0, // Operation successful
+ HSAKMT_STATUS_ERROR = 1, // General error return if not otherwise specified
+ HSAKMT_STATUS_DRIVER_MISMATCH = 2, // User mode component is not compatible with kernel HSA driver
+
+ HSAKMT_STATUS_INVALID_PARAMETER = 3, // KFD identifies input parameters invalid
+ HSAKMT_STATUS_INVALID_HANDLE = 4, // KFD identifies handle parameter invalid
+ HSAKMT_STATUS_INVALID_NODE_UNIT = 5, // KFD identifies node or unit parameter invalid
+
+ HSAKMT_STATUS_NO_MEMORY = 6, // No memory available (when allocating queues or memory)
+ HSAKMT_STATUS_BUFFER_TOO_SMALL = 7, // A buffer needed to handle a request is too small
+
+ HSAKMT_STATUS_NOT_IMPLEMENTED = 10, // KFD function is not implemented for this set of paramters
+ HSAKMT_STATUS_NOT_SUPPORTED = 11, // KFD function is not supported on this node
+ HSAKMT_STATUS_UNAVAILABLE = 12, // KFD function is not available currently on this node (but
+ // may be at a later time)
+
+ HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED = 20, // KFD driver path not opened
+ HSAKMT_STATUS_KERNEL_COMMUNICATION_ERROR = 21, // user-kernel mode communication failure
+ HSAKMT_STATUS_KERNEL_ALREADY_OPENED = 22, // KFD driver path already opened
+ HSAKMT_STATUS_HSAMMU_UNAVAILABLE = 23, // ATS/PRI 1.1 (Address Translation Services) not available
+ // (IOMMU driver not installed or not-available)
+
+ HSAKMT_STATUS_WAIT_FAILURE = 30, // The wait operation failed
+ HSAKMT_STATUS_WAIT_TIMEOUT = 31, // The wait operation timed out
+
+ HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED = 35, // Memory buffer already registered
+ HSAKMT_STATUS_MEMORY_NOT_REGISTERED = 36, // Memory buffer not registered
+ HSAKMT_STATUS_MEMORY_ALIGNMENT = 37, // Memory parameter not aligned
+
+} HSAKMT_STATUS;
+
+//
+// HSA KFD interface version information. Calling software has to validate that it meets
+// the minimum interface version as described in the API specification.
+// All future structures will be extended in a backward compatible fashion.
+//
+
+typedef struct _HsaVersionInfo
+{
+ HSAuint32 KernelInterfaceMajorVersion; // supported kernel interface major version
+ HSAuint32 KernelInterfaceMinorVersion; // supported kernel interface minor version
+} HsaVersionInfo;
+
+//
+// HSA Topology Discovery Infrastructure structure definitions.
+// The infrastructure implementation is based on design specified in the Kernel HSA Driver ADD
+// The discoverable data is retrieved from ACPI structures in the platform infrastructure, as defined
+// in the "Heterogeneous System Architecture Detail Topology" specification.
+//
+// The following structure is returned on a call to hsaKmtAcquireSystemProperties() as output.
+// When the call is made within a process context, a "snapshot" of the topology information
+// is taken within the KFD to avoid any changes during the enumeration process.
+// The Snapshot is released when hsaKmtReleaseSystemProperties() is called
+// or when the process exits or is terminated.
+//
+
+typedef struct _HsaSystemProperties
+{
+ HSAuint32 NumNodes; // the number of "H-NUMA" memory nodes.
+ // each node represents a discoverable node of the system
+ // All other enumeration is done on a per-node basis
+
+ HSAuint32 PlatformOem; // identifies HSA platform, reflects the OEMID in the CRAT
+ HSAuint32 PlatformId; // HSA platform ID, reflects OEM TableID in the CRAT
+ HSAuint32 PlatformRev; // HSA platform revision, reflects Platform Table Revision ID
+} HsaSystemProperties;
+
+
+typedef union
+{
+ HSAuint32 Value;
+ struct
+ {
+ unsigned int HotPluggable : 1; // the node may be removed by some system action
+ // (event will be sent)
+ unsigned int HSAMMUPresent : 1; // This node has an ATS/PRI 1.1 compatible
+ // translation agent in the system (e.g. IOMMUv2)
+ unsigned int SharedWithGraphics : 1; // this HSA nodes' GPU function is also used for OS primary
+ // graphics render (= UI)
+ unsigned int QueueSizePowerOfTwo : 1; // This node GPU requires the queue size to be a power of 2 value
+ unsigned int QueueSize32bit : 1; // This node GPU requires the queue size to be less than 4GB
+ unsigned int QueueIdleEvent : 1; // This node GPU supports notification on Queue Idle
+ unsigned int VALimit : 1; // This node GPU has limited VA range for platform
+ // (typical 40bit). Affects shared VM use for 64bit apps
+ unsigned int WatchPointsSupported: 1; // Indicates if Watchpoints are available on the node.
+ unsigned int WatchPointsTotalBits: 4; // ld(Watchpoints) available. To determine the number use 2^value
+
+ unsigned int Reserved : 20;
+ } ui32;
+} HSA_CAPABILITY;
+
+
+//
+// HSA node properties. This structure is an output parameter of hsaKmtGetNodeProperties()
+// The application or runtime can use the information herein to size the topology management structures
+// Unless there is some very weird setup, there is at most one "GPU" device (with a certain number
+// of throughput compute units (= SIMDs) associated with a H-NUMA node.
+//
+
+#define HSA_PUBLIC_NAME_SIZE 128
+
+typedef struct _HsaNodeProperties
+{
+ HSAuint32 NumCPUCores; // # of latency (= CPU) cores present on this HSA node.
+ // This value is 0 for a HSA node with no such cores,
+ // e.g a "discrete HSA GPU"
+ HSAuint32 NumFComputeCores; // # of HSA throughtput (= GPU) FCompute cores ("SIMD") present in a node.
+ // This value is 0 if no FCompute cores are present (e.g. pure "CPU node").
+ HSAuint32 NumMemoryBanks; // # of discoverable memory bank affinity properties on this "H-NUMA" node.
+ HSAuint32 NumCaches; // # of discoverable cache affinity properties on this "H-NUMA" node.
+
+ HSAuint32 NumIOLinks; // # of discoverable IO link affinity properties of this node
+ // connecting to other nodes.
+
+ HSAuint32 CComputeIdLo; // low value of the logical processor ID of the latency (= CPU)
+ // cores available on this node
+ HSAuint32 FComputeIdLo; // low value of the logical processor ID of the throughput (= GPU)
+ // units available on this node
+
+ HSA_CAPABILITY Capability; // see above
+
+ HSAuint32 MaxWavesPerSIMD; // This identifies the max. number of launched waves per SIMD.
+ // If NumFComputeCores is 0, this value is ignored.
+ HSAuint32 LDSSizeInKB; // Size of Local Data Store in Kilobytes per SIMD Wavefront
+ HSAuint32 GDSSizeInKB; // Size of Global Data Store in Kilobytes shared across SIMD Wavefronts
+
+ HSAuint32 WaveFrontSize; // Number of SIMD cores per wavefront executed, typically 64,
+ // may be 32 or a different value for some HSA based architectures
+
+ HSAuint32 NumShaderBanks; // Number of Shader Banks or Shader Engines, typical values are 1 or 2
+
+
+ HSAuint32 NumArrays; // Number of SIMD arrays per engine
+ HSAuint32 NumCUPerArray; // Number of Compute Units (CU) per SIMD array
+ HSAuint32 NumSIMDPerCU; // Number of SIMD representing a Compute Unit (CU)
+
+ HSAuint32 MaxSlotsScratchCU; // Number of temp. memory ("scratch") wave slots available to access,
+ // may be 0 if HW has no restrictions
+
+ HSAuint32 EngineId; // Identifier (rev) of teh GPU uEngine or Firmware, may be 0
+
+ HSAuint16 VendorId; // GPU vendor id; 0 on latency (= CPU)-only nodes
+ HSAuint16 DeviceId; // GPU device id; 0 on latency (= CPU)-only nodes
+
+ HSAuint32 LocationId; // GPU BDF (Bus/Device/function number) - identifies the device
+ // location in the overall system
+ HSAuint64 LocalMemSize; // Local memory size
+ HSAuint32 MaxEngineClockMhzFCompute; // maximum engine clocks for CPU and
+ HSAuint32 MaxEngineClockMhzCCompute; // GPU function, including any boost caopabilities,
+
+ HSAuint16 MarketingName[HSA_PUBLIC_NAME_SIZE]; // Public name of the "device" on the node (board or APU name).
+ // Unicode string
+} HsaNodeProperties;
+
+
+typedef enum _HSA_HEAPTYPE
+{
+ HSA_HEAPTYPE_SYSTEM = 0,
+ HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC = 1, // CPU "visible" part of GPU device local memory (for discrete GPU)
+ HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE = 2, // CPU "invisible" part of GPU device local memory (for discrete GPU)
+ // All HSA accessible memory is per definition "CPU visible"
+ // "Private memory" is relevant for graphics interop only.
+ HSA_HEAPTYPE_GPU_GDS = 3, // GPU internal memory (GDS)
+ HSA_HEAPTYPE_GPU_LDS = 4, // GPU internal memory (LDS)
+ HSA_HEAPTYPE_GPU_SCRATCH = 5, // GPU special memory (scratch)
+
+ HSA_HEAPTYPE_NUMHEAPTYPES,
+ HSA_HEAPTYPE_SIZE = 0xFFFFFFFF
+} HSA_HEAPTYPE;
+
+typedef union
+{
+ HSAuint32 MemoryProperty;
+ struct
+ {
+ unsigned int HotPluggable : 1; // the memory may be removed by some system action,
+ // memory should be used for temporary data
+ unsigned int NonVolatile : 1; // memory content is preserved across a power-off cycle.
+ unsigned int Reserved :30;
+ } ui32;
+} HSA_MEMORYPROPERTY;
+
+
+//
+// Discoverable HSA Memory properties.
+// The structure is the output parameter of the hsaKmtGetNodeMemoryProperties() function
+//
+
+typedef struct _HsaMemoryProperties
+{
+ HSA_HEAPTYPE HeapType; // system or frame buffer,
+ union
+ {
+ HSAuint64 SizeInBytes; // physical memory size of the memory range in bytes
+ struct
+ {
+ HSAuint32 SizeInBytesLow; // physical memory size of the memory range in bytes (lower 32bit)
+ HSAuint32 SizeInBytesHigh; // physical memory size of the memory range in bytes (higher 32bit)
+ } ui32;
+ };
+ HSA_MEMORYPROPERTY Flags; // See definitions above
+
+ HSAuint32 Width; // memory width - the number of parallel bits of the memory interface
+ HSAuint32 MemoryClockMax; // memory clock for the memory, this allows computing the available bandwidth
+ // to the memory when needed
+ HSAuint64 VirtualBaseAddress; // if set to value != 0, indicates the virtual base address of the memory
+ // in process virtual space
+} HsaMemoryProperties;
+
+//
+// Discoverable Cache Properties. (optional).
+// The structure is the output parameter of the hsaKmtGetNodeMemoryProperties() function
+// Any of the parameters may be 0 (= not defined)
+//
+
+#define HSA_CPU_SIBLINGS 256
+#define HSA_PROCESSORID_ALL 0xFFFFFFFF
+
+typedef union
+{
+ HSAuint32 Value;
+ struct
+ {
+ unsigned int Data : 1;
+ unsigned int Instruction : 1;
+ unsigned int CPU : 1;
+ unsigned int HSACU : 1;
+ unsigned int Reserved :28;
+ } ui32;
+} HsaCacheType;
+
+typedef struct _HaCacheProperties
+{
+ HSAuint32 ProcessorIdLow; // Identifies the processor number
+
+ HSAuint32 CacheLevel; // Integer representing level: 1, 2, 3, 4, etc
+ HSAuint32 CacheSize; // Size of the cache
+ HSAuint32 CacheLineSize; // Cache line size in bytes
+ HSAuint32 CacheLinesPerTag; // Cache lines per Cache Tag
+ HSAuint32 CacheAssociativity; // Cache Associativity
+ HSAuint32 CacheLatency; // Cache latency in ns
+ HsaCacheType CacheType;
+ HSAuint32 SiblingMap[HSA_CPU_SIBLINGS];
+} HsaCacheProperties;
+
+
+//
+// Discoverable CPU Compute Properties. (optional).
+// The structure is the output parameter of the hsaKmtGetCComputeProperties() function
+// Any of the parameters may be 0 (= not defined)
+//
+
+typedef struct _HsaCComputeProperties
+{
+ HSAuint32 SiblingMap[HSA_CPU_SIBLINGS];
+} HsaCComputeProperties;
+
+//
+// Discoverable IoLink Properties (optional).
+// The structure is the output parameter of the hsaKmtGetIoLinkProperties() function.
+// Any of the parameters may be 0 (= not defined)
+//
+
+typedef enum _HSA_IOLINKTYPE {
+ HSA_IOLINKTYPE_UNDEFINED = 0,
+ HSA_IOLINKTYPE_HYPERTRANSPORT = 1,
+ HSA_IOLINKTYPE_PCIEXPRESS = 2,
+ HSA_IOLINKTYPE_AMBA = 3,
+ HSA_IOLINKTYPE_MIPI = 4,
+ HSA_IOLINKTYPE_OTHER = 5,
+ HSA_IOLINKTYPE_NUMIOLINKTYPES,
+ HSA_IOLINKTYPE_SIZE = 0xFFFFFFFF
+} HSA_IOLINKTYPE;
+
+typedef union
+{
+ HSAuint32 LinkProperty;
+ struct
+ {
+ unsigned int Override : 1; // bus link properties are determined by this structure
+ // not by the HSA_IOLINKTYPE. The other flags are valid
+ // only if this bit is set to one
+ unsigned int NonCoherent : 1; // The link doesn't support coherent transactions
+ // memory accesses across must not be set to "host cacheable"!
+ unsigned int NoAtomics32bit : 1; // The link doesn't support 32bit-wide atomic transactions
+ unsigned int NoAtomics64bit : 1; // The link doesn't support 64bit-wide atomic transactions
+ unsigned int Reserved :28;
+ } ui32;
+} HSA_LINKPROPERTY;
+
+
+typedef struct _HsaIoLinkProperties
+{
+ HSA_IOLINKTYPE IoLinkType; // see above
+ HSAuint32 VersionMajor; // Bus interface version (optional)
+ HSAuint32 VersionMinor; // Bus interface version (optional)
+
+ HSAuint32 NodeFrom; //
+ HSAuint32 NodeTo; //
+
+ HSAuint32 Weight; // weight factor (derived from CDIT)
+
+ HSAuint32 MinimumLatency; // minimum cost of time to transfer (rounded to ns)
+ HSAuint32 MaximumLatency; // maximum cost of time to transfer (rounded to ns)
+ HSAuint32 MinimumBandwidth; // minimum interface Bandwidth in MB/s
+ HSAuint32 MaximumBandwidth; // maximum interface Bandwidth in MB/s
+ HSAuint32 RecTransferSize; // recommended transfer size to reach maximum bandwidth in Bytes
+ HSA_LINKPROPERTY Flags; // override flags (may be active for specific platforms)
+} HsaIoLinkProperties;
+
+//
+// Memory allocation definitions for the KFD HSA interface
+//
+
+typedef struct _HsaMemFlags
+{
+ union
+ {
+ struct
+ {
+ unsigned int NonPaged : 1; // default = 0: pageable memory
+ unsigned int CachePolicy : 2; // see HSA_CACHING_TYPE
+ unsigned int ReadOnly : 1; // default = 0: Read/Write memory
+ unsigned int PageSize : 2; // see HSA_PAGE_SIZE
+ unsigned int HostAccess : 1; // default = 0: GPU access only
+ unsigned int NoSubstitute: 1; // default = 0: if specific memory is not available on node (e.g. on
+ // discrete GPU local), allocation may fall back to system memory node 0
+ // memory (= always available). Otherwise no allocation is possible.
+ unsigned int GDSMemory : 1; // default = 0: If set, the allocation will occur in GDS heap.
+ // HostAccess must be 0, all other flags (except NoSubstitute) should
+ // be 0 when setting this entry to 1. GDS allocation may fail due to
+ // limited resources. Application code is required to work without
+ // any allocated GDS memory using regular memory.
+ // Allocation fails on any node without GPU function.
+ unsigned int Scratch : 1; // default = 0: If set, the allocation will occur in GPU "scratch area".
+ // HostAccess must be 0, all other flags (except NoSubstitute) should be 0
+ // when setting this entry to 1. Scratch allocation may fail due to limited
+ // resources. Application code is required to work without any allocation.
+ // Allocation fails on any node without GPU function.
+ unsigned int Reserved : 22;
+ } ui32;
+ HSAuint32 Value;
+ };
+} HsaMemFlags;
+
+typedef enum _HSA_CACHING_TYPE
+{
+ HSA_CACHING_CACHED = 0,
+ HSA_CACHING_NONCACHED = 1,
+ HSA_CACHING_WRITECOMBINED = 2,
+ HSA_CACHING_RESERVED = 3,
+ HSA_CACHING_NUM_CACHING,
+ HSA_CACHING_SIZE = 0xFFFFFFFF
+} HSA_CACHING_TYPE;
+
+typedef enum _HSA_PAGE_SIZE
+{
+ HSA_PAGE_SIZE_4KB = 0,
+ HSA_PAGE_SIZE_64KB = 1, //64KB pages, not generally available in systems
+ HSA_PAGE_SIZE_2MB = 2,
+ HSA_PAGE_SIZE_1GB = 3, //1GB pages, not generally available in systems
+} HSA_PAGE_SIZE;
+
+
+typedef enum _HSA_DEVICE
+{
+ HSA_DEVICE_CPU = 0,
+ HSA_DEVICE_GPU = 1,
+ MAX_HSA_DEVICE = 2
+} HSA_DEVICE;
+
+
+typedef enum _HSA_QUEUE_PRIORITY
+{
+ HSA_QUEUE_PRIORITY_MINIMUM = -3,
+ HSA_QUEUE_PRIORITY_LOW = -2,
+ HSA_QUEUE_PRIORITY_BELOW_NORMAL = -1,
+ HSA_QUEUE_PRIORITY_NORMAL = 0,
+ HSA_QUEUE_PRIORITY_ABOVE_NORMAL = 1,
+ HSA_QUEUE_PRIORITY_HIGH = 2,
+ HSA_QUEUE_PRIORITY_MAXIMUM = 3,
+ HSA_QUEUE_PRIORITY_NUM_PRIORITY,
+ HSA_QUEUE_PRIORITY_SIZE = 0xFFFFFFFF
+} HSA_QUEUE_PRIORITY;
+
+typedef enum _HSA_QUEUE_TYPE
+{
+ HSA_QUEUE_COMPUTE = 1, // AMD PM4 compatible Compute Queue
+ HSA_QUEUE_SDMA = 2, // SDMA Queue, used for data transport and format conversion (e.g. (de-)tiling, etc).
+ HSA_QUEUE_MULTIMEDIA_DECODE = 3, // reserved, for HSA multimedia decode queue
+ HSA_QUEUE_MULTIMEDIA_ENCODE = 4, // reserved, for HSA multimedia encode queue
+
+ // the following values indicate a queue type permitted to reference OS graphics
+ // resources through the interoperation API. See [5] "HSA Graphics Interoperation
+ // specification" for more details on use of such resources.
+
+ HSA_QUEUE_COMPUTE_OS = 11, // AMD PM4 compatible Compute Queue
+ HSA_QUEUE_SDMA_OS = 12, // SDMA Queue, used for data transport and format conversion (e.g. (de-)tiling, etc).
+ HSA_QUEUE_MULTIMEDIA_DECODE_OS = 13, // reserved, for HSA multimedia decode queue
+ HSA_QUEUE_MULTIMEDIA_ENCODE_OS = 14, // reserved, for HSA multimedia encode queue
+
+ HSA_QUEUE_COMPUTE_AQL = 21, // HSA AQL packet compatible Compute Queue
+ HSA_QUEUE_DMA_AQL = 22, // HSA AQL packet compatible DMA Queue
+
+ // more types in the future
+
+ HSA_QUEUE_TYPE_SIZE = 0xFFFFFFFF //aligns to 32bit enum
+} HSA_QUEUE_TYPE;
+
+typedef struct _HsaQueueResource
+{
+ HSA_QUEUEID QueueId; /** queue ID */
+ /** Doorbell address to notify HW of a new dispatch */
+ union
+ {
+ HSAuint32* Queue_DoorBell;
+ HSAuint64* Queue_DoorBell_aql;
+ HSAuint64 QueueDoorBell;
+ };
+
+ /** virtual address to notify HW of queue write ptr value */
+ union
+ {
+ HSAuint32* Queue_write_ptr;
+ HSAuint64* Queue_write_ptr_aql;
+ HSAuint64 QueueWptrValue;
+ };
+
+ /** virtual address updated by HW to indicate current read location */
+ union
+ {
+ HSAuint32* Queue_read_ptr;
+ HSAuint64* Queue_read_ptr_aql;
+ HSAuint64 QueueRptrValue;
+ };
+
+} HsaQueueResource;
+
+
+//TEMPORARY structure definition - to be used only on "Triniti + Southern Islands" platform
+typedef struct _HsaQueueReport
+{
+ HSAuint32 VMID; //Required on SI to dispatch IB in primary ring
+ void* QueueAddress; //virtual address of UM mapped compute ring
+ HSAuint64 QueueSize; //size of the UM mapped compute ring
+} HsaQueueReport;
+
+
+
+typedef enum _HSA_DBG_WAVEOP
+{
+ HSA_DBG_WAVEOP_HALT = 1, //Halts a wavefront
+ HSA_DBG_WAVEOP_RESUME = 2, //Resumes a wavefront
+ HSA_DBG_WAVEOP_KILL = 3, //Kills a wavefront
+ HSA_DBG_WAVEOP_DEBUG = 4, //Causes wavefront to enter debug mode
+ HSA_DBG_WAVEOP_TRAP = 5, //Causes wavefront to take a trap
+ HSA_DBG_NUM_WAVEOP = 5,
+ HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF
+} HSA_DBG_WAVEOP;
+
+typedef enum _HSA_DBG_WAVEMODE
+{
+ HSA_DBG_WAVEMODE_SINGLE = 0, //send command to a single wave
+ //Broadcast to all wavefronts of all processes is not supported for HSA user mode
+ HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2, //send to waves within current process
+ HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3, //send to waves within current process on CU
+ HSA_DBG_NUM_WAVEMODE = 3,
+ HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF
+} HSA_DBG_WAVEMODE;
+
+
+typedef enum _HSA_DBG_WAVEMSG_TYPE
+{
+ HSA_DBG_WAVEMSG_AUTO = 0,
+ HSA_DBG_WAVEMSG_USER = 1,
+ HSA_DBG_WAVEMSG_ERROR = 2,
+ HSA_DBG_NUM_WAVEMSG,
+ HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF
+} HSA_DBG_WAVEMSG_TYPE;
+
+typedef enum _HSA_DBG_WATCH_MODE
+{
+ HSA_DBG_WATCH_READ = 0, //Read operations only
+ HSA_DBG_WATCH_NONREAD = 1, //Write or Atomic operations only
+ HSA_DBG_WATCH_ATOMIC = 2, //Atomic Operations only
+ HSA_DBG_WATCH_ALL = 3, //Read, Write or Atomic operations
+ HSA_DBG_WATCH_NUM,
+ HSA_DBG_WATCH_SIZE = 0xFFFFFFFF
+} HSA_DBG_WATCH_MODE;
+
+
+//This structure is hardware specific and may change in the future
+typedef struct _HsaDbgWaveMsgAMDGen2
+{
+ HSAuint32 Value;
+ HSAuint32 Reserved2;
+
+} HsaDbgWaveMsgAMDGen2;
+
+typedef union _HsaDbgWaveMessageAMD
+{
+ HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2;
+ //for future HsaDbgWaveMsgAMDGen3;
+} HsaDbgWaveMessageAMD;
+
+typedef struct _HsaDbgWaveMessage
+{
+ void* MemoryVA; // ptr to associated host-accessible data
+ HsaDbgWaveMessageAMD DbgWaveMsg;
+} HsaDbgWaveMessage;
+
+
+//
+// HSA sync primitive, Event and HW Exception notification API definitions
+// The API functions allow the runtime to define a so-called sync-primitive, a SW object
+// combining a user-mode provided "syncvar" and a scheduler event that can be signaled
+// through a defined GPU interrupt. A syncvar is a process virtual memory location of
+// a certain size that can be accessed by CPU and GPU shader code within the process to set
+// and query the content within that memory. The definition of the content is determined by
+// the HSA runtime and potentially GPU shader code interfacing with the HSA runtime.
+// The syncvar values may be commonly written through an PM4 WRITE_DATA packet in the
+// user mode instruction stream.
+// The OS scheduler event is typically associated and signaled by an interrupt issued by
+// the GPU, but other HSA system interrupt conditions from other HW (e.g. IOMMUv2) may be
+// surfaced by the KFD by this mechanism, too.
+//
+
+// these are the new definitions for events
+typedef enum _HSA_EVENTTYPE
+{
+ HSA_EVENTTYPE_SIGNAL = 0, //user-mode generated GPU signal
+ HSA_EVENTTYPE_NODECHANGE = 1, //HSA node change (attach/detach)
+ HSA_EVENTTYPE_DEVICESTATECHANGE = 2, //HSA device state change( start/stop )
+ HSA_EVENTTYPE_HW_EXCEPTION = 3, //GPU shader exception event
+ HSA_EVENTTYPE_SYSTEM_EVENT = 4, //GPU SYSCALL with parameter info
+ HSA_EVENTTYPE_DEBUG_EVENT = 5, //GPU signal for debugging
+ HSA_EVENTTYPE_PROFILE_EVENT = 6, //GPU signal for profiling
+ HSA_EVENTTYPE_QUEUE_EVENT = 7, //GPU signal queue idle state (EOP pm4)
+ //...
+ HSA_EVENTTYPE_MAXID,
+ HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF
+} HSA_EVENTTYPE;
+
+typedef HSAuint32 HSA_EVENTID;
+
+//
+// Subdefinitions for various event types: Syncvar
+//
+
+typedef struct _HsaSyncVar
+{
+ union
+ {
+ void* UserData; //pointer to user mode data
+ HSAuint64 UserDataPtrValue; //64bit compatibility of value
+ } SyncVar;
+ HSAuint64 SyncVarSize;
+} HsaSyncVar;
+
+//
+// Subdefinitions for various event types: NodeChange
+//
+
+typedef enum _HSA_EVENTTYPE_NODECHANGE_FLAGS
+{
+ HSA_EVENTTYPE_NODECHANGE_ADD = 0,
+ HSA_EVENTTYPE_NODECHANGE_REMOVE = 1,
+ HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF
+} HSA_EVENTTYPE_NODECHANGE_FLAGS;
+
+typedef struct _HsaNodeChange
+{
+ HSA_EVENTTYPE_NODECHANGE_FLAGS Flags; // HSA node added/removed on the platform
+} HsaNodeChange;
+
+//
+// Sub-definitions for various event types: DeviceStateChange
+//
+
+typedef enum _HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS
+{
+ HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0, //device started (and available)
+ HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1, //device stopped (i.e. unavailable)
+ HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF
+} HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS;
+
+typedef struct _HsaDeviceStateChange
+{
+ HSAuint32 NodeId; // F-NUMA node that contains the device
+ HSA_DEVICE Device; // device type: GPU or CPU
+ HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; // event flags
+} HsaDeviceStateChange;
+
+
+typedef struct _HsaEventData
+{
+ HSA_EVENTTYPE EventType; //event type
+ union
+ {
+ // return data associated with HSA_EVENTTYPE_SIGNAL and other events
+ HsaSyncVar SyncVar;
+
+ // data associated with HSA_EVENTTYPE_NODE_CHANGE
+ HsaNodeChange NodeChangeState;
+
+ // data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE
+ HsaDeviceStateChange DeviceState;
+ } EventData;
+
+ // the following data entries are internal to the KFD & thunk itself.
+
+ HSAuint64 HWData1; // internal thunk store for Event data (OsEventHandle)
+ HSAuint64 HWData2; // internal thunk store for Event data (HWAddress)
+ HSAuint32 HWData3; // internal thunk store for Event data (HWData)
+} HsaEventData;
+
+
+typedef struct _HsaEventDescriptor
+{
+ HSA_EVENTTYPE EventType; // event type to allocate
+ HSAuint32 NodeId; // H-NUMA node containing GPU device that is event source
+ HsaSyncVar SyncVar; // pointer to user mode syncvar data, syncvar->UserDataPtrValue may be NULL
+} HsaEventDescriptor;
+
+
+typedef struct _HsaEvent
+{
+ HSA_EVENTID EventId;
+ HsaEventData EventData;
+} HsaEvent;
+
+typedef enum _HsaEventTimeout
+{
+ HSA_EVENTTIMEOUT_IMMEDIATE = 0,
+ HSA_EVENTTIMEOUT_INFINITE = 0xFFFFFFFF
+} HsaEventTimeOut;
+
+typedef struct _HsaClockCounters
+{
+ HSAuint64 GPUClockCounter;
+ HSAuint64 CPUClockCounter;
+ HSAuint64 SystemClockCounter;
+ HSAuint64 SystemClockFrequencyHz;
+} HsaClockCounters;
+
+#ifndef DEFINE_GUID
+typedef struct _HSA_UUID
+{
+ HSAuint32 Data1;
+ HSAuint16 Data2;
+ HSAuint16 Data3;
+ HSAuint8 Data4[8];
+} HSA_UUID;
+
+#define HSA_DEFINE_UUID(name, dw, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
+ static const HSA_UUID name = {dw, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}
+#else
+#define HSA_UUID GUID
+#define HSA_DEFINE_UUID DEFINE_GUID
+#endif
+
+
+// GUID that identifies the GPU Shader Sequencer (SQ) block
+// {B5C396B6-D310-47E4-86FC-5CC3043AF508}
+HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SQ,
+0xb5c396b6, 0xd310, 0x47e4, 0x86, 0xfc, 0x5c, 0xc3, 0x4, 0x3a, 0xf5, 0x8);
+
+// GUID that identifies the GPU Memory Controller (MC) block
+// {13900B57-4956-4D98-81D0-68521937F59C}
+HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_MC,
+0x13900b57, 0x4956, 0x4d98, 0x81, 0xd0, 0x68, 0x52, 0x19, 0x37, 0xf5, 0x9c);
+
+// GUID that identifies the IMOMMUv2 HW device
+// {80969879-B0F6-4BE6-97F6-6A6300F5101D}
+HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_IOMMUV2,
+0x80969879, 0xb0f6, 0x4be6, 0x97, 0xf6, 0x6a, 0x63, 0x0, 0xf5, 0x10, 0x1d);
+
+// GUID that identifies the KFD
+// {EA9B5AE1-6C3F-44B3-8954-DAF07565A90A}
+HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_KERNEL_DRIVER,
+0xea9b5ae1, 0x6c3f, 0x44b3, 0x89, 0x54, 0xda, 0xf0, 0x75, 0x65, 0xa9, 0xa);
+
+typedef enum _HSA_PROFILE_TYPE
+{
+ HSA_PROFILE_TYPE_PRIVILEGED_IMMEDIATE = 0, //immediate access counter (KFD access only)
+ HSA_PROFILE_TYPE_PRIVILEGED_STREAMING = 1, //streaming counter, HW continuously
+ //writes to memory on updates (KFD access only)
+ HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE = 2, //user-queue accessible counter
+ HSA_PROFILE_TYPE_NONPRIV_STREAMING = 3, //user-queue accessible counter
+ //...
+ HSA_PROFILE_TYPE_NUM,
+
+ HSA_PROFILE_TYPE_SIZE = 0xFFFFFFFF // In order to align to 32-bit value
+} HSA_PROFILE_TYPE;
+
+
+typedef struct _HsaCounterFlags
+{
+ union
+ {
+ struct
+ {
+ unsigned int Global : 1; // counter is global
+ // (not tied to VMID/WAVE/CU, ...)
+ unsigned int Resettable : 1; // counter can be reset by SW
+ // (always to 0?)
+ unsigned int ReadOnly : 1; // counter is read-only
+ // (but may be reset, if indicated)
+ unsigned int Stream : 1; // counter has streaming capability
+ // (after trigger, updates buffer)
+ unsigned int Reserved : 28;
+ } ui32;
+ HSAuint32 Value;
+ };
+} HsaCounterFlags;
+
+
+typedef struct _HsaCounter
+{
+ HSA_PROFILE_TYPE Type; // specifies the counter type
+ HSAuint64 CounterId; // indicates counter register offset
+ HSAuint32 CounterSizeInBits; // indicates relevant counter bits
+ HSAuint64 CounterMask; // bitmask for counter value (if applicable)
+ HsaCounterFlags Flags; // Property flags (see above)
+ HSAuint32 BlockIndex; // identifies block the counter belongs to,
+ // value may be 0 to NumBlocks
+} HsaCounter;
+
+
+typedef struct _HsaCounterBlockProperties
+{
+ HSA_UUID BlockId; // specifies the block location
+ HSAuint32 NumCounters; // How many counters are available?
+ // (sizes Counters[] array below)
+ HSAuint32 NumConcurrent; // How many counter slots are available
+ // in block?
+ HsaCounter Counters[1]; // Start of counter array
+ // (NumCounters elements total)
+} HsaCounterBlockProperties;
+
+
+typedef struct _HsaCounterProperties
+{
+ HSAuint32 NumBlocks; // How many profilable block are available?
+ // (sizes Blocks[] array below)
+ HSAuint32 NumConcurrent; // How many blocks slots can be queried
+ // concurrently by HW?
+ HsaCounterBlockProperties Blocks[1]; // Start of block array
+ // (NumBlocks elements total)
+} HsaCounterProperties;
+
+typedef HSAuint64 HSATraceId;
+
+typedef struct _HsaPmcTraceRoot
+{
+ HSAuint64 TraceBufferMinSizeBytes;// (page aligned)
+ HSAuint32 NumberOfPasses;
+ HSATraceId TraceId;
+} HsaPmcTraceRoot;
+
+#pragma pack(pop, hsakmttypes_h)
+
+
+#ifdef __cplusplus
+} //extern "C"
+#endif
+
+#endif //_HSAKMTTYPES_H_