summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Goz <ben.goz@amd.com>2014-12-07 14:36:21 +0200
committerOded Gabbay <oded.gabbay@gmail.com>2015-06-12 23:22:35 +0300
commit992c631933fd3bc81e56323045797669d0792dbd (patch)
treedabcb0a6b12acf5d695153469567904c45f0bbdb
parent37910359f0877d3c22a56f384d3714fa79d51cad (diff)
Add support for allocating executable memory
Signed-off-by: Ben Goz <ben.goz@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
-rw-r--r--include/hsakmt.h16
-rw-r--r--include/hsakmttypes.h24
-rw-r--r--src/memory.c24
3 files changed, 53 insertions, 11 deletions
diff --git a/include/hsakmt.h b/include/hsakmt.h
index 3983aa0..c87b3f8 100644
--- a/include/hsakmt.h
+++ b/include/hsakmt.h
@@ -382,7 +382,6 @@ hsaKmtDeregisterMemory(
/**
Ensures that the memory is resident and can be accessed by GPU
- Not implemented yet
*/
HSAKMT_STATUS
@@ -395,7 +394,6 @@ hsaKmtMapMemoryToGPU(
/**
Releases the residency of the memory
- Not implemented yet
*/
HSAKMT_STATUS
@@ -557,6 +555,20 @@ hsaKmtPmcStopTrace(
HSATraceId TraceId //IN
);
+/**
+ Sets trap handler and trap buffer to be used for all queues associated with the specified NodeId within this process context
+*/
+
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtSetTrapHandler(
+ HSAuint32 NodeId, //IN
+ void* TrapHandlerBaseAddress, //IN
+ HSAuint64 TrapHandlerSizeInBytes, //IN
+ void* TrapBufferBaseAddress, //IN
+ HSAuint64 TrapBufferSizeInBytes //IN
+ );
+
#ifdef __cplusplus
} //extern "C"
#endif
diff --git a/include/hsakmttypes.h b/include/hsakmttypes.h
index 41e9cba..ec10255 100644
--- a/include/hsakmttypes.h
+++ b/include/hsakmttypes.h
@@ -420,7 +420,27 @@ typedef struct _HsaMemFlags
// when setting this entry to 1. Scratch allocation may fail due to limited
// resources. Application code is required to work without any allocation.
// Allocation fails on any node without GPU function.
- unsigned int Reserved : 22;
+ unsigned int AtomicAccessFull: 1; // default = 0: If set, the memory will be allocated and mapped to allow
+ // atomic ops processing. On AMD APU, this will use the ATC path on system
+ // memory, irrespective of the NonPaged flag setting (i.e. if NonPaged is set,
+ // the memory is page-locked but mapped through IOMMUv2 instead of GPUVM).
+ // All atomic ops must be supported on this memory.
+ unsigned int AtomicAccessPartial: 1; // default = 0: See above for AtomicAccessFull description, however
+ // focused on AMD discrete GPUs that support PCIe atomics; the memory
+ // allocation is mapped to allow for PCIe atomics to operate on system
+ // memory, irrespective of whether NonPaged is set or an ATC path is present
+ // in the system. The atomic operations supported are limited to SWAP,
+ // CompareAndSwap (CAS) and FetchAdd (this PCIe op allows both atomic
+ // increment and decrement via two's-complement arithmetic), which are the
+ // only atomic ops directly supported in PCI Express.
+ // On AMD APU, setting this flag will allocate the same type of memory
+ // as AtomicAccessFull, but it will be considered compatible with
+ // discrete GPU atomic operations access.
+ unsigned int ExecuteAccess: 1; // default = 0: Identifies if memory is primarily used for data or accessed
+ // for executable code (e.g. queue memory) by the host CPU or the device.
+ // Influences the page attribute setting within the allocation
+ unsigned int Reserved : 19;
+
} ui32;
HSAuint32 Value;
};
@@ -701,7 +721,7 @@ typedef struct _HsaMemoryAccessFault
{
HSAuint32 NodeId; // H-NUMA node that contains the device where the memory access occurred
HSAuint64 VirtualAddress; // virtual address this occurred on
- HsaAccessAttributeFailure Failure; // failure attribute
+ HsaAccessAttributeFailure Failure; // failure attribute
HSA_EVENTID_MEMORYFLAGS Flags; // event flags
} HsaMemoryAccessFault;
diff --git a/src/memory.c b/src/memory.c
index d65730a..718dd97 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -26,8 +26,12 @@
#include "libhsakmt.h"
#include "linux/kfd_ioctl.h"
#include <stdlib.h>
+#include <stdio.h>
#include <string.h>
#include <assert.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <fcntl.h>
#include "fmm.h"
HSAKMT_STATUS
@@ -97,6 +101,7 @@ hsaKmtAllocMemory(
CHECK_KFD_OPEN();
HSAKMT_STATUS result;
uint32_t gpu_id;
+ int err;
result = validate_nodeid(PreferredNode, &gpu_id);
if (result != HSAKMT_STATUS_SUCCESS)
@@ -108,16 +113,21 @@ hsaKmtAllocMemory(
return HSAKMT_STATUS_INVALID_PARAMETER;
}
- if (MemFlags.ui32.HostAccess && !MemFlags.ui32.NonPaged){
- int err = posix_memalign(MemoryAddress, page_size, SizeInBytes);
- if (err == 0)
- return HSAKMT_STATUS_SUCCESS;
- else
+ if (MemFlags.ui32.HostAccess && !MemFlags.ui32.NonPaged) {
+ err = posix_memalign(MemoryAddress, page_size, SizeInBytes);
+ if (err != 0)
return HSAKMT_STATUS_NO_MEMORY;
+ if (MemFlags.ui32.ExecuteAccess) {
+ err = mprotect(*MemoryAddress, SizeInBytes, PROT_READ | PROT_WRITE | PROT_EXEC);
+ if (err != 0) {
+ free(*MemoryAddress);
+ return err;
+ }
+ }
+ return HSAKMT_STATUS_SUCCESS;
}
- else
- return HSAKMT_STATUS_INVALID_PARAMETER;
+ return HSAKMT_STATUS_INVALID_PARAMETER;
}
HSAKMT_STATUS