diff options
author | Ben Goz <ben.goz@amd.com> | 2014-12-07 14:36:21 +0200 |
---|---|---|
committer | Oded Gabbay <oded.gabbay@gmail.com> | 2015-06-12 23:22:35 +0300 |
commit | 992c631933fd3bc81e56323045797669d0792dbd (patch) | |
tree | dabcb0a6b12acf5d695153469567904c45f0bbdb | |
parent | 37910359f0877d3c22a56f384d3714fa79d51cad (diff) | |
Add support for allocating executable memory
Signed-off-by: Ben Goz <ben.goz@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
-rw-r--r-- | include/hsakmt.h | 16 | ||||
-rw-r--r-- | include/hsakmttypes.h | 24 | ||||
-rw-r--r-- | src/memory.c | 24 |
3 files changed, 53 insertions, 11 deletions
diff --git a/include/hsakmt.h b/include/hsakmt.h index 3983aa0..c87b3f8 100644 --- a/include/hsakmt.h +++ b/include/hsakmt.h @@ -382,7 +382,6 @@ hsaKmtDeregisterMemory( /** Ensures that the memory is resident and can be accessed by GPU - Not implemented yet */ HSAKMT_STATUS @@ -395,7 +394,6 @@ hsaKmtMapMemoryToGPU( /** Releases the residency of the memory - Not implemented yet */ HSAKMT_STATUS @@ -557,6 +555,20 @@ hsaKmtPmcStopTrace( HSATraceId TraceId //IN ); +/** + Sets trap handler and trap buffer to be used for all queues associated with the specified NodeId within this process context +*/ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtSetTrapHandler( + HSAuint32 NodeId, //IN + void* TrapHandlerBaseAddress, //IN + HSAuint64 TrapHandlerSizeInBytes, //IN + void* TrapBufferBaseAddress, //IN + HSAuint64 TrapBufferSizeInBytes //IN + ); + #ifdef __cplusplus } //extern "C" #endif diff --git a/include/hsakmttypes.h b/include/hsakmttypes.h index 41e9cba..ec10255 100644 --- a/include/hsakmttypes.h +++ b/include/hsakmttypes.h @@ -420,7 +420,27 @@ typedef struct _HsaMemFlags // when setting this entry to 1. Scratch allocation may fail due to limited // resources. Application code is required to work without any allocation. // Allocation fails on any node without GPU function. - unsigned int Reserved : 22; + unsigned int AtomicAccessFull: 1; // default = 0: If set, the memory will be allocated and mapped to allow + // atomic ops processing. On AMD APU, this will use the ATC path on system + // memory, irrespective of the NonPaged flag setting (= if NonPaged is set, + // the memory is pagelocked but mapped through IOMMUv2 instead of GPUVM). + // All atomic ops must be supported on this memory. 
+ unsigned int AtomicAccessPartial: 1; // default = 0: See above for AtomicAccessFull description, however + // focused on AMD discrete GPU that support PCIe atomics; the memory + // allocation is mapped to allow for PCIe atomics to operate on system + // memory, irrespective of NonPaged set or the presence of an ATC path + // in the system. The atomic operations supported are limited to SWAP, + // CompareAndSwap (CAS) and FetchAdd (this PCIe op allows both atomic + // increment and decrement via 2-complement arithmetic), which are the + // only atomic ops directly supported in PCI Express. + // On AMD APU, setting this flag will allocate the same type of memory + // as AtomicAccessFull, but it will be considered compatible with + // discrete GPU atomic operations access. + unsigned int ExecuteAccess: 1; // default = 0: Identifies if memory is primarily used for data or accessed + // for executable code (e.g. queue memory) by the host CPU or the device. + // Influences the page attribute setting within the allocation + unsigned int Reserved : 19; + } ui32; HSAuint32 Value; }; @@ -701,7 +721,7 @@ typedef struct _HsaMemoryAccessFault { HSAuint32 NodeId; // H-NUMA node that contains the device where the memory access occurred HSAuint64 VirtualAddress; // virtual address this occurred on - HsaAccessAttributeFailure Failure; // failure attribute + HsaAccessAttributeFailure Failure; // failure attribute HSA_EVENTID_MEMORYFLAGS Flags; // event flags } HsaMemoryAccessFault; diff --git a/src/memory.c b/src/memory.c index d65730a..718dd97 100644 --- a/src/memory.c +++ b/src/memory.c @@ -26,8 +26,12 @@ #include "libhsakmt.h" #include "linux/kfd_ioctl.h" #include <stdlib.h> +#include <stdio.h> #include <string.h> #include <assert.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <fcntl.h> #include "fmm.h" HSAKMT_STATUS @@ -97,6 +101,7 @@ hsaKmtAllocMemory( CHECK_KFD_OPEN(); HSAKMT_STATUS result; uint32_t gpu_id; + int err; result = validate_nodeid(PreferredNode, 
&gpu_id); if (result != HSAKMT_STATUS_SUCCESS) @@ -108,16 +113,21 @@ hsaKmtAllocMemory( return HSAKMT_STATUS_INVALID_PARAMETER; } - if (MemFlags.ui32.HostAccess && !MemFlags.ui32.NonPaged){ - int err = posix_memalign(MemoryAddress, page_size, SizeInBytes); - if (err == 0) - return HSAKMT_STATUS_SUCCESS; - else + if (MemFlags.ui32.HostAccess && !MemFlags.ui32.NonPaged) { + err = posix_memalign(MemoryAddress, page_size, SizeInBytes); + if (err != 0) return HSAKMT_STATUS_NO_MEMORY; + if (MemFlags.ui32.ExecuteAccess) { + err = mprotect(*MemoryAddress, SizeInBytes, PROT_READ | PROT_WRITE | PROT_EXEC); + if (err != 0) { + free(*MemoryAddress); + return err; + } + } + return HSAKMT_STATUS_SUCCESS; } - else - return HSAKMT_STATUS_INVALID_PARAMETER; + return HSAKMT_STATUS_INVALID_PARAMETER; } HSAKMT_STATUS |