diff options
author | Evgeny Pinchuk <evgeny.pinchuk@amd.com> | 2014-07-29 13:52:02 +0300 |
---|---|---|
committer | Oded Gabbay <oded.gabbay@amd.com> | 2014-07-29 13:52:02 +0300 |
commit | 8f560d5b8333d2ffd09e05384759fa641f95e0f4 (patch) | |
tree | 9a5b96186450cfeca5901e5ea9d75b4dbda6de7d | |
parent | 1905152a3387f65da8080f8d46e484dc0437ebda (diff) |
Add topology module
Signed-off-by: Evgeny Pinchuk <evgeny.pinchuk@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
-rw-r--r-- | src/topology.c | 840 |
1 files changed, 834 insertions, 6 deletions
diff --git a/src/topology.c b/src/topology.c
index e52f17b..492b5a3 100644
--- a/src/topology.c
+++ b/src/topology.c
@@ -30,6 +30,657 @@
 #include <string.h>
 
 #include "libhsakmt.h"
+#include "fmm.h"
+
+#define PAGE_SIZE 4096
+#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
+#define NUM_OF_HEAPS 2
+
+/* SYSFS related */
+#define KFD_SYSFS_PATH_GENERATION_ID "/sys/devices/virtual/kfd/kfd/topology/generation_id"
+#define KFD_SYSFS_PATH_SYSTEM_PROPERTIES "/sys/devices/virtual/kfd/kfd/topology/system_properties"
+#define KFD_SYSFS_PATH_NODES "/sys/devices/virtual/kfd/kfd/topology/nodes"
+
+/* Size of the buffer that receives one sysfs property name */
+#define KFD_SYSFS_PROP_NAME_SIZE 256
+/*
+ * One "name value" line of a sysfs properties file. The %255s width must stay
+ * one below KFD_SYSFS_PROP_NAME_SIZE so an oversized name cannot overrun the
+ * buffer; %n stores the number of characters consumed into an int.
+ */
+#define KFD_SYSFS_PROP_FMT "%255s %llu\n%n"
+
+/* Everything the snapshot keeps about one topology node */
+typedef struct {
+    uint32_t gpu_id;
+    HsaNodeProperties node;
+    HsaMemoryProperties *mem; /* node.NumMemoryBanks elements */
+    HsaCacheProperties *cache; /* node.NumCaches elements */
+    HsaIoLinkProperties *link; /* node.NumIOLinks elements */
+} node_t;
+
+/*
+ * Current topology snapshot. g_node holds g_system->NumNodes elements.
+ * The g_ prefix keeps the system properties pointer from clashing with
+ * system(3) should <stdlib.h> be in scope.
+ */
+static HsaSystemProperties *g_system = NULL;
+static node_t *g_node = NULL;
+
+static HSAKMT_STATUS topology_take_snapshot(void);
+static HSAKMT_STATUS topology_drop_snapshot(void);
+
+/* Release the property arrays owned by *n; the node_t itself is not freed. */
+static void
+free_node(node_t *n)
+{
+    assert(n);
+
+    if (n == NULL)
+        return;
+
+    /* free(NULL) is a no-op, so the pointers need no individual guards */
+    free(n->mem);
+    n->mem = NULL;
+    free(n->cache);
+    n->cache = NULL;
+    free(n->link);
+    n->link = NULL;
+}
+
+/* Release an array of count nodes together with their property arrays. */
+static void
+free_nodes(node_t *nodes, uint32_t count)
+{
+    uint32_t i;
+
+    if (nodes == NULL)
+        return;
+
+    for (i = 0; i < count; i++)
+        free_node(&nodes[i]);
+    free(nodes);
+}
+
+/*
+ * Read a sysfs file into a newly allocated, NUL-terminated buffer holding at
+ * most PAGE_SIZE-1 bytes of data. On success the caller owns *buf and must
+ * free() it; *len, when not NULL, receives the number of bytes read.
+ * Returns HSAKMT_STATUS_ERROR if the file cannot be opened or yields no data
+ * and HSAKMT_STATUS_NO_MEMORY if the buffer cannot be allocated.
+ */
+static HSAKMT_STATUS
+topology_sysfs_read_file(const char *path, char **buf, size_t *len)
+{
+    FILE *fd;
+    char *read_buf;
+    size_t read_size;
+
+    assert(path);
+    assert(buf);
+
+    fd = fopen(path, "r");
+    if (!fd)
+        return HSAKMT_STATUS_ERROR;
+
+    read_buf = malloc(PAGE_SIZE);
+    if (!read_buf) {
+        fclose(fd);
+        return HSAKMT_STATUS_NO_MEMORY;
+    }
+
+    /* Keep the last byte free so the data can always be terminated */
+    read_size = fread(read_buf, 1, PAGE_SIZE - 1, fd);
+    fclose(fd);
+    if (read_size == 0) {
+        free(read_buf);
+        return HSAKMT_STATUS_ERROR;
+    }
+    read_buf[read_size] = '\0';
+
+    *buf = read_buf;
+    if (len)
+        *len = read_size;
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/* Read the topology generation id exposed by KFD into *gen. */
+static HSAKMT_STATUS
+topology_sysfs_get_generation(uint32_t *gen)
+{
+    FILE *fd;
+    unsigned int value;
+    HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+    assert(gen);
+    fd = fopen(KFD_SYSFS_PATH_GENERATION_ID, "r");
+    if (!fd)
+        return HSAKMT_STATUS_ERROR;
+    /* %u stores an unsigned int, so scan into one and convert on assignment */
+    if (fscanf(fd, "%u", &value) == 1)
+        *gen = value;
+    else
+        ret = HSAKMT_STATUS_ERROR;
+    fclose(fd);
+
+    return ret;
+}
+
+/*
+ * Fill the platform fields of *props from the system_properties file and set
+ * NumNodes to the number of entries found under the nodes directory.
+ * Fields whose property is absent from sysfs are left untouched.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_system_props(HsaSystemProperties *props)
+{
+    DIR *dirp;
+    char *read_buf, *p;
+    char prop_name[KFD_SYSFS_PROP_NAME_SIZE];
+    long long unsigned int prop_val;
+    uint32_t node_count;
+    struct dirent *dir;
+    int prog;
+    HSAKMT_STATUS ret;
+
+    assert(props);
+    ret = topology_sysfs_read_file(KFD_SYSFS_PATH_SYSTEM_PROPERTIES, &read_buf, NULL);
+    if (ret != HSAKMT_STATUS_SUCCESS)
+        return ret;
+
+    /*
+     * Read the system properties
+     */
+    for (p = read_buf;
+         sscanf(p, KFD_SYSFS_PROP_FMT, prop_name, &prop_val, &prog) == 2;
+         p += prog) {
+        if (strcmp(prop_name, "platform_oem") == 0)
+            props->PlatformOem = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "platform_id") == 0)
+            props->PlatformId = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "platform_rev") == 0)
+            props->PlatformRev = (uint32_t)prop_val;
+    }
+    free(read_buf);
+
+    /*
+     * Discover the number of nodes
+     */
+    node_count = 0;
+    dirp = opendir(KFD_SYSFS_PATH_NODES);
+    if (dirp) {
+        /*
+         * Assuming that inside nodes folder there are only folders
+         * which represent the node numbers
+         */
+        while ((dir = readdir(dirp)) != NULL) {
+            if ((strcmp(dir->d_name, ".") == 0) ||
+                (strcmp(dir->d_name, "..") == 0))
+                continue;
+            node_count++;
+        }
+        closedir(dirp);
+    }
+    props->NumNodes = node_count;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/* Read the gpu_id file of node node_id into *gpu_id. */
+static HSAKMT_STATUS
+topology_sysfs_get_gpu_id(uint32_t node_id, uint32_t *gpu_id)
+{
+    FILE *fd;
+    char path[256];
+    unsigned int value;
+    HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
+
+    assert(gpu_id);
+    snprintf(path, sizeof(path), "%s/%u/gpu_id", KFD_SYSFS_PATH_NODES, (unsigned int)node_id);
+    fd = fopen(path, "r");
+    if (!fd)
+        return HSAKMT_STATUS_ERROR;
+    /* %u stores an unsigned int, so scan into one and convert on assignment */
+    if (fscanf(fd, "%u", &value) == 1)
+        *gpu_id = value;
+    else
+        ret = HSAKMT_STATUS_ERROR;
+    fclose(fd);
+
+    return ret;
+}
+
+/*
+ * Fill *gpu_id and *props for node node_id from its gpu_id, name and
+ * properties files. Fields whose property is absent from sysfs are left
+ * untouched.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_node_props(uint32_t node_id, HsaNodeProperties *props, uint32_t *gpu_id)
+{
+    char *read_buf, *p;
+    char prop_name[KFD_SYSFS_PROP_NAME_SIZE];
+    char path[256];
+    long long unsigned int prop_val;
+    size_t read_size;
+    uint32_t i;
+    int prog;
+    HSAKMT_STATUS ret;
+
+    assert(props);
+    assert(gpu_id);
+    /* Retrieve the GPU ID */
+    ret = topology_sysfs_get_gpu_id(node_id, gpu_id);
+    if (ret != HSAKMT_STATUS_SUCCESS)
+        return ret;
+
+    /* Retrieve the marketing name of the node */
+    snprintf(path, sizeof(path), "%s/%u/name", KFD_SYSFS_PATH_NODES, (unsigned int)node_id);
+    ret = topology_sysfs_read_file(path, &read_buf, &read_size);
+    if (ret != HSAKMT_STATUS_SUCCESS)
+        return ret;
+
+    /*
+     * The name must end in a newline and leave room for the terminator
+     * written below (assumes MarketingName has HSA_PUBLIC_NAME_SIZE
+     * elements - TODO confirm against the type definition).
+     */
+    p = memchr(read_buf, '\n', read_size);
+    if ((!p) || ((p - read_buf) >= HSA_PUBLIC_NAME_SIZE)) {
+        free(read_buf);
+        return HSAKMT_STATUS_ERROR;
+    }
+    /*
+     * Widen each byte into one MarketingName element. Going through
+     * unsigned char keeps bytes above 0x7f from being sign-extended.
+     */
+    for (i = 0; (i < HSA_PUBLIC_NAME_SIZE) && (read_buf[i] != '\n'); i++)
+        props->MarketingName[i] = (unsigned char)read_buf[i];
+    props->MarketingName[i] = 0;
+    free(read_buf);
+
+    /* Retrieve the node properties */
+    snprintf(path, sizeof(path), "%s/%u/properties", KFD_SYSFS_PATH_NODES, (unsigned int)node_id);
+    ret = topology_sysfs_read_file(path, &read_buf, NULL);
+    if (ret != HSAKMT_STATUS_SUCCESS)
+        return ret;
+
+    /*
+     * Read the node properties
+     */
+    for (p = read_buf;
+         sscanf(p, KFD_SYSFS_PROP_FMT, prop_name, &prop_val, &prog) == 2;
+         p += prog) {
+        if (strcmp(prop_name, "cpu_cores_count") == 0)
+            props->NumCPUCores = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "simd_count") == 0)
+            props->NumFComputeCores = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "mem_banks_count") == 0)
+            props->NumMemoryBanks = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "caches_count") == 0)
+            props->NumCaches = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "io_links_count") == 0)
+            props->NumIOLinks = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "cpu_core_id_base") == 0)
+            props->CComputeIdLo = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "simd_id_base") == 0)
+            props->FComputeIdLo = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "capability") == 0)
+            props->Capability.Value = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "max_waves_per_simd") == 0)
+            props->MaxWavesPerSIMD = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "lds_size_in_kb") == 0)
+            props->LDSSizeInKB = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "gds_size_in_kb") == 0)
+            props->GDSSizeInKB = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "wave_front_size") == 0)
+            props->WaveFrontSize = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "array_count") == 0)
+            props->NumShaderBanks = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "simd_arrays_per_engine") == 0)
+            props->NumArrays = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "cu_per_simd_array") == 0)
+            props->NumCUPerArray = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "simd_per_cu") == 0)
+            props->NumSIMDPerCU = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "max_slots_scratch_cu") == 0)
+            props->MaxSlotsScratchCU = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "engine_id") == 0)
+            props->EngineId = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "vendor_id") == 0)
+            props->VendorId = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "device_id") == 0)
+            props->DeviceId = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "location_id") == 0)
+            props->LocationId = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "max_engine_clk_fcompute") == 0)
+            props->MaxEngineClockMhzFCompute = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "max_engine_clk_ccompute") == 0)
+            props->MaxEngineClockMhzCCompute = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "local_mem_size") == 0)
+            /* NOTE(review): the cast drops sizes of 4 GiB and above - confirm field width */
+            props->LocalMemSize = (uint32_t)prop_val;
+    }
+    free(read_buf);
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/* Fill *props for memory bank mem_id of node node_id from its properties file. */
+static HSAKMT_STATUS
+topology_sysfs_get_mem_props(uint32_t node_id, uint32_t mem_id, HsaMemoryProperties *props)
+{
+    char *read_buf, *p;
+    char prop_name[KFD_SYSFS_PROP_NAME_SIZE];
+    char path[256];
+    long long unsigned int prop_val;
+    int prog;
+    HSAKMT_STATUS ret;
+
+    assert(props);
+    snprintf(path, sizeof(path), "%s/%u/mem_banks/%u/properties", KFD_SYSFS_PATH_NODES,
+             (unsigned int)node_id, (unsigned int)mem_id);
+    ret = topology_sysfs_read_file(path, &read_buf, NULL);
+    if (ret != HSAKMT_STATUS_SUCCESS)
+        return ret;
+
+    for (p = read_buf;
+         sscanf(p, KFD_SYSFS_PROP_FMT, prop_name, &prop_val, &prog) == 2;
+         p += prog) {
+        if (strcmp(prop_name, "heap_type") == 0)
+            props->HeapType = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "size_in_bytes") == 0)
+            props->SizeInBytes = prop_val;
+        else if (strcmp(prop_name, "flags") == 0)
+            props->Flags.MemoryProperty = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "width") == 0)
+            props->Width = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "mem_clk_max") == 0)
+            props->MemoryClockMax = (uint32_t)prop_val;
+    }
+    free(read_buf);
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Fill *props for cache cache_id of node node_id from its properties file,
+ * including up to HSA_CPU_SIBLINGS entries of the sibling map.
+ */
+static HSAKMT_STATUS
+topology_sysfs_get_cache_props(uint32_t node_id, uint32_t cache_id, HsaCacheProperties *props)
+{
+    char *read_buf, *p;
+    char prop_name[KFD_SYSFS_PROP_NAME_SIZE];
+    char path[256];
+    long long unsigned int prop_val;
+    unsigned int sibling;
+    uint32_t i;
+    int prog;
+    HSAKMT_STATUS ret;
+
+    assert(props);
+    snprintf(path, sizeof(path), "%s/%u/caches/%u/properties", KFD_SYSFS_PATH_NODES,
+             (unsigned int)node_id, (unsigned int)cache_id);
+    ret = topology_sysfs_read_file(path, &read_buf, NULL);
+    if (ret != HSAKMT_STATUS_SUCCESS)
+        return ret;
+
+    for (p = read_buf;
+         sscanf(p, KFD_SYSFS_PROP_FMT, prop_name, &prop_val, &prog) == 2;
+         p += prog) {
+        if (strcmp(prop_name, "processor_id_low") == 0)
+            props->ProcessorIdLow = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "level") == 0)
+            props->CacheLevel = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "size") == 0)
+            props->CacheSize = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "cache_line_size") == 0)
+            props->CacheLineSize = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "cache_lines_per_tag") == 0)
+            props->CacheLinesPerTag = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "association") == 0)
+            props->CacheAssociativity = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "latency") == 0)
+            props->CacheLatency = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "type") == 0)
+            props->CacheType.Value = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "sibling_map") == 0)
+            break; /* p still points at the start of the sibling_map line */
+    }
+
+    /*
+     * sibling_map is a list of numbers separated by commas or newlines.
+     * Each one is scanned into an unsigned int and then stored, so a map
+     * element narrower than unsigned int is never overrun by the scan.
+     */
+    prog = 0;
+    if ((sscanf(p, "sibling_map %n", &prog) == 0) && (prog > 0)) {
+        p += prog;
+        for (i = 0; i < HSA_CPU_SIBLINGS; i++) {
+            prog = 0;
+            if (sscanf(p, "%u%n", &sibling, &prog) != 1)
+                break;
+            props->SiblingMap[i] = sibling;
+            p += prog;
+            /* Skip the separators in front of the next entry */
+            while ((*p == ',') || (*p == '\n'))
+                p++;
+        }
+    }
+    free(read_buf);
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/* Fill *props for IO link iolink_id of node node_id from its properties file. */
+static HSAKMT_STATUS
+topology_sysfs_get_iolink_props(uint32_t node_id, uint32_t iolink_id, HsaIoLinkProperties *props)
+{
+    char *read_buf, *p;
+    char prop_name[KFD_SYSFS_PROP_NAME_SIZE];
+    char path[256];
+    long long unsigned int prop_val;
+    int prog;
+    HSAKMT_STATUS ret;
+
+    assert(props);
+    /*
+     * NOTE(review): the sibling directories are plural (mem_banks, caches);
+     * confirm that KFD really names this one "io_link".
+     */
+    snprintf(path, sizeof(path), "%s/%u/io_link/%u/properties", KFD_SYSFS_PATH_NODES,
+             (unsigned int)node_id, (unsigned int)iolink_id);
+    ret = topology_sysfs_read_file(path, &read_buf, NULL);
+    if (ret != HSAKMT_STATUS_SUCCESS)
+        return ret;
+
+    for (p = read_buf;
+         sscanf(p, KFD_SYSFS_PROP_FMT, prop_name, &prop_val, &prog) == 2;
+         p += prog) {
+        if (strcmp(prop_name, "type") == 0)
+            props->IoLinkType = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "version_major") == 0)
+            props->VersionMajor = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "version_minor") == 0)
+            props->VersionMinor = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "node_from") == 0)
+            props->NodeFrom = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "node_to") == 0)
+            props->NodeTo = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "weight") == 0)
+            props->Weight = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "min_latency") == 0)
+            props->MinimumLatency = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "max_latency") == 0)
+            props->MaximumLatency = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "min_bandwidth") == 0)
+            props->MinimumBandwidth = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "max_bandwidth") == 0)
+            props->MaximumBandwidth = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "recommended_transfer_size") == 0)
+            props->RecTransferSize = (uint32_t)prop_val;
+        else if (strcmp(prop_name, "flags") == 0)
+            props->Flags.LinkProperty = (uint32_t)prop_val;
+    }
+    free(read_buf);
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Read the whole topology from sysfs and make it the current snapshot,
+ * releasing the previous one. The read is repeated until the generation id
+ * is the same before and after it.
+ * Assume lock is held.
+ */
+static HSAKMT_STATUS
+topology_take_snapshot(void)
+{
+    uint32_t gen_start, gen_end, i, mem_id, cache_id, link_id, old_count;
+    HsaSystemProperties sys_props;
+    node_t *temp_nodes;
+    node_t *n;
+    HSAKMT_STATUS ret;
+
+retry:
+    temp_nodes = NULL;
+    /* Properties missing from sysfs must read as zero, not as stack garbage */
+    memset(&sys_props, 0, sizeof(sys_props));
+
+    ret = topology_sysfs_get_generation(&gen_start);
+    if (ret != HSAKMT_STATUS_SUCCESS)
+        return ret;
+    ret = topology_sysfs_get_system_props(&sys_props);
+    if (ret != HSAKMT_STATUS_SUCCESS)
+        return ret;
+
+    if (sys_props.NumNodes > 0) {
+        /* calloc() zeroes the nodes, so free_nodes() is safe on a partial read */
+        temp_nodes = calloc(sys_props.NumNodes, sizeof(node_t));
+        if (!temp_nodes)
+            return HSAKMT_STATUS_NO_MEMORY;
+    }
+
+    for (i = 0; i < sys_props.NumNodes; i++) {
+        n = &temp_nodes[i];
+
+        ret = topology_sysfs_get_node_props(i, &n->node, &n->gpu_id);
+        if (ret != HSAKMT_STATUS_SUCCESS)
+            goto err;
+
+        if (n->node.NumMemoryBanks) {
+            n->mem = calloc(n->node.NumMemoryBanks, sizeof(HsaMemoryProperties));
+            if (!n->mem) {
+                ret = HSAKMT_STATUS_NO_MEMORY;
+                goto err;
+            }
+            for (mem_id = 0; mem_id < n->node.NumMemoryBanks; mem_id++) {
+                ret = topology_sysfs_get_mem_props(i, mem_id, &n->mem[mem_id]);
+                if (ret != HSAKMT_STATUS_SUCCESS)
+                    goto err;
+            }
+        }
+
+        if (n->node.NumCaches) {
+            n->cache = calloc(n->node.NumCaches, sizeof(HsaCacheProperties));
+            if (!n->cache) {
+                ret = HSAKMT_STATUS_NO_MEMORY;
+                goto err;
+            }
+            for (cache_id = 0; cache_id < n->node.NumCaches; cache_id++) {
+                ret = topology_sysfs_get_cache_props(i, cache_id, &n->cache[cache_id]);
+                if (ret != HSAKMT_STATUS_SUCCESS)
+                    goto err;
+            }
+        }
+
+        if (n->node.NumIOLinks) {
+            n->link = calloc(n->node.NumIOLinks, sizeof(HsaIoLinkProperties));
+            if (!n->link) {
+                ret = HSAKMT_STATUS_NO_MEMORY;
+                goto err;
+            }
+            for (link_id = 0; link_id < n->node.NumIOLinks; link_id++) {
+                ret = topology_sysfs_get_iolink_props(i, link_id, &n->link[link_id]);
+                if (ret != HSAKMT_STATUS_SUCCESS)
+                    goto err;
+            }
+        }
+    }
+
+    ret = topology_sysfs_get_generation(&gen_end);
+    if (ret != HSAKMT_STATUS_SUCCESS)
+        goto err;
+
+    if (gen_start != gen_end) {
+        /* The topology changed while it was being read: start over */
+        free_nodes(temp_nodes, sys_props.NumNodes);
+        goto retry;
+    }
+
+    /* The old node arrays must be released with the old node count */
+    old_count = g_system ? g_system->NumNodes : 0;
+    if (!g_system) {
+        g_system = malloc(sizeof(HsaSystemProperties));
+        if (!g_system) {
+            ret = HSAKMT_STATUS_NO_MEMORY;
+            goto err;
+        }
+    }
+
+    free_nodes(g_node, old_count);
+    *g_system = sys_props;
+    g_node = temp_nodes;
+    return HSAKMT_STATUS_SUCCESS;
+
+err:
+    free_nodes(temp_nodes, sys_props.NumNodes);
+    return ret;
+}
+
+/*
+ * Drop the snapshot of the HSA topology information.
+ * Assume lock is held.
+ */
+static HSAKMT_STATUS
+topology_drop_snapshot(void)
+{
+    if (g_node && !g_system) {
+        /* Without the system properties the node count is unknown */
+        printf("Probable inconsistency?\n");
+        return HSAKMT_STATUS_SUCCESS;
+    }
+
+    /*
+     * A snapshot of a system without nodes has g_system set and g_node
+     * NULL; that is a valid state and g_system must still be released.
+     */
+    if (g_node)
+        free_nodes(g_node, g_system->NumNodes);
+    g_node = NULL;
+
+    free(g_system);
+    g_system = NULL;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
+
+/*
+ * Check that nodeid names a node of the current snapshot and, when gpu_id
+ * is not NULL, store the node's gpu_id in *gpu_id.
+ * Returns HSAKMT_STATUS_INVALID_NODE_UNIT if there is no such node.
+ */
+HSAKMT_STATUS
+validate_nodeid(uint32_t nodeid, uint32_t *gpu_id)
+{
+    if (nodeid >= MAX_NODES || !g_node || !g_system || g_system->NumNodes <= nodeid)
+        return HSAKMT_STATUS_INVALID_NODE_UNIT;
+    if (gpu_id)
+        *gpu_id = g_node[nodeid].gpu_id;
+
+    return HSAKMT_STATUS_SUCCESS;
+}
 
 HSAKMT_STATUS
 HSAKMTAPI
@@ -37,9 +688,26 @@ hsaKmtAcquireSystemProperties(
     HsaSystemProperties* SystemProperties //OUT
     )
 {
+    HSAKMT_STATUS err;
     CHECK_KFD_OPEN();
 
-    return HSAKMT_STATUS_NOT_SUPPORTED;
+    if (!SystemProperties)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
+    pthread_mutex_lock(&hsakmt_mutex);
+
+    err = topology_take_snapshot();
+    if (err != HSAKMT_STATUS_SUCCESS)
+        goto out;
+
+    assert(g_system);
+
+    *SystemProperties = *g_system;
+    err = HSAKMT_STATUS_SUCCESS;
+
+out:
+    pthread_mutex_unlock(&hsakmt_mutex);
+    return err;
 }
 
 HSAKMT_STATUS
@@ -48,7 +716,15 @@ hsaKmtReleaseSystemProperties(void)
 {
     CHECK_KFD_OPEN();
 
-    return HSAKMT_STATUS_NOT_SUPPORTED;
+    HSAKMT_STATUS err;
+
+    pthread_mutex_lock(&hsakmt_mutex);
+
+    err = topology_drop_snapshot();
+
+    pthread_mutex_unlock(&hsakmt_mutex);
+
+    return err;
 }
 
 HSAKMT_STATUS
@@ -58,9 +734,39 @@ hsaKmtGetNodeProperties(
     HsaNodeProperties* NodeProperties //OUT
     )
 {
+    HSAKMT_STATUS err;
+    uint32_t gpu_id;
+
+    if (!NodeProperties)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
     CHECK_KFD_OPEN();
+    pthread_mutex_lock(&hsakmt_mutex);
+
+    /* KFD ADD page 18, snapshot protocol violation */
+    if (g_system == NULL) {
+        err = HSAKMT_STATUS_INVALID_NODE_UNIT;
+        assert(g_system);
+        goto out;
+    }
+
+    if (NodeId >= g_system->NumNodes) {
+        err = HSAKMT_STATUS_INVALID_PARAMETER;
+        goto out;
+    }
+
+    err = validate_nodeid(NodeId, &gpu_id);
+    if (err != HSAKMT_STATUS_SUCCESS)
+        goto out; /* never return with hsakmt_mutex still held */
 
-    return HSAKMT_STATUS_NOT_SUPPORTED;
+    *NodeProperties = g_node[NodeId].node;
+    NodeProperties->NumMemoryBanks += NUM_OF_HEAPS;
+
+    err = HSAKMT_STATUS_SUCCESS;
+
+out:
+    pthread_mutex_unlock(&hsakmt_mutex);
+    return err;
 }
 
 HSAKMT_STATUS
@@ -71,9 +777,58 @@ hsaKmtGetNodeMemoryProperties(
     HsaMemoryProperties* MemoryProperties //OUT
     )
 {
+    HSAKMT_STATUS err;
+    uint32_t i, gpu_id;
+
+    if (!MemoryProperties)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
     CHECK_KFD_OPEN();
+    pthread_mutex_lock(&hsakmt_mutex);
 
-    return HSAKMT_STATUS_NOT_SUPPORTED;
+    /* KFD ADD page 18, snapshot protocol violation */
+    if (g_system == NULL) {
+        err = HSAKMT_STATUS_INVALID_NODE_UNIT;
+        assert(g_system);
+        goto out;
+    }
+
+    /* Check still necessary */
+    if (NodeId >= g_system->NumNodes) {
+        err = HSAKMT_STATUS_INVALID_PARAMETER;
+        goto out;
+    }
+
+    err = validate_nodeid(NodeId, &gpu_id);
+    if (err != HSAKMT_STATUS_SUCCESS)
+        goto out; /* never return with hsakmt_mutex still held */
+
+    for (i = 0; i < MIN(g_node[NodeId].node.NumMemoryBanks, NumBanks); i++) {
+        assert(g_node[NodeId].mem);
+        MemoryProperties[i] = g_node[NodeId].mem[i];
+    }
+
+    /* Add LDS */
+    if (i < NumBanks) {
+        MemoryProperties[i].HeapType = HSA_HEAPTYPE_GPU_LDS;
+        MemoryProperties[i].SizeInBytes = g_node[NodeId].node.LDSSizeInKB * 1024;
+        MemoryProperties[i].VirtualBaseAddress = fmm_get_aperture_base(FMM_LDS, gpu_id);
+        i++;
+    }
+
+    /* Add Local memory - HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE */
+    if (i < NumBanks) {
+        MemoryProperties[i].HeapType = HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE;
+        MemoryProperties[i].SizeInBytes = g_node[NodeId].node.LocalMemSize;
+        MemoryProperties[i].VirtualBaseAddress = fmm_get_aperture_base(FMM_GPUVM, gpu_id);
+        i++;
+    }
+
+    err = HSAKMT_STATUS_SUCCESS;
+
+out:
+    pthread_mutex_unlock(&hsakmt_mutex);
+    return err;
 }
 
 HSAKMT_STATUS
@@ -85,9 +840,37 @@ hsaKmtGetNodeCacheProperties(
     HsaCacheProperties* CacheProperties //OUT
     )
 {
+    HSAKMT_STATUS err;
+    uint32_t i;
+
+    if (!CacheProperties)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
     CHECK_KFD_OPEN();
+    pthread_mutex_lock(&hsakmt_mutex);
+
+    /* KFD ADD page 18, snapshot protocol violation */
+    if (g_system == NULL) {
+        err = HSAKMT_STATUS_INVALID_NODE_UNIT;
+        assert(g_system);
+        goto out;
+    }
+
+    if (NodeId >= g_system->NumNodes || NumCaches > g_node[NodeId].node.NumCaches) {
+        err = HSAKMT_STATUS_INVALID_PARAMETER;
+        goto out;
+    }
+
+    for (i = 0; i < MIN(g_node[NodeId].node.NumCaches, NumCaches); i++) {
+        assert(g_node[NodeId].cache);
+        CacheProperties[i] = g_node[NodeId].cache[i];
+    }
 
-    return HSAKMT_STATUS_NOT_SUPPORTED;
+    err = HSAKMT_STATUS_SUCCESS;
+
+out:
+    pthread_mutex_unlock(&hsakmt_mutex);
+    return err;
 }
 
 HSAKMT_STATUS
@@ -98,7 +881,44 @@ hsaKmtGetNodeIoLinkProperties(
     HsaIoLinkProperties* IoLinkProperties //OUT
     )
 {
+    HSAKMT_STATUS err;
+    uint32_t i;
+
+    if (!IoLinkProperties)
+        return HSAKMT_STATUS_INVALID_PARAMETER;
+
     CHECK_KFD_OPEN();
 
-    return HSAKMT_STATUS_NOT_SUPPORTED;
+    pthread_mutex_lock(&hsakmt_mutex);
+
+    /* KFD ADD page 18, snapshot protocol violation */
+    if (g_system == NULL) {
+        err = HSAKMT_STATUS_INVALID_NODE_UNIT;
+        assert(g_system);
+        goto out;
+    }
+
+    if (NodeId >= g_system->NumNodes || NumIoLinks > g_node[NodeId].node.NumIOLinks) {
+        err = HSAKMT_STATUS_INVALID_PARAMETER;
+        goto out;
+    }
+
+    for (i = 0; i < MIN(g_node[NodeId].node.NumIOLinks, NumIoLinks); i++) {
+        assert(g_node[NodeId].link);
+        IoLinkProperties[i] = g_node[NodeId].link[i];
+    }
+
+    err = HSAKMT_STATUS_SUCCESS;
+
+out:
+    pthread_mutex_unlock(&hsakmt_mutex);
+    return err;
+}
+
+uint16_t get_device_id_by_node(HSAuint32 node_id)
+{
+    if (!g_node || !g_system || g_system->NumNodes <= node_id)
+        return 0;
+
+    return g_node[node_id].node.DeviceId;
 }