#include #include #include #include #include #include #include #include #include #include #include "hsa.hpp" #include "uarch.hpp" #include "../common/global.hpp" #include "../common/uarch.hpp" struct agent_info { unsigned deviceId; // ID of the target GPU device char gpu_name[64]; char vendor_name[64]; char device_mkt_name[64]; uint32_t max_clock_freq; // Memory uint32_t bus_width; uint32_t lds_size; uint64_t global_size; // Topology uint32_t compute_unit; uint32_t num_shader_engines; uint32_t simds_per_cu; uint32_t num_xcc; // Acccelerator Complex Dies (XCDs) uint32_t matrix_cores; // Cores with WMMA/MFMA capabilities }; #define RET_IF_HSA_ERR(err) { \ if ((err) != HSA_STATUS_SUCCESS) { \ char err_val[12]; \ char* err_str = NULL; \ if (hsa_status_string(err, \ (const char**)&err_str) != HSA_STATUS_SUCCESS) { \ snprintf(&(err_val[0]), sizeof(err_val), "%#x", (uint32_t)err); \ err_str = &(err_val[0]); \ } \ printErr("HSA failure at: %s:%d\n", __FILE__, __LINE__); \ printErr("Call returned %s\n", err_str); \ return (err); \ } \ } hsa_status_t memory_pool_callback(hsa_amd_memory_pool_t pool, void* data) { struct agent_info* info = reinterpret_cast(data); hsa_amd_segment_t segment; hsa_status_t err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); RET_IF_HSA_ERR(err); if (segment == HSA_AMD_SEGMENT_GROUP) { // LDS memory // We want to make sure that this memory pool is not repeated. if (info->lds_size != 0) { printErr("Found HSA_AMD_SEGMENT_GROUP twice!"); return HSA_STATUS_ERROR; } uint32_t size = 0; err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size); RET_IF_HSA_ERR(err); info->lds_size = size; } else if (segment == HSA_AMD_SEGMENT_GLOBAL) { // Global memory uint32_t global_flags = 0; err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flags); RET_IF_HSA_ERR(err); if (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED) { if (info->global_size != 0) { printErr("Found HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED twice!"); return HSA_STATUS_ERROR; } uint64_t size = 0; err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size); RET_IF_HSA_ERR(err); info->global_size = size; } } return HSA_STATUS_SUCCESS; } hsa_status_t agent_callback(hsa_agent_t agent, void *data) { struct agent_info* info = reinterpret_cast(data); hsa_device_type_t type; hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type); RET_IF_HSA_ERR(err); if (type == HSA_DEVICE_TYPE_GPU) { err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, info->gpu_name); RET_IF_HSA_ERR(err); err = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, info->vendor_name); RET_IF_HSA_ERR(err); err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_PRODUCT_NAME, &info->device_mkt_name); RET_IF_HSA_ERR(err); err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, &info->max_clock_freq); RET_IF_HSA_ERR(err); err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &info->compute_unit); RET_IF_HSA_ERR(err); // According to the documentation, this is deprecated. But what should I be using then? err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MEMORY_WIDTH, &info->bus_width); RET_IF_HSA_ERR(err); err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES, &info->num_shader_engines); RET_IF_HSA_ERR(err); err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU, &info->simds_per_cu); RET_IF_HSA_ERR(err); err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_XCC, &info->num_xcc); RET_IF_HSA_ERR(err); // We will check against zero to see if it was set beforehand. info->global_size = 0; info->lds_size = 0; // This will fill global_size and lds_size. err = hsa_amd_agent_iterate_memory_pools(agent, memory_pool_callback, data); RET_IF_HSA_ERR(err); } return HSA_STATUS_SUCCESS; } struct topology_h* get_topology_info(struct agent_info info) { struct topology_h* topo = (struct topology_h*) emalloc(sizeof(struct topology_h)); topo->compute_units = info.compute_unit; topo->num_shader_engines = info.num_shader_engines; // not printed at the moment topo->simds_per_cu = info.simds_per_cu; // not printed at the moment topo->num_xcc = info.num_xcc; // Old GPUs (GCN I guess) might not have matrix cores. // Not sure what would happen here? topo->matrix_cores = topo->compute_units * topo->simds_per_cu; return topo; } struct memory* get_memory_info(struct gpu_info* gpu, struct agent_info info) { struct memory* mem = (struct memory*) emalloc(sizeof(struct memory)); mem->bus_width = info.bus_width; mem->lds_size = info.lds_size; mem->size_bytes = info.global_size; return mem; } struct gpu_info* get_gpu_info_hsa(int gpu_idx) { struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info)); gpu->pci = NULL; gpu->idx = gpu_idx; if(gpu->idx < 0) { printErr("GPU index must be equal or greater than zero"); return NULL; } if(gpu->idx > 0) { // Currently we only support fetching GPU 0. return NULL; } hsa_status_t err = hsa_init(); if (err != HSA_STATUS_SUCCESS) { printErr("Failed to initialize HSA runtime"); return NULL; } struct agent_info info; info.deviceId = gpu_idx; // Iterate over all agents in the system err = hsa_iterate_agents(agent_callback, &info); if (err != HSA_STATUS_SUCCESS) { printErr("Failed to iterate HSA agents"); hsa_shut_down(); return NULL; } if (strcmp(info.vendor_name, "AMD") != 0) { printErr("HSA vendor name is: '%s'. Only AMD is supported!", info.vendor_name); return NULL; } gpu->vendor = GPU_VENDOR_AMD; gpu->freq = info.max_clock_freq; gpu->topo_h = get_topology_info(info); gpu->name = (char *) emalloc(sizeof(char) * (strlen(info.device_mkt_name) + 1)); strcpy(gpu->name, info.device_mkt_name); gpu->arch = get_uarch_from_hsa(gpu, info.gpu_name); gpu->mem = get_memory_info(gpu, info); if (gpu->arch == NULL) { return NULL; } // Shut down the HSA runtime err = hsa_shut_down(); if (err != HSA_STATUS_SUCCESS) { printErr("Failed to shutdown HSA runtime"); return NULL; } return gpu; } char* get_str_cu(struct gpu_info* gpu) { return get_str_generic(gpu->topo_h->compute_units); } char* get_str_xcds(struct gpu_info* gpu) { // If there is a single XCD, then we dont want to // print it. if (gpu->topo_h->num_xcc == 1) { return NULL; } return get_str_generic(gpu->topo_h->num_xcc); } char* get_str_matrix_cores(struct gpu_info* gpu) { // TODO: Show XX (WMMA/MFMA) return get_str_generic(gpu->topo_h->matrix_cores); }