diff --git a/src/common/printer.cpp b/src/common/printer.cpp
index 5f5860a..abca269 100644
--- a/src/common/printer.cpp
+++ b/src/common/printer.cpp
@@ -54,7 +54,7 @@ enum {
   ATTRIBUTE_CUDA_CORES,      // CUDA
   ATTRIBUTE_TENSOR_CORES,    // CUDA
   ATTRIBUTE_L2,              // CUDA
-  ATTRIBUTE_MEMORY,          // CUDA
+  ATTRIBUTE_MEMORY,          // CUDA,HSA
   ATTRIBUTE_MEMORY_FREQ,     // CUDA
   ATTRIBUTE_BUS_WIDTH,       // CUDA,HSA
   ATTRIBUTE_PEAK_TENSOR,     // CUDA
@@ -490,6 +490,7 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
   char* cus = get_str_cu(gpu);
   char* max_frequency = get_str_freq(gpu);
   char* bus_width = get_str_bus_width(gpu);
+  char* mem_size = get_str_memory_size(gpu);
   char* lds_size = get_str_lds_size(gpu);
 
   setAttribute(art, ATTRIBUTE_NAME, gpu_name);
@@ -500,8 +501,9 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
   setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
   setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
   setAttribute(art, ATTRIBUTE_COMPUTE_UNITS, cus);
-  setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
   setAttribute(art, ATTRIBUTE_LDS_SIZE, lds_size);
+  setAttribute(art, ATTRIBUTE_MEMORY, mem_size);
+  setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
 
   bool use_short = false;
   uint32_t longest_attribute = longest_attribute_length(art, use_short);
diff --git a/src/hsa/hsa.cpp b/src/hsa/hsa.cpp
index 67e7c2a..2cdcae6 100644
--- a/src/hsa/hsa.cpp
+++ b/src/hsa/hsa.cpp
@@ -25,6 +25,7 @@ struct agent_info {
   uint32_t compute_unit;
   uint32_t bus_width;
   uint32_t lds_size;
+  uint64_t global_size;
 };
 
 #define RET_IF_HSA_ERR(err) { \
@@ -42,18 +43,31 @@ struct agent_info {
   } \
 }
 
-hsa_status_t get_lds_size_callback(hsa_region_t region, void* data) {
+hsa_status_t region_callback(hsa_region_t region, void* data) {
   hsa_region_segment_t segment;
   hsa_status_t err = hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment);
-  RET_IF_HSA_ERR(err);
+  RET_IF_HSA_ERR(err);
+
+  uint64_t *mems = reinterpret_cast<uint64_t *>(data);
 
   if (segment == HSA_REGION_SEGMENT_GROUP) {
+    // LDS memory: stored in mems[0]
     size_t size = 0;
 
     err = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &size);
     RET_IF_HSA_ERR(err);
 
-    *(size_t*)data = size;
+    mems[0] = size;
+  }
+  else if (segment == HSA_REGION_SEGMENT_GLOBAL) {
+    // Global memory: stored in mems[1]
+    // NOTE: if the agent exposes several global regions, the last one visited wins
+    size_t size = 0;
+
+    err = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &size);
+    RET_IF_HSA_ERR(err);
+
+    mems[1] = size;
   }
   return HSA_STATUS_SUCCESS;
 }
@@ -85,8 +99,12 @@ hsa_status_t agent_callback(hsa_agent_t agent, void *data) {
     err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MEMORY_WIDTH, &info->bus_width);
     RET_IF_HSA_ERR(err);
 
-    err = hsa_agent_iterate_regions(agent, get_lds_size_callback, &info->lds_size);
+    uint64_t mems[2] = {0, 0}; // [0] = LDS bytes, [1] = global bytes; zero if no such region
+    err = hsa_agent_iterate_regions(agent, region_callback, mems);
     RET_IF_HSA_ERR(err);
+
+    info->lds_size = static_cast<uint32_t>(mems[0]);
+    info->global_size = mems[1];
   }
 
   return HSA_STATUS_SUCCESS;
@@ -105,6 +123,7 @@ struct memory* get_memory_info(struct gpu_info* gpu, struct agent_info info) {
 
   mem->bus_width = info.bus_width;
   mem->lds_size = info.lds_size;
+  mem->size_bytes = info.global_size;
 
   return mem;
 }