diff --git a/src/common/gpu.cpp b/src/common/gpu.cpp
index b824140..7e27133 100644
--- a/src/common/gpu.cpp
+++ b/src/common/gpu.cpp
@@ -101,6 +101,19 @@ char* get_str_bus_width(struct gpu_info* gpu) {
   return string;
 }
 
+// Builds the "<N> KB" label for the LDS size: gpu->mem->lds_size is divided
+// by 1024 before printing. The returned string is allocated here with ecalloc.
+char* get_str_lds_size(struct gpu_info* gpu) {
+  // TODO: Show XX KB (XX MB Total) like in cpufetch
+  uint32_t size = 11+1+2+1; // sign + 10 digits, ' ', "KB", NUL: fits any int32_t
+  assert(strlen(STRING_UNKNOWN)+1 <= size);
+  char* string = (char *) ecalloc(size, sizeof(char));
+
+  snprintf(string, size, "%d KB", gpu->mem->lds_size / 1024); // bounded: never writes past the buffer
+
+  return string;
+}
+
 char* get_str_memory_clock(struct gpu_info* gpu) {
   return get_freq_as_str_mhz(gpu->mem->freq);
 }
diff --git a/src/common/gpu.hpp b/src/common/gpu.hpp
index 3d35faf..73baff4 100644
--- a/src/common/gpu.hpp
+++ b/src/common/gpu.hpp
@@ -61,6 +61,7 @@ struct memory {
   int32_t bus_width;
   int32_t freq;
   int32_t clk_mul; // clock multiplier
+  int32_t lds_size; // HSA specific for now
 };
 
 struct gpu_info {
@@ -88,6 +89,7 @@ char* get_str_freq(struct gpu_info* gpu);
 char* get_str_memory_size(struct gpu_info* gpu);
 char* get_str_memory_type(struct gpu_info* gpu);
 char* get_str_bus_width(struct gpu_info* gpu);
+char* get_str_lds_size(struct gpu_info* gpu);
 char* get_str_memory_clock(struct gpu_info* gpu);
 char* get_str_l2(struct gpu_info* gpu);
 char* get_str_peak_performance(struct gpu_info* gpu);
diff --git a/src/common/printer.cpp b/src/common/printer.cpp
index 8a7ae31..cd3e452 100644
--- a/src/common/printer.cpp
+++ b/src/common/printer.cpp
@@ -48,14 +48,15 @@ enum {
   ATTRIBUTE_FREQUENCY, // ALL
   ATTRIBUTE_PEAK, // ALL
   ATTRIBUTE_COMPUTE_UNITS, // HSA
+  ATTRIBUTE_LDS_SIZE, // HSA
   ATTRIBUTE_STREAMINGMP, // CUDA
   ATTRIBUTE_CORESPERMP, // CUDA
   ATTRIBUTE_CUDA_CORES, // CUDA
   ATTRIBUTE_TENSOR_CORES, // CUDA
   ATTRIBUTE_L2, // CUDA
-  ATTRIBUTE_MEMORY, // CUDA
+  ATTRIBUTE_MEMORY, // CUDA,HSA
   ATTRIBUTE_MEMORY_FREQ, // CUDA
-  ATTRIBUTE_BUS_WIDTH, // CUDA
+  ATTRIBUTE_BUS_WIDTH, // CUDA,HSA
   ATTRIBUTE_PEAK_TENSOR, // CUDA
   ATTRIBUTE_EUS, // Intel
   ATTRIBUTE_GT, // Intel
@@ -69,6 +70,7 @@ static const AttributeField ATTRIBUTE_INFO[] = {
   { ATTRIBUTE_FREQUENCY, "Max Frequency:", "Max Freq.:" },
   { ATTRIBUTE_PEAK, "Peak Performance:", "Peak Perf.:" },
   { ATTRIBUTE_COMPUTE_UNITS, "Compute Units (CUs):", "CUs" },
+  { ATTRIBUTE_LDS_SIZE, "LDS size:", "LDS:" },
   { ATTRIBUTE_STREAMINGMP, "SMs:", "SMs:" },
   { ATTRIBUTE_CORESPERMP, "Cores/SM:", "Cores/SM:" },
   { ATTRIBUTE_CUDA_CORES, "CUDA Cores:", "CUDA Cores:" },
@@ -487,6 +489,9 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
   char* manufacturing_process = get_str_process(gpu->arch);
   char* cus = get_str_cu(gpu);
   char* max_frequency = get_str_freq(gpu);
+  char* bus_width = get_str_bus_width(gpu);
+  char* mem_size = get_str_memory_size(gpu);
+  char* lds_size = get_str_lds_size(gpu);
 
   setAttribute(art, ATTRIBUTE_NAME, gpu_name);
   if (gpu_chip != NULL) {
@@ -496,6 +501,9 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
   setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
   setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
   setAttribute(art, ATTRIBUTE_COMPUTE_UNITS, cus);
+  setAttribute(art, ATTRIBUTE_LDS_SIZE, lds_size);
+  setAttribute(art, ATTRIBUTE_MEMORY, mem_size);
+  setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
 
   bool use_short = false;
   uint32_t longest_attribute = longest_attribute_length(art, use_short);
diff --git a/src/hsa/hsa.cpp b/src/hsa/hsa.cpp
index da58673..3e55165 100644
--- a/src/hsa/hsa.cpp
+++ b/src/hsa/hsa.cpp
@@ -23,6 +23,9 @@ struct agent_info {
   char device_mkt_name[64];
   uint32_t max_clock_freq;
   uint32_t compute_unit;
+  uint32_t bus_width;
+  uint32_t lds_size;
+  uint64_t global_size;
 };
 
 #define RET_IF_HSA_ERR(err) { \
@@ -40,6 +43,54 @@ struct agent_info {
   } \
 }
 
+// Callback for hsa_amd_agent_iterate_memory_pools; data points to a struct agent_info.
+// Stores the size of the GROUP segment pool in lds_size and the size of the
+// extended-scope fine-grained GLOBAL pool in global_size; a repeated pool is an error.
+hsa_status_t memory_pool_callback(hsa_amd_memory_pool_t pool, void* data) {
+  struct agent_info* info = reinterpret_cast<struct agent_info*>(data);
+
+  hsa_amd_segment_t segment;
+  hsa_status_t err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
+  RET_IF_HSA_ERR(err);
+
+  if (segment == HSA_AMD_SEGMENT_GROUP) {
+    // LDS memory
+    // We want to make sure that this memory pool is not repeated.
+    if (info->lds_size != 0) {
+      printErr("Found HSA_AMD_SEGMENT_GROUP twice!");
+      return HSA_STATUS_ERROR;
+    }
+    size_t size = 0; // HSA_AMD_MEMORY_POOL_INFO_SIZE is reported as a size_t, not a uint32_t
+
+    err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size);
+    RET_IF_HSA_ERR(err);
+
+    info->lds_size = (uint32_t) size; // agent_info keeps the LDS size in 32 bits
+  }
+  else if (segment == HSA_AMD_SEGMENT_GLOBAL) {
+    // Global memory
+    uint32_t global_flags = 0;
+
+    err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flags);
+    RET_IF_HSA_ERR(err);
+
+    if (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED) {
+      if (info->global_size != 0) {
+        printErr("Found HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED twice!");
+        return HSA_STATUS_ERROR;
+      }
+
+      size_t size = 0; // same attribute as above: must be read into a size_t
+
+      err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size);
+      RET_IF_HSA_ERR(err);
+
+      info->global_size = size;
+    }
+  }
+  return HSA_STATUS_SUCCESS;
+}
+
 hsa_status_t agent_callback(hsa_agent_t agent, void *data) {
   struct agent_info* info = reinterpret_cast<struct agent_info*>(data);
 
@@ -62,6 +113,17 @@ hsa_status_t agent_callback(hsa_agent_t agent, void *data) {
 
     err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &info->compute_unit);
     RET_IF_HSA_ERR(err);
+
+    // According to the documentation, this is deprecated. But what should I be using then?
+    err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MEMORY_WIDTH, &info->bus_width);
+    RET_IF_HSA_ERR(err);
+
+    // We will check against zero to see if it was set beforehand.
+    info->global_size = 0;
+    info->lds_size = 0;
+    // This will fill global_size and lds_size.
+    err = hsa_amd_agent_iterate_memory_pools(agent, memory_pool_callback, data);
+    RET_IF_HSA_ERR(err);
   }
 
   return HSA_STATUS_SUCCESS;
@@ -75,6 +137,18 @@ struct topology_h* get_topology_info(struct agent_info info) {
   return topo;
 }
 
+// Copies the memory figures gathered in agent_info into a newly allocated
+// struct memory. Only bus_width, lds_size and size_bytes are set here.
+struct memory* get_memory_info(struct gpu_info* gpu, struct agent_info info) {
+  struct memory* mem = (struct memory*) emalloc(sizeof(struct memory));
+
+  mem->bus_width = info.bus_width;
+  mem->lds_size = info.lds_size;
+  mem->size_bytes = info.global_size;
+
+  return mem;
+}
+
 struct gpu_info* get_gpu_info_hsa(int gpu_idx) {
   struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
   gpu->pci = NULL;
@@ -118,6 +192,7 @@ struct gpu_info* get_gpu_info_hsa(int gpu_idx) {
   gpu->name = (char *) emalloc(sizeof(char) * (strlen(info.device_mkt_name) + 1));
   strcpy(gpu->name, info.device_mkt_name);
   gpu->arch = get_uarch_from_hsa(gpu, info.gpu_name);
+  gpu->mem = get_memory_info(gpu, info);
 
   if (gpu->arch == NULL) {
     return NULL;