10 Commits

Author SHA1 Message Date
Dr-Noob
84e6021a95 Remove TODO 2025-10-26 10:47:27 +01:00
Dr-Noob
a4916255cf Fix 2025-10-26 10:44:09 +01:00
Dr-Noob
b5dc30d4b3 Add matrix cores 2025-10-26 10:42:25 +01:00
Dr-Noob
2fa90179b4 Fix 2025-10-26 10:28:41 +01:00
Dr-Noob
711936be81 Show XCDs 2025-10-26 10:27:51 +01:00
Dr-Noob
94a9a440f0 Basic support 2025-10-23 21:40:14 +02:00
Dr-Noob
78d34e71f1 [v0.30][AMD] Add support to fetch bus width, global memory and LDS size
We can use hsa_amd_agent_iterate_memory_pools to fetch info about GPU
memory pools in the GPU. HSA_AMD_SEGMENT_GROUP seems to be LDS, and
HSA_AMD_SEGMENT_GLOBAL seems to be global memory.

However, the latter is reported multiple times (I don't know why). The
only solution I found for this is to check for the
HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED flag, which
seems to be reported only once.

For bus width, we simply use HSA_AMD_AGENT_INFO_MEMORY_WIDTH.
2025-10-23 21:30:02 +02:00
Dr-Noob
82ea16fc3d [v0.30] Fix warning in printer 2025-10-16 20:01:14 +02:00
Dr-Noob
6589de9717 [v0.30] Reorganize attributes in printer and add CUs attr for AMD 2025-10-16 19:53:48 +02:00
Dr-Noob
0950b97393 [v0.30] Build pciutils only if necessary
If only HSA is enabled we don't need pciutils since AMD detection does
not rely on it. Therefore we change CMakeLists.txt to build pciutils
only if required.

This commit has some side-effects:
1. We now don't build Intel backend by default. In other words, no
   backend is built by default, the user must specify which backend
   to use.
2. There were some issues with includes and wrongly used defines and
   variables. This commit fixes all that.
2025-10-16 08:26:42 +02:00
5 changed files with 213 additions and 80 deletions

View File

@@ -101,6 +101,17 @@ char* get_str_bus_width(struct gpu_info* gpu) {
return string; return string;
} }
/*
 * Formats the LDS size of the GPU as a string in KB, e.g. "64 KB".
 * The value shown is gpu->mem->lds_size divided by 1024.
 * Returns a buffer obtained from ecalloc (presumably owned by the caller
 * from then on; confirm against the call sites).
 */
char* get_str_lds_size(struct gpu_info* gpu) {
  // TODO: Show XX KB (XX MB Total) like in cpufetch
  // An int32_t divided by 1024 needs at most 8 characters ("-2097152"),
  // followed by a space, "KB" and the NUL terminator.
  uint32_t size = 8+1+2+1;
  // Kept from the original: the buffer must also be able to hold STRING_UNKNOWN.
  assert(strlen(STRING_UNKNOWN)+1 <= size);
  char* string = (char *) ecalloc(size, sizeof(char));

  // snprintf bounds the write, so an unexpected value can never overrun the buffer.
  snprintf(string, size, "%d KB", gpu->mem->lds_size / 1024);
  return string;
}
char* get_str_memory_clock(struct gpu_info* gpu) { char* get_str_memory_clock(struct gpu_info* gpu) {
return get_freq_as_str_mhz(gpu->mem->freq); return get_freq_as_str_mhz(gpu->mem->freq);
} }

View File

@@ -46,6 +46,10 @@ struct topology_c {
// HSA topology // HSA topology
struct topology_h { struct topology_h {
int32_t compute_units; int32_t compute_units;
int32_t num_shader_engines;
int32_t simds_per_cu;
int32_t num_xcc;
int32_t matrix_cores;
}; };
// Intel topology // Intel topology
@@ -61,6 +65,7 @@ struct memory {
int32_t bus_width; int32_t bus_width;
int32_t freq; int32_t freq;
int32_t clk_mul; // clock multiplier int32_t clk_mul; // clock multiplier
int32_t lds_size; // HSA specific for now
}; };
struct gpu_info { struct gpu_info {
@@ -88,6 +93,7 @@ char* get_str_freq(struct gpu_info* gpu);
char* get_str_memory_size(struct gpu_info* gpu); char* get_str_memory_size(struct gpu_info* gpu);
char* get_str_memory_type(struct gpu_info* gpu); char* get_str_memory_type(struct gpu_info* gpu);
char* get_str_bus_width(struct gpu_info* gpu); char* get_str_bus_width(struct gpu_info* gpu);
char* get_str_lds_size(struct gpu_info* gpu);
char* get_str_memory_clock(struct gpu_info* gpu); char* get_str_memory_clock(struct gpu_info* gpu);
char* get_str_l2(struct gpu_info* gpu); char* get_str_l2(struct gpu_info* gpu);
char* get_str_peak_performance(struct gpu_info* gpu); char* get_str_peak_performance(struct gpu_info* gpu);

View File

@@ -32,64 +32,60 @@
#define MAX_ATTRIBUTES 100 #define MAX_ATTRIBUTES 100
#define MAX_TERM_SIZE 1024 #define MAX_TERM_SIZE 1024
// Describes one printable attribute: its identifier plus the two label
// variants (full and abbreviated) that can be shown next to its value.
typedef struct {
// Attribute identifier (one of the ATTRIBUTE_* enum values)
int id;
// Full-length label, used when use_short is false
const char *name;
// Abbreviated label, used when use_short is true
const char *shortname;
} AttributeField;
// AttributeField IDs
// Used by
enum { enum {
ATTRIBUTE_NAME, ATTRIBUTE_NAME, // ALL
ATTRIBUTE_CHIP, ATTRIBUTE_CHIP, // ALL
ATTRIBUTE_UARCH, ATTRIBUTE_UARCH, // ALL
ATTRIBUTE_TECHNOLOGY, ATTRIBUTE_TECHNOLOGY, // ALL
ATTRIBUTE_GT, ATTRIBUTE_FREQUENCY, // ALL
ATTRIBUTE_FREQUENCY, ATTRIBUTE_PEAK, // ALL
ATTRIBUTE_STREAMINGMP, ATTRIBUTE_COMPUTE_UNITS, // HSA
ATTRIBUTE_CORESPERMP, ATTRIBUTE_MATRIX_CORES, // HSA
ATTRIBUTE_CUDA_CORES, ATTRIBUTE_XCDS, // HSA
ATTRIBUTE_TENSOR_CORES, ATTRIBUTE_LDS_SIZE, // HSA
ATTRIBUTE_EUS, ATTRIBUTE_STREAMINGMP, // CUDA
ATTRIBUTE_L2, ATTRIBUTE_CORESPERMP, // CUDA
ATTRIBUTE_MEMORY, ATTRIBUTE_CUDA_CORES, // CUDA
ATTRIBUTE_MEMORY_FREQ, ATTRIBUTE_TENSOR_CORES, // CUDA
ATTRIBUTE_BUS_WIDTH, ATTRIBUTE_L2, // CUDA
ATTRIBUTE_PEAK, ATTRIBUTE_MEMORY, // CUDA,HSA
ATTRIBUTE_PEAK_TENSOR, ATTRIBUTE_MEMORY_FREQ, // CUDA
ATTRIBUTE_BUS_WIDTH, // CUDA,HSA
ATTRIBUTE_PEAK_TENSOR, // CUDA
ATTRIBUTE_EUS, // Intel
ATTRIBUTE_GT, // Intel
}; };
static const char* ATTRIBUTE_FIELDS [] = { static const AttributeField ATTRIBUTE_INFO[] = {
"Name:", { ATTRIBUTE_NAME, "Name:", "Name:" },
"GPU processor:", { ATTRIBUTE_CHIP, "GPU processor:", "Processor:" },
"Microarchitecture:", { ATTRIBUTE_UARCH, "Microarchitecture:", "uArch:" },
"Technology:", { ATTRIBUTE_TECHNOLOGY, "Technology:", "Technology:" },
"Graphics Tier:", { ATTRIBUTE_FREQUENCY, "Max Frequency:", "Max Freq.:" },
"Max Frequency:", { ATTRIBUTE_PEAK, "Peak Performance:", "Peak Perf.:" },
"SMs:", { ATTRIBUTE_COMPUTE_UNITS, "Compute Units (CUs):", "CUs" },
"Cores/SM:", { ATTRIBUTE_MATRIX_CORES, "Matrix Cores: ", "Matrix Cores:" },
"CUDA Cores:", { ATTRIBUTE_XCDS, "XCDs:", "XCDs" },
"Tensor Cores:", { ATTRIBUTE_LDS_SIZE, "LDS size:", "LDS:" },
"Execution Units:", { ATTRIBUTE_STREAMINGMP, "SMs:", "SMs:" },
"L2 Size:", { ATTRIBUTE_CORESPERMP, "Cores/SM:", "Cores/SM:" },
"Memory:", { ATTRIBUTE_CUDA_CORES, "CUDA Cores:", "CUDA Cores:" },
"Memory frequency:", { ATTRIBUTE_TENSOR_CORES, "Tensor Cores:", "Tensor Cores:" },
"Bus width:", { ATTRIBUTE_L2, "L2 Size:", "L2 Size:" },
"Peak Performance:", { ATTRIBUTE_MEMORY, "Memory:", "Memory:" },
"Peak Performance (MMA):", { ATTRIBUTE_MEMORY_FREQ, "Memory frequency:", "Memory freq.:" },
}; { ATTRIBUTE_BUS_WIDTH, "Bus width:", "Bus width:" },
{ ATTRIBUTE_PEAK_TENSOR, "Peak Performance (MMA):", "Peak Perf.(MMA):" },
static const char* ATTRIBUTE_FIELDS_SHORT [] = { { ATTRIBUTE_EUS, "Execution Units:", "EUs:" },
"Name:", { ATTRIBUTE_GT, "Graphics Tier:", "GT:" },
"Processor:",
"uArch:",
"Technology:",
"GT:",
"Max Freq.:",
"SMs:",
"Cores/SM:",
"CUDA Cores:",
"Tensor Cores:",
"EUs:",
"L2 Size:",
"Memory:",
"Memory freq.:",
"Bus width:",
"Peak Perf.:",
"Peak Perf.(MMA):",
}; };
struct terminal { struct terminal {
@@ -207,8 +203,6 @@ bool ascii_fits_screen(int termw, struct ascii_logo logo, int lf) {
void replace_bgbyfg_color(struct ascii_logo* logo) { void replace_bgbyfg_color(struct ascii_logo* logo) {
// Replace background by foreground color // Replace background by foreground color
for(int i=0; i < 2; i++) { for(int i=0; i < 2; i++) {
if(logo->color_ascii[i] == NULL) break;
if(strcmp(logo->color_ascii[i], C_BG_BLACK) == 0) strcpy(logo->color_ascii[i], C_FG_BLACK); if(strcmp(logo->color_ascii[i], C_BG_BLACK) == 0) strcpy(logo->color_ascii[i], C_FG_BLACK);
else if(strcmp(logo->color_ascii[i], C_BG_RED) == 0) strcpy(logo->color_ascii[i], C_FG_RED); else if(strcmp(logo->color_ascii[i], C_BG_RED) == 0) strcpy(logo->color_ascii[i], C_FG_RED);
else if(strcmp(logo->color_ascii[i], C_BG_GREEN) == 0) strcpy(logo->color_ascii[i], C_FG_GREEN); else if(strcmp(logo->color_ascii[i], C_BG_GREEN) == 0) strcpy(logo->color_ascii[i], C_FG_GREEN);
@@ -276,13 +270,14 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
} }
} }
uint32_t longest_attribute_length(struct ascii* art, const char** attribute_fields) { uint32_t longest_attribute_length(struct ascii* art, bool use_short) {
uint32_t max = 0; uint32_t max = 0;
uint64_t len = 0; uint64_t len = 0;
for(uint32_t i=0; i < art->n_attributes_set; i++) { for(uint32_t i=0; i < art->n_attributes_set; i++) {
if(art->attributes[i]->value != NULL) { if(art->attributes[i]->value != NULL) {
len = strlen(attribute_fields[art->attributes[i]->type]); const char* str = use_short ? ATTRIBUTE_INFO[art->attributes[i]->type].shortname : ATTRIBUTE_INFO[art->attributes[i]->type].name;
len = strlen(str);
if(len > max) max = len; if(len > max) max = len;
} }
} }
@@ -306,7 +301,7 @@ uint32_t longest_field_length(struct ascii* art, int la) {
return max; return max;
} }
void print_ascii_generic(struct ascii* art, uint32_t la, int32_t text_space, const char** attribute_fields) { void print_ascii_generic(struct ascii* art, uint32_t la, int32_t text_space, bool use_short) {
struct ascii_logo* logo = art->art; struct ascii_logo* logo = art->art;
int attr_to_print = 0; int attr_to_print = 0;
int attr_type; int attr_type;
@@ -350,11 +345,13 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t text_space, con
attr_value = art->attributes[attr_to_print]->value; attr_value = art->attributes[attr_to_print]->value;
attr_to_print++; attr_to_print++;
space_right = 1 + (la - strlen(attribute_fields[attr_type])); const char* attr_str = use_short ? ATTRIBUTE_INFO[attr_type].shortname : ATTRIBUTE_INFO[attr_type].name;
space_right = 1 + (la - strlen(attr_str));
current_space = max(0, text_space); current_space = max(0, text_space);
printf("%s%.*s%s", logo->color_text[0], current_space, attribute_fields[attr_type], art->reset); printf("%s%.*s%s", logo->color_text[0], current_space, attr_str, art->reset);
current_space = max(0, current_space - (int) strlen(attribute_fields[attr_type])); current_space = max(0, current_space - (int) strlen(attr_str));
printf("%*s", min(current_space, space_right), ""); printf("%*s", min(current_space, space_right), "");
current_space = max(0, current_space - min(current_space, space_right)); current_space = max(0, current_space - min(current_space, space_right));
printf("%s%.*s%s", logo->color_text[1], current_space, attr_value, art->reset); printf("%s%.*s%s", logo->color_text[1], current_space, attr_value, art->reset);
@@ -388,19 +385,19 @@ bool print_gpufetch_intel(struct gpu_info* gpu, STYLE s, struct color** cs, stru
setAttribute(art, ATTRIBUTE_EUS, eus); setAttribute(art, ATTRIBUTE_EUS, eus);
setAttribute(art, ATTRIBUTE_PEAK, pp); setAttribute(art, ATTRIBUTE_PEAK, pp);
const char** attribute_fields = ATTRIBUTE_FIELDS; bool use_short = false;
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields); uint32_t longest_attribute = longest_attribute_length(art, use_short);
uint32_t longest_field = longest_field_length(art, longest_attribute); uint32_t longest_field = longest_field_length(art, longest_attribute);
choose_ascii_art(art, cs, term, longest_field); choose_ascii_art(art, cs, term, longest_field);
if(!ascii_fits_screen(term->w, *art->art, longest_field)) { if(!ascii_fits_screen(term->w, *art->art, longest_field)) {
// Despite of choosing the smallest logo, the output does not fit // Despite of choosing the smallest logo, the output does not fit
// Choose the shorter field names and recalculate the longest attr // Choose the shorter field names and recalculate the longest attr
attribute_fields = ATTRIBUTE_FIELDS_SHORT; use_short = true;
longest_attribute = longest_attribute_length(art, attribute_fields); longest_attribute = longest_attribute_length(art, use_short);
} }
print_ascii_generic(art, longest_attribute, term->w - art->art->width, attribute_fields); print_ascii_generic(art, longest_attribute, term->w - art->art->width, use_short);
return true; return true;
} }
@@ -457,19 +454,19 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor); setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor);
} }
const char** attribute_fields = ATTRIBUTE_FIELDS; bool use_short = false;
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields); uint32_t longest_attribute = longest_attribute_length(art, use_short);
uint32_t longest_field = longest_field_length(art, longest_attribute); uint32_t longest_field = longest_field_length(art, longest_attribute);
choose_ascii_art(art, cs, term, longest_field); choose_ascii_art(art, cs, term, longest_field);
if(!ascii_fits_screen(term->w, *art->art, longest_field)) { if(!ascii_fits_screen(term->w, *art->art, longest_field)) {
// Despite of choosing the smallest logo, the output does not fit // Despite of choosing the smallest logo, the output does not fit
// Choose the shorter field names and recalculate the longest attr // Choose the shorter field names and recalculate the longest attr
attribute_fields = ATTRIBUTE_FIELDS_SHORT; use_short = true;
longest_attribute = longest_attribute_length(art, attribute_fields); longest_attribute = longest_attribute_length(art, use_short);
} }
print_ascii_generic(art, longest_attribute, term->w - art->art->width, attribute_fields); print_ascii_generic(art, longest_attribute, term->w - art->art->width, use_short);
free(manufacturing_process); free(manufacturing_process);
free(max_frequency); free(max_frequency);
@@ -494,8 +491,13 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
char* gpu_chip = get_str_chip(gpu->arch); char* gpu_chip = get_str_chip(gpu->arch);
char* uarch = get_str_uarch_hsa(gpu->arch); char* uarch = get_str_uarch_hsa(gpu->arch);
char* manufacturing_process = get_str_process(gpu->arch); char* manufacturing_process = get_str_process(gpu->arch);
char* sms = get_str_cu(gpu); char* cus = get_str_cu(gpu);
char* matrix_cores = get_str_matrix_cores(gpu);
char* xcds = get_str_xcds(gpu);
char* max_frequency = get_str_freq(gpu); char* max_frequency = get_str_freq(gpu);
char* bus_width = get_str_bus_width(gpu);
char* mem_size = get_str_memory_size(gpu);
char* lds_size = get_str_lds_size(gpu);
setAttribute(art, ATTRIBUTE_NAME, gpu_name); setAttribute(art, ATTRIBUTE_NAME, gpu_name);
if (gpu_chip != NULL) { if (gpu_chip != NULL) {
@@ -504,21 +506,28 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
setAttribute(art, ATTRIBUTE_UARCH, uarch); setAttribute(art, ATTRIBUTE_UARCH, uarch);
setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process); setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency); setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
setAttribute(art, ATTRIBUTE_STREAMINGMP, sms); setAttribute(art, ATTRIBUTE_COMPUTE_UNITS, cus);
setAttribute(art, ATTRIBUTE_MATRIX_CORES, matrix_cores);
if (xcds != NULL) {
setAttribute(art, ATTRIBUTE_XCDS, xcds);
}
setAttribute(art, ATTRIBUTE_LDS_SIZE, lds_size);
setAttribute(art, ATTRIBUTE_MEMORY, mem_size);
setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
const char** attribute_fields = ATTRIBUTE_FIELDS; bool use_short = false;
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields); uint32_t longest_attribute = longest_attribute_length(art, use_short);
uint32_t longest_field = longest_field_length(art, longest_attribute); uint32_t longest_field = longest_field_length(art, longest_attribute);
choose_ascii_art(art, cs, term, longest_field); choose_ascii_art(art, cs, term, longest_field);
if(!ascii_fits_screen(term->w, *art->art, longest_field)) { if(!ascii_fits_screen(term->w, *art->art, longest_field)) {
// Despite of choosing the smallest logo, the output does not fit // Despite of choosing the smallest logo, the output does not fit
// Choose the shorter field names and recalculate the longest attr // Choose the shorter field names and recalculate the longest attr
attribute_fields = ATTRIBUTE_FIELDS_SHORT; use_short = true;
longest_attribute = longest_attribute_length(art, attribute_fields); longest_attribute = longest_attribute_length(art, use_short);
} }
print_ascii_generic(art, longest_attribute, term->w - art->art->width, attribute_fields); print_ascii_generic(art, longest_attribute, term->w - art->art->width, use_short);
free(art->attributes); free(art->attributes);
free(art); free(art);

View File

@@ -22,7 +22,16 @@ struct agent_info {
char vendor_name[64]; char vendor_name[64];
char device_mkt_name[64]; char device_mkt_name[64];
uint32_t max_clock_freq; uint32_t max_clock_freq;
// Memory
uint32_t bus_width;
uint32_t lds_size;
uint64_t global_size;
// Topology
uint32_t compute_unit; uint32_t compute_unit;
uint32_t num_shader_engines;
uint32_t simds_per_cu;
uint32_t num_xcc; // Accelerator Complex Dies (XCDs)
uint32_t matrix_cores; // Cores with WMMA/MFMA capabilities
}; };
#define RET_IF_HSA_ERR(err) { \ #define RET_IF_HSA_ERR(err) { \
@@ -40,6 +49,51 @@ struct agent_info {
} \ } \
} }
// Callback passed to hsa_amd_agent_iterate_memory_pools; invoked once per
// memory pool of the agent. `data` must point to a struct agent_info.
//
// Fills in:
//  - info->lds_size    from the pool in the HSA_AMD_SEGMENT_GROUP segment.
//  - info->global_size from the HSA_AMD_SEGMENT_GLOBAL pool that carries the
//    HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED flag.
//
// Both fields are expected to be zero on entry: a non-zero value is treated
// as "already seen" and reported as an error. Returns HSA_STATUS_SUCCESS on
// success, HSA_STATUS_ERROR on a duplicated pool, or whatever RET_IF_HSA_ERR
// returns when an HSA query fails.
hsa_status_t memory_pool_callback(hsa_amd_memory_pool_t pool, void* data) {
  struct agent_info* info = reinterpret_cast<struct agent_info *>(data);

  hsa_amd_segment_t segment;
  hsa_status_t err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
  RET_IF_HSA_ERR(err);

  if (segment == HSA_AMD_SEGMENT_GROUP) {
    // LDS memory
    // We want to make sure that this memory pool is not repeated.
    if (info->lds_size != 0) {
      printErr("Found HSA_AMD_SEGMENT_GROUP twice!");
      return HSA_STATUS_ERROR;
    }

    // HSA_AMD_MEMORY_POOL_INFO_SIZE is documented as a size_t attribute, so
    // the destination must be a size_t: handing the runtime a uint32_t would
    // let it write past the end of the variable on 64-bit platforms.
    size_t size = 0;
    err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size);
    RET_IF_HSA_ERR(err);

    info->lds_size = static_cast<uint32_t>(size);
  }
  else if (segment == HSA_AMD_SEGMENT_GLOBAL) {
    // Global memory
    uint32_t global_flags = 0;
    err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flags);
    RET_IF_HSA_ERR(err);

    // Only the pool carrying this flag is used to record the global memory size.
    if (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED) {
      if (info->global_size != 0) {
        printErr("Found HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED twice!");
        return HSA_STATUS_ERROR;
      }

      // Same attribute as above: query into a size_t, then widen/narrow explicitly.
      size_t size = 0;
      err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size);
      RET_IF_HSA_ERR(err);

      info->global_size = static_cast<uint64_t>(size);
    }
  }

  return HSA_STATUS_SUCCESS;
}
hsa_status_t agent_callback(hsa_agent_t agent, void *data) { hsa_status_t agent_callback(hsa_agent_t agent, void *data) {
struct agent_info* info = reinterpret_cast<struct agent_info *>(data); struct agent_info* info = reinterpret_cast<struct agent_info *>(data);
@@ -62,6 +116,26 @@ hsa_status_t agent_callback(hsa_agent_t agent, void *data) {
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &info->compute_unit); err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &info->compute_unit);
RET_IF_HSA_ERR(err); RET_IF_HSA_ERR(err);
// According to the documentation, this is deprecated. But what should I be using then?
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MEMORY_WIDTH, &info->bus_width);
RET_IF_HSA_ERR(err);
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES, &info->num_shader_engines);
RET_IF_HSA_ERR(err);
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU, &info->simds_per_cu);
RET_IF_HSA_ERR(err);
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_XCC, &info->num_xcc);
RET_IF_HSA_ERR(err);
// We will check against zero to see if it was set beforehand.
info->global_size = 0;
info->lds_size = 0;
// This will fill global_size and lds_size.
err = hsa_amd_agent_iterate_memory_pools(agent, memory_pool_callback, data);
RET_IF_HSA_ERR(err);
} }
return HSA_STATUS_SUCCESS; return HSA_STATUS_SUCCESS;
@@ -71,10 +145,26 @@ struct topology_h* get_topology_info(struct agent_info info) {
struct topology_h* topo = (struct topology_h*) emalloc(sizeof(struct topology_h)); struct topology_h* topo = (struct topology_h*) emalloc(sizeof(struct topology_h));
topo->compute_units = info.compute_unit; topo->compute_units = info.compute_unit;
topo->num_shader_engines = info.num_shader_engines; // not printed at the moment
topo->simds_per_cu = info.simds_per_cu; // not printed at the moment
topo->num_xcc = info.num_xcc;
// Old GPUs (GCN I guess) might not have matrix cores.
// Not sure what would happen here?
topo->matrix_cores = topo->compute_units * topo->simds_per_cu;
return topo; return topo;
} }
// Builds the memory description of an HSA GPU from the values gathered in
// `info`: bus width, LDS size and global memory size. `gpu` is currently not
// read. Returns a struct memory obtained from emalloc.
struct memory* get_memory_info(struct gpu_info* gpu, struct agent_info info) {
  struct memory* mem = (struct memory*) emalloc(sizeof(struct memory));
  // Only some fields are known for HSA; zero the whole struct first so the
  // remaining ones (e.g. freq, clk_mul) never hold indeterminate values.
  memset(mem, 0, sizeof(struct memory));

  mem->bus_width = info.bus_width;
  mem->lds_size = info.lds_size;
  mem->size_bytes = info.global_size;

  return mem;
}
struct gpu_info* get_gpu_info_hsa(int gpu_idx) { struct gpu_info* get_gpu_info_hsa(int gpu_idx) {
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info)); struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
gpu->pci = NULL; gpu->pci = NULL;
@@ -118,6 +208,7 @@ struct gpu_info* get_gpu_info_hsa(int gpu_idx) {
gpu->name = (char *) emalloc(sizeof(char) * (strlen(info.device_mkt_name) + 1)); gpu->name = (char *) emalloc(sizeof(char) * (strlen(info.device_mkt_name) + 1));
strcpy(gpu->name, info.device_mkt_name); strcpy(gpu->name, info.device_mkt_name);
gpu->arch = get_uarch_from_hsa(gpu, info.gpu_name); gpu->arch = get_uarch_from_hsa(gpu, info.gpu_name);
gpu->mem = get_memory_info(gpu, info);
if (gpu->arch == NULL) { if (gpu->arch == NULL) {
return NULL; return NULL;
@@ -135,3 +226,17 @@ struct gpu_info* get_gpu_info_hsa(int gpu_idx) {
char* get_str_cu(struct gpu_info* gpu) { char* get_str_cu(struct gpu_info* gpu) {
return get_str_generic(gpu->topo_h->compute_units); return get_str_generic(gpu->topo_h->compute_units);
} }
// Returns the number of XCDs (gpu->topo_h->num_xcc) as a string, or NULL when
// the GPU has exactly one XCD, in which case we don't want to print it.
char* get_str_xcds(struct gpu_info* gpu) {
  const int32_t xcd_count = gpu->topo_h->num_xcc;

  return (xcd_count == 1) ? NULL : get_str_generic(xcd_count);
}
// Returns the matrix core count (gpu->topo_h->matrix_cores) as a string.
char* get_str_matrix_cores(struct gpu_info* gpu) {
  // TODO: Show XX (WMMA/MFMA)
  const int32_t n_matrix_cores = gpu->topo_h->matrix_cores;

  return get_str_generic(n_matrix_cores);
}

View File

@@ -5,5 +5,7 @@
struct gpu_info* get_gpu_info_hsa(int gpu_idx); struct gpu_info* get_gpu_info_hsa(int gpu_idx);
char* get_str_cu(struct gpu_info* gpu); char* get_str_cu(struct gpu_info* gpu);
char* get_str_xcds(struct gpu_info* gpu);
char* get_str_matrix_cores(struct gpu_info* gpu);
#endif #endif