8 Commits

Author SHA1 Message Date
Dr-Noob
e0c843274c Cleaning memory_pool_callback 2025-10-23 21:19:27 +02:00
Dr-Noob
b543b23f60 Fixes 2025-10-23 21:12:52 +02:00
Dr-Noob
9b519828f4 I guess we can rely on HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED 2025-10-23 21:11:25 +02:00
Dr-Noob
3b567b9381 Ok so this is not reporting the actual size, Ill need to investigate why 2025-10-17 09:05:13 +02:00
Dr-Noob
e1f03c4e04 Print global memory size 2025-10-17 08:48:15 +02:00
Dr-Noob
046f8c1299 Fixes 2025-10-17 08:41:21 +02:00
Dr-Noob
b434fc6fd0 Printer support 2025-10-17 08:38:37 +02:00
Dr-Noob
5beccaebb0 Adding more info 2025-10-17 08:31:26 +02:00
4 changed files with 1 additions and 60 deletions

View File

@@ -46,10 +46,6 @@ struct topology_c {
// HSA topology
struct topology_h {
int32_t compute_units;
int32_t num_shader_engines;
int32_t simds_per_cu;
int32_t num_xcc;
int32_t matrix_cores;
};
// Intel topology

View File

@@ -48,8 +48,6 @@ enum {
ATTRIBUTE_FREQUENCY, // ALL
ATTRIBUTE_PEAK, // ALL
ATTRIBUTE_COMPUTE_UNITS, // HSA
ATTRIBUTE_MATRIX_CORES, // HSA
ATTRIBUTE_XCDS, // HSA
ATTRIBUTE_LDS_SIZE, // HSA
ATTRIBUTE_STREAMINGMP, // CUDA
ATTRIBUTE_CORESPERMP, // CUDA
@@ -72,8 +70,6 @@ static const AttributeField ATTRIBUTE_INFO[] = {
{ ATTRIBUTE_FREQUENCY, "Max Frequency:", "Max Freq.:" },
{ ATTRIBUTE_PEAK, "Peak Performance:", "Peak Perf.:" },
{ ATTRIBUTE_COMPUTE_UNITS, "Compute Units (CUs):", "CUs" },
{ ATTRIBUTE_MATRIX_CORES, "Matrix Cores: ", "Matrix Cores:" },
{ ATTRIBUTE_XCDS, "XCDs:", "XCDs" },
{ ATTRIBUTE_LDS_SIZE, "LDS size:", "LDS:" },
{ ATTRIBUTE_STREAMINGMP, "SMs:", "SMs:" },
{ ATTRIBUTE_CORESPERMP, "Cores/SM:", "Cores/SM:" },
@@ -492,8 +488,6 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
char* uarch = get_str_uarch_hsa(gpu->arch);
char* manufacturing_process = get_str_process(gpu->arch);
char* cus = get_str_cu(gpu);
char* matrix_cores = get_str_matrix_cores(gpu);
char* xcds = get_str_xcds(gpu);
char* max_frequency = get_str_freq(gpu);
char* bus_width = get_str_bus_width(gpu);
char* mem_size = get_str_memory_size(gpu);
@@ -507,10 +501,6 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
setAttribute(art, ATTRIBUTE_COMPUTE_UNITS, cus);
setAttribute(art, ATTRIBUTE_MATRIX_CORES, matrix_cores);
if (xcds != NULL) {
setAttribute(art, ATTRIBUTE_XCDS, xcds);
}
setAttribute(art, ATTRIBUTE_LDS_SIZE, lds_size);
setAttribute(art, ATTRIBUTE_MEMORY, mem_size);
setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);

View File

@@ -22,16 +22,10 @@ struct agent_info {
char vendor_name[64];
char device_mkt_name[64];
uint32_t max_clock_freq;
// Memory
uint32_t compute_unit;
uint32_t bus_width;
uint32_t lds_size;
uint64_t global_size;
// Topology
uint32_t compute_unit;
uint32_t num_shader_engines;
uint32_t simds_per_cu;
uint32_t num_xcc; // Acccelerator Complex Dies (XCDs)
uint32_t matrix_cores; // Cores with WMMA/MFMA capabilities
};
#define RET_IF_HSA_ERR(err) { \
@@ -58,11 +52,6 @@ hsa_status_t memory_pool_callback(hsa_amd_memory_pool_t pool, void* data) {
if (segment == HSA_AMD_SEGMENT_GROUP) {
// LDS memory
// We want to make sure that this memory pool is not repeated.
if (info->lds_size != 0) {
printErr("Found HSA_AMD_SEGMENT_GROUP twice!");
return HSA_STATUS_ERROR;
}
uint32_t size = 0;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size);
@@ -121,19 +110,7 @@ hsa_status_t agent_callback(hsa_agent_t agent, void *data) {
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MEMORY_WIDTH, &info->bus_width);
RET_IF_HSA_ERR(err);
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES, &info->num_shader_engines);
RET_IF_HSA_ERR(err);
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU, &info->simds_per_cu);
RET_IF_HSA_ERR(err);
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_XCC, &info->num_xcc);
RET_IF_HSA_ERR(err);
// We will check against zero to see if it was set beforehand.
info->global_size = 0;
info->lds_size = 0;
// This will fill global_size and lds_size.
err = hsa_amd_agent_iterate_memory_pools(agent, memory_pool_callback, data);
RET_IF_HSA_ERR(err);
}
@@ -145,12 +122,6 @@ struct topology_h* get_topology_info(struct agent_info info) {
struct topology_h* topo = (struct topology_h*) emalloc(sizeof(struct topology_h));
topo->compute_units = info.compute_unit;
topo->num_shader_engines = info.num_shader_engines; // not printed at the moment
topo->simds_per_cu = info.simds_per_cu; // not printed at the moment
topo->num_xcc = info.num_xcc;
// Old GPUs (GCN I guess) might not have matrix cores.
// Not sure what would happen here?
topo->matrix_cores = topo->compute_units * topo->simds_per_cu;
return topo;
}
@@ -226,17 +197,3 @@ struct gpu_info* get_gpu_info_hsa(int gpu_idx) {
char* get_str_cu(struct gpu_info* gpu) {
return get_str_generic(gpu->topo_h->compute_units);
}
char* get_str_xcds(struct gpu_info* gpu) {
// If there is a single XCD, then we dont want to
// print it.
if (gpu->topo_h->num_xcc == 1) {
return NULL;
}
return get_str_generic(gpu->topo_h->num_xcc);
}
char* get_str_matrix_cores(struct gpu_info* gpu) {
// TODO: Show XX (WMMA/MFMA)
return get_str_generic(gpu->topo_h->matrix_cores);
}

View File

@@ -5,7 +5,5 @@
struct gpu_info* get_gpu_info_hsa(int gpu_idx);
char* get_str_cu(struct gpu_info* gpu);
char* get_str_xcds(struct gpu_info* gpu);
char* get_str_matrix_cores(struct gpu_info* gpu);
#endif