Remove TODO

Fix
Add matrix cores
2025-10-26 10:47:27 +01:00 · 2025-10-26 10:44:09 +01:00 · 2025-10-26 10:42:25 +01:00 · 2025-10-26 10:28:41 +01:00 · 2025-10-26 10:27:51 +01:00 · 2025-10-23 21:40:14 +02:00
5 changed files with 213 additions and 80 deletions
--- a/src/common/gpu.cpp
+++ b/src/common/gpu.cpp
@@ -101,6 +101,17 @@ char* get_str_bus_width(struct gpu_info* gpu) {
  return string;
 }

+char* get_str_lds_size(struct gpu_info* gpu) {
+  // Formats gpu->mem->lds_size / 1024 as "<N> KB" in a newly allocated string.
+  // TODO: Show XX KB (XX MB Total) like in cpufetch
+  // Room for any int32_t ("-2147483648" is 11 chars), " KB" and the final '\0'.
+  uint32_t size = 11+3+1;
+  char* string = (char *) ecalloc(size, sizeof(char));
+
+  snprintf(string, size, "%d KB", gpu->mem->lds_size / 1024);
+  return string;
+}
+
 char* get_str_memory_clock(struct gpu_info* gpu) {
  return get_freq_as_str_mhz(gpu->mem->freq);
 }
--- a/src/common/gpu.hpp
+++ b/src/common/gpu.hpp
@@ -46,6 +46,10 @@ struct topology_c {
 // HSA topology
 struct topology_h {
  int32_t compute_units;
+  int32_t num_shader_engines; // not printed at the moment
+  int32_t simds_per_cu;       // not printed at the moment
+  int32_t num_xcc;            // Accelerator Complex Dies (XCDs)
+  int32_t matrix_cores;       // computed as compute_units * simds_per_cu
 };

 // Intel topology
@@ -61,6 +65,7 @@ struct memory {
  int32_t bus_width;
  int32_t freq;
  int32_t clk_mul; // clock multiplier
+  int32_t lds_size; // HSA specific for now
 };

 struct gpu_info {
@@ -88,6 +93,7 @@ char* get_str_freq(struct gpu_info* gpu);
 char* get_str_memory_size(struct gpu_info* gpu);
 char* get_str_memory_type(struct gpu_info* gpu);
 char* get_str_bus_width(struct gpu_info* gpu);
+char* get_str_lds_size(struct gpu_info* gpu);
 char* get_str_memory_clock(struct gpu_info* gpu);
 char* get_str_l2(struct gpu_info* gpu);
 char* get_str_peak_performance(struct gpu_info* gpu);
--- a/src/common/printer.cpp
+++ b/src/common/printer.cpp
@@ -32,64 +32,60 @@
 #define MAX_ATTRIBUTES      100
 #define MAX_TERM_SIZE       1024

+typedef struct {
+  int id;
+  const char *name;
+  const char *shortname;
+} AttributeField;
+
+// AttributeField IDs
+//                         Used by
 enum {
-  ATTRIBUTE_NAME,
-  ATTRIBUTE_CHIP,
-  ATTRIBUTE_UARCH,
-  ATTRIBUTE_TECHNOLOGY,
-  ATTRIBUTE_GT,
-  ATTRIBUTE_FREQUENCY,
-  ATTRIBUTE_STREAMINGMP,
-  ATTRIBUTE_CORESPERMP,
-  ATTRIBUTE_CUDA_CORES,
-  ATTRIBUTE_TENSOR_CORES,
-  ATTRIBUTE_EUS,
-  ATTRIBUTE_L2,
-  ATTRIBUTE_MEMORY,
-  ATTRIBUTE_MEMORY_FREQ,
-  ATTRIBUTE_BUS_WIDTH,
-  ATTRIBUTE_PEAK,
-  ATTRIBUTE_PEAK_TENSOR,
+  ATTRIBUTE_NAME,          // ALL
+  ATTRIBUTE_CHIP,          // ALL
+  ATTRIBUTE_UARCH,         // ALL
+  ATTRIBUTE_TECHNOLOGY,    // ALL
+  ATTRIBUTE_FREQUENCY,     // ALL
+  ATTRIBUTE_PEAK,          // ALL
+  ATTRIBUTE_COMPUTE_UNITS, // HSA
+  ATTRIBUTE_MATRIX_CORES,  // HSA
+  ATTRIBUTE_XCDS,          // HSA
+  ATTRIBUTE_LDS_SIZE,      // HSA
+  ATTRIBUTE_STREAMINGMP,   // CUDA
+  ATTRIBUTE_CORESPERMP,    // CUDA
+  ATTRIBUTE_CUDA_CORES,    // CUDA
+  ATTRIBUTE_TENSOR_CORES,  // CUDA
+  ATTRIBUTE_L2,            // CUDA
+  ATTRIBUTE_MEMORY,        // CUDA,HSA
+  ATTRIBUTE_MEMORY_FREQ,   // CUDA
+  ATTRIBUTE_BUS_WIDTH,     // CUDA,HSA
+  ATTRIBUTE_PEAK_TENSOR,   // CUDA
+  ATTRIBUTE_EUS,           // Intel
+  ATTRIBUTE_GT,            // Intel
 };

-static const char* ATTRIBUTE_FIELDS [] = {
-  "Name:",
-  "GPU processor:",
-  "Microarchitecture:",
-  "Technology:",
-  "Graphics Tier:",
-  "Max Frequency:",
-  "SMs:",
-  "Cores/SM:",
-  "CUDA Cores:",
-  "Tensor Cores:",
-  "Execution Units:",
-  "L2 Size:",
-  "Memory:",
-  "Memory frequency:",
-  "Bus width:",
-  "Peak Performance:",
-  "Peak Performance (MMA):",
-};
-
-static const char* ATTRIBUTE_FIELDS_SHORT [] = {
-  "Name:",
-  "Processor:",
-  "uArch:",
-  "Technology:",
-  "GT:",
-  "Max Freq.:",
-  "SMs:",
-  "Cores/SM:",
-  "CUDA Cores:",
-  "Tensor Cores:",
-  "EUs:",
-  "L2 Size:",
-  "Memory:",
-  "Memory freq.:",
-  "Bus width:",
-  "Peak Perf.:",
-  "Peak Perf.(MMA):",
+static const AttributeField ATTRIBUTE_INFO[] = { // Indexed by attribute ID: keep in enum order
+  { ATTRIBUTE_NAME,          "Name:",                   "Name:" },
+  { ATTRIBUTE_CHIP,          "GPU processor:",          "Processor:" },
+  { ATTRIBUTE_UARCH,         "Microarchitecture:",      "uArch:" },
+  { ATTRIBUTE_TECHNOLOGY,    "Technology:",             "Technology:" },
+  { ATTRIBUTE_FREQUENCY,     "Max Frequency:",          "Max Freq.:" },
+  { ATTRIBUTE_PEAK,          "Peak Performance:",       "Peak Perf.:" },
+  { ATTRIBUTE_COMPUTE_UNITS, "Compute Units (CUs):",    "CUs:" },
+  { ATTRIBUTE_MATRIX_CORES,  "Matrix Cores:",           "Matrix Cores:" },
+  { ATTRIBUTE_XCDS,          "XCDs:",                   "XCDs:" },
+  { ATTRIBUTE_LDS_SIZE,      "LDS size:",               "LDS:" },
+  { ATTRIBUTE_STREAMINGMP,   "SMs:",                    "SMs:" },
+  { ATTRIBUTE_CORESPERMP,    "Cores/SM:",               "Cores/SM:" },
+  { ATTRIBUTE_CUDA_CORES,    "CUDA Cores:",             "CUDA Cores:" },
+  { ATTRIBUTE_TENSOR_CORES,  "Tensor Cores:",           "Tensor Cores:" },
+  { ATTRIBUTE_L2,            "L2 Size:",                "L2 Size:" },
+  { ATTRIBUTE_MEMORY,        "Memory:",                 "Memory:" },
+  { ATTRIBUTE_MEMORY_FREQ,   "Memory frequency:",       "Memory freq.:" },
+  { ATTRIBUTE_BUS_WIDTH,     "Bus width:",              "Bus width:" },
+  { ATTRIBUTE_PEAK_TENSOR,   "Peak Performance (MMA):", "Peak Perf.(MMA):" },
+  { ATTRIBUTE_EUS,           "Execution Units:",        "EUs:" },
+  { ATTRIBUTE_GT,            "Graphics Tier:",          "GT:" },
 };

 struct terminal {
@@ -207,8 +203,6 @@ bool ascii_fits_screen(int termw, struct ascii_logo logo, int lf) {
 void replace_bgbyfg_color(struct ascii_logo* logo) {
  // Replace background by foreground color
  for(int i=0; i < 2; i++) {
-    if(logo->color_ascii[i] == NULL) break;
-
    if(strcmp(logo->color_ascii[i], C_BG_BLACK) == 0) strcpy(logo->color_ascii[i], C_FG_BLACK);
    else if(strcmp(logo->color_ascii[i], C_BG_RED) == 0) strcpy(logo->color_ascii[i], C_FG_RED);
    else if(strcmp(logo->color_ascii[i], C_BG_GREEN) == 0) strcpy(logo->color_ascii[i], C_FG_GREEN);
@@ -276,13 +270,14 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
  }
 }

-uint32_t longest_attribute_length(struct ascii* art, const char** attribute_fields) {
+uint32_t longest_attribute_length(struct ascii* art, bool use_short) {
  uint32_t max = 0;
  uint64_t len = 0;

  for(uint32_t i=0; i < art->n_attributes_set; i++) {
    if(art->attributes[i]->value != NULL) {
-      len = strlen(attribute_fields[art->attributes[i]->type]);
+      const char* str = use_short ? ATTRIBUTE_INFO[art->attributes[i]->type].shortname : ATTRIBUTE_INFO[art->attributes[i]->type].name;
+      len = strlen(str);
      if(len > max) max = len;
    }
  }
@@ -306,7 +301,7 @@ uint32_t longest_field_length(struct ascii* art, int la) {
  return max;
 }

-void print_ascii_generic(struct ascii* art, uint32_t la, int32_t text_space, const char** attribute_fields) {
+void print_ascii_generic(struct ascii* art, uint32_t la, int32_t text_space, bool use_short) {
  struct ascii_logo* logo = art->art;
  int attr_to_print = 0;
  int attr_type;
@@ -350,11 +345,13 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t text_space, con
      attr_value = art->attributes[attr_to_print]->value;
      attr_to_print++;

-      space_right = 1 + (la - strlen(attribute_fields[attr_type]));
+      const char* attr_str = use_short ? ATTRIBUTE_INFO[attr_type].shortname : ATTRIBUTE_INFO[attr_type].name;
+
+      space_right = 1 + (la - strlen(attr_str));
      current_space = max(0, text_space);

-      printf("%s%.*s%s", logo->color_text[0], current_space, attribute_fields[attr_type], art->reset);
-      current_space = max(0, current_space - (int) strlen(attribute_fields[attr_type]));
+      printf("%s%.*s%s", logo->color_text[0], current_space, attr_str, art->reset);
+      current_space = max(0, current_space - (int) strlen(attr_str));
      printf("%*s", min(current_space, space_right), "");
      current_space = max(0, current_space - min(current_space, space_right));
      printf("%s%.*s%s", logo->color_text[1], current_space, attr_value, art->reset);
@@ -388,19 +385,19 @@ bool print_gpufetch_intel(struct gpu_info* gpu, STYLE s, struct color** cs, stru
  setAttribute(art, ATTRIBUTE_EUS, eus);
  setAttribute(art, ATTRIBUTE_PEAK, pp);

-  const char** attribute_fields = ATTRIBUTE_FIELDS;
-  uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
+  bool use_short = false;
+  uint32_t longest_attribute = longest_attribute_length(art, use_short);
  uint32_t longest_field = longest_field_length(art, longest_attribute);
  choose_ascii_art(art, cs, term, longest_field);

  if(!ascii_fits_screen(term->w, *art->art, longest_field)) {
    // Despite of choosing the smallest logo, the output does not fit
    // Choose the shorter field names and recalculate the longest attr
-    attribute_fields = ATTRIBUTE_FIELDS_SHORT;
-    longest_attribute = longest_attribute_length(art, attribute_fields);
+    use_short = true;
+    longest_attribute = longest_attribute_length(art, use_short);
  }

-  print_ascii_generic(art, longest_attribute, term->w - art->art->width, attribute_fields);
+  print_ascii_generic(art, longest_attribute, term->w - art->art->width, use_short);

  return true;
 }
@@ -457,19 +454,19 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
    setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor);
  }

-  const char** attribute_fields = ATTRIBUTE_FIELDS;
-  uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
+  bool use_short = false;
+  uint32_t longest_attribute = longest_attribute_length(art, use_short);
  uint32_t longest_field = longest_field_length(art, longest_attribute);
  choose_ascii_art(art, cs, term, longest_field);

  if(!ascii_fits_screen(term->w, *art->art, longest_field)) {
    // Despite of choosing the smallest logo, the output does not fit
    // Choose the shorter field names and recalculate the longest attr
-    attribute_fields = ATTRIBUTE_FIELDS_SHORT;
-    longest_attribute = longest_attribute_length(art, attribute_fields);
+    use_short = true;
+    longest_attribute = longest_attribute_length(art, use_short);
  }

-  print_ascii_generic(art, longest_attribute, term->w - art->art->width, attribute_fields);
+  print_ascii_generic(art, longest_attribute, term->w - art->art->width, use_short);

  free(manufacturing_process);
  free(max_frequency);
@@ -494,8 +491,13 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
  char* gpu_chip = get_str_chip(gpu->arch);
  char* uarch = get_str_uarch_hsa(gpu->arch);
  char* manufacturing_process = get_str_process(gpu->arch);
-  char* sms = get_str_cu(gpu);
+  char* cus = get_str_cu(gpu);
+  char* matrix_cores = get_str_matrix_cores(gpu);
+  char* xcds = get_str_xcds(gpu);
  char* max_frequency = get_str_freq(gpu);
+  char* bus_width = get_str_bus_width(gpu);
+  char* mem_size = get_str_memory_size(gpu);
+  char* lds_size = get_str_lds_size(gpu);

  setAttribute(art, ATTRIBUTE_NAME, gpu_name);
  if (gpu_chip != NULL) {
@@ -504,21 +506,28 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
  setAttribute(art, ATTRIBUTE_UARCH, uarch);
  setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
  setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
-  setAttribute(art, ATTRIBUTE_STREAMINGMP, sms);
+  setAttribute(art, ATTRIBUTE_COMPUTE_UNITS, cus);
+  setAttribute(art, ATTRIBUTE_MATRIX_CORES, matrix_cores);
+  if (xcds != NULL) {
+    setAttribute(art, ATTRIBUTE_XCDS, xcds);
+  }
+  setAttribute(art, ATTRIBUTE_LDS_SIZE, lds_size);
+  setAttribute(art, ATTRIBUTE_MEMORY, mem_size);
+  setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);

-  const char** attribute_fields = ATTRIBUTE_FIELDS;
-  uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
+  bool use_short = false;
+  uint32_t longest_attribute = longest_attribute_length(art, use_short);
  uint32_t longest_field = longest_field_length(art, longest_attribute);
  choose_ascii_art(art, cs, term, longest_field);

  if(!ascii_fits_screen(term->w, *art->art, longest_field)) {
    // Despite of choosing the smallest logo, the output does not fit
    // Choose the shorter field names and recalculate the longest attr
-    attribute_fields = ATTRIBUTE_FIELDS_SHORT;
-    longest_attribute = longest_attribute_length(art, attribute_fields);
+    use_short = true;
+    longest_attribute = longest_attribute_length(art, use_short);
  }

-  print_ascii_generic(art, longest_attribute, term->w - art->art->width, attribute_fields);
+  print_ascii_generic(art, longest_attribute, term->w - art->art->width, use_short);

  free(art->attributes);
  free(art);
--- a/src/hsa/hsa.cpp
+++ b/src/hsa/hsa.cpp
@@ -22,7 +22,16 @@ struct agent_info {
  char vendor_name[64];
  char device_mkt_name[64];
  uint32_t max_clock_freq;
+  // Memory
+  uint32_t bus_width;
+  uint32_t lds_size;
+  uint64_t global_size;
+  // Topology
  uint32_t compute_unit;
+  uint32_t num_shader_engines;
+  uint32_t simds_per_cu;
+  uint32_t num_xcc;            // Accelerator Complex Dies (XCDs)
+  uint32_t matrix_cores;       // Cores with WMMA/MFMA capabilities
 };

 #define RET_IF_HSA_ERR(err) { \
@@ -40,6 +49,51 @@ struct agent_info {
  }                                                                           \
 }

+// Memory pool callback: stores the LDS size (GROUP segment) and the global memory
+// size (GLOBAL pool with the EXTENDED_SCOPE_FINE_GRAINED flag) in the agent_info
+// passed in data. Fails if either pool is found twice (detected via a non-zero size).
+hsa_status_t memory_pool_callback(hsa_amd_memory_pool_t pool, void* data) {
+  struct agent_info* info = reinterpret_cast<struct agent_info *>(data);
+  hsa_amd_segment_t segment;
+  hsa_status_t err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
+  RET_IF_HSA_ERR(err);
+
+  if (segment == HSA_AMD_SEGMENT_GROUP) {
+    // LDS memory
+    // We want to make sure that this memory pool is not repeated.
+    if (info->lds_size != 0) {
+      printErr("Found HSA_AMD_SEGMENT_GROUP twice!");
+      return HSA_STATUS_ERROR;
+    }
+    // HSA_AMD_MEMORY_POOL_INFO_SIZE is documented as a size_t attribute, so a
+    // uint32_t destination would be overrun on 64-bit targets.
+    size_t size = 0;
+    err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size);
+    RET_IF_HSA_ERR(err);
+
+    info->lds_size = (uint32_t) size;
+  }
+  else if (segment == HSA_AMD_SEGMENT_GLOBAL) {
+    // Global memory
+    uint32_t global_flags = 0;
+    err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flags);
+    RET_IF_HSA_ERR(err);
+
+    if (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED) {
+      if (info->global_size != 0) {
+        printErr("Found HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED twice!");
+        return HSA_STATUS_ERROR;
+      }
+      size_t size = 0;
+      err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size);
+      RET_IF_HSA_ERR(err);
+
+      info->global_size = size;
+    }
+  }
+  return HSA_STATUS_SUCCESS;
+}
+
 hsa_status_t agent_callback(hsa_agent_t agent, void *data) {
  struct agent_info* info = reinterpret_cast<struct agent_info *>(data);

@@ -62,6 +116,26 @@ hsa_status_t agent_callback(hsa_agent_t agent, void *data) {

    err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &info->compute_unit);
    RET_IF_HSA_ERR(err);
+
+    // According to the documentation, this is deprecated. But what should I be using then?
+    err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MEMORY_WIDTH, &info->bus_width);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES, &info->num_shader_engines);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU, &info->simds_per_cu);
+    RET_IF_HSA_ERR(err);
+
+    err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_XCC, &info->num_xcc);
+    RET_IF_HSA_ERR(err);
+
+    // We will check against zero to see if it was set beforehand.
+    info->global_size = 0;
+    info->lds_size = 0;
+    // This will fill global_size and lds_size.
+    err = hsa_amd_agent_iterate_memory_pools(agent, memory_pool_callback, data);
+    RET_IF_HSA_ERR(err);
  }

  return HSA_STATUS_SUCCESS;
@@ -71,10 +145,26 @@ struct topology_h* get_topology_info(struct agent_info info) {
  struct topology_h* topo = (struct topology_h*) emalloc(sizeof(struct topology_h));

  topo->compute_units = info.compute_unit;
+  topo->num_shader_engines = info.num_shader_engines; // not printed at the moment
+  topo->simds_per_cu = info.simds_per_cu;             // not printed at the moment
+  topo->num_xcc = info.num_xcc;
+  // Old GPUs (GCN I guess) might not have matrix cores.
+  // Not sure what would happen here?
+  topo->matrix_cores = topo->compute_units * topo->simds_per_cu;

  return topo;
 }

+// Builds the memory description from the values collected in agent_info.
+// NOTE: only bus_width, lds_size and size_bytes are filled in; gpu is unused.
+struct memory* get_memory_info(struct gpu_info* gpu, struct agent_info info) {
+  struct memory* mem = (struct memory*) emalloc(sizeof(*mem));
+  mem->size_bytes = info.global_size;
+  mem->lds_size = info.lds_size;
+  mem->bus_width = info.bus_width;
+  return mem;
+}
+
 struct gpu_info* get_gpu_info_hsa(int gpu_idx) {
  struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
  gpu->pci = NULL;
@@ -118,6 +208,7 @@ struct gpu_info* get_gpu_info_hsa(int gpu_idx) {
  gpu->name = (char *) emalloc(sizeof(char) * (strlen(info.device_mkt_name) + 1));
  strcpy(gpu->name, info.device_mkt_name);
  gpu->arch = get_uarch_from_hsa(gpu, info.gpu_name);
+  gpu->mem = get_memory_info(gpu, info);

  if (gpu->arch == NULL) {
    return NULL;
@@ -135,3 +226,17 @@ struct gpu_info* get_gpu_info_hsa(int gpu_idx) {
 char* get_str_cu(struct gpu_info* gpu) {
  return get_str_generic(gpu->topo_h->compute_units);
 }
+
+// Returns the XCD count as a string, or NULL when there is a single XCD,
+// since in that case we don't want to print it.
+char* get_str_xcds(struct gpu_info* gpu) {
+  int32_t xcds = gpu->topo_h->num_xcc;
+
+  // A NULL result tells the caller to leave the attribute out.
+  return (xcds == 1) ? NULL : get_str_generic(xcds);
+}
+
+char* get_str_matrix_cores(struct gpu_info* gpu) {
+  // Matrix core count formatted by get_str_generic. TODO: Show XX (WMMA/MFMA)
+  return get_str_generic(gpu->topo_h->matrix_cores);
+}
--- a/src/hsa/hsa.hpp
+++ b/src/hsa/hsa.hpp
@@ -5,5 +5,7 @@

 struct gpu_info* get_gpu_info_hsa(int gpu_idx);
 char* get_str_cu(struct gpu_info* gpu);
+char* get_str_xcds(struct gpu_info* gpu);
+char* get_str_matrix_cores(struct gpu_info* gpu);

 #endif
Author	SHA1	Message	Date
Dr-Noob	84e6021a95	Remove TODO	2025-10-26 10:47:27 +01:00
Dr-Noob	a4916255cf	Fix	2025-10-26 10:44:09 +01:00
Dr-Noob	b5dc30d4b3	Add matrix cores	2025-10-26 10:42:25 +01:00
Dr-Noob	2fa90179b4	Fix	2025-10-26 10:28:41 +01:00
Dr-Noob	711936be81	Show XCDs	2025-10-26 10:27:51 +01:00
Dr-Noob	94a9a440f0	Basic support	2025-10-23 21:40:14 +02:00
Dr-Noob	78d34e71f1	[v0.30][AMD] Add support to fetch bus width, global memory and LDS size We can use hsa_amd_agent_iterate_memory_pools to fetch info about GPU memory pools in the GPU. HSA_AMD_SEGMENT_GROUP seems to be LDS, and HSA_AMD_SEGMENT_GLOBAL seems to be global memory. However, the latter is reported multiple times (I don't know why). The only solution I found for this is to check for the HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED flag, which seems to be reported only once. For bus width, we simply use HSA_AMD_AGENT_INFO_MEMORY_WIDTH.	2025-10-23 21:30:02 +02:00
Dr-Noob	82ea16fc3d	[v0.30] Fix warning in printer	2025-10-16 20:01:14 +02:00
Dr-Noob	6589de9717	[v0.30] Reorganize attributes in printer and add CUs attr for AMD	2025-10-16 19:53:48 +02:00
Dr-Noob	0950b97393	[v0.30] Build pciutils only if necessary If only HSA is enabled we don't need pciutils since AMD detection does not rely on it. Therefore we change CMakeLists.txt to build pciutils only if required. This commit has some side-effects: 1. We now don't build Intel backend by default. In other words, no backend is built by default, the user must specify which backend to use. 2. There were some issues with includes and wrongly used defines and variables. This commit fixes all that.	2025-10-16 08:26:42 +02:00