From b5dc30d4b384cdfb8f264e036748ea0c641a9668 Mon Sep 17 00:00:00 2001
From: Dr-Noob <peibolms@gmail.com>
Date: Sun, 26 Oct 2025 10:42:25 +0100
Subject: [PATCH] Add matrix cores

---
 src/common/printer.cpp | 4 ++++
 src/hsa/hsa.cpp        | 9 +++++++++
 src/hsa/hsa.hpp        | 1 +
 3 files changed, 14 insertions(+)

diff --git a/src/common/printer.cpp b/src/common/printer.cpp
index 02a840b..00392a1 100644
--- a/src/common/printer.cpp
+++ b/src/common/printer.cpp
@@ -48,6 +48,7 @@ enum {
   ATTRIBUTE_FREQUENCY,     // ALL
   ATTRIBUTE_PEAK,          // ALL
   ATTRIBUTE_COMPUTE_UNITS, // HSA
+  ATTRIBUTE_MATRIX_CORES,  // HSA
   ATTRIBUTE_XCDS,          // HSA
   ATTRIBUTE_LDS_SIZE,      // HSA
   ATTRIBUTE_STREAMINGMP,   // CUDA
@@ -71,6 +72,7 @@ static const AttributeField ATTRIBUTE_INFO[] = {
   { ATTRIBUTE_FREQUENCY,     "Max Frequency:",          "Max Freq.:" },
   { ATTRIBUTE_PEAK,          "Peak Performance:",       "Peak Perf.:" },
   { ATTRIBUTE_COMPUTE_UNITS, "Compute Units (CUs):",    "CUs" },
+  { ATTRIBUTE_MATRIX_CORES,  "Matrix Cores: ",          "Matrix Cores:" },
   { ATTRIBUTE_XCDS,          "XCDs:",                   "XCDs" },
   { ATTRIBUTE_LDS_SIZE,      "LDS size:",               "LDS:" },
   { ATTRIBUTE_STREAMINGMP,   "SMs:",                    "SMs:" },
@@ -490,6 +492,7 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
   char* uarch = get_str_uarch_hsa(gpu->arch);
   char* manufacturing_process = get_str_process(gpu->arch);
   char* cus = get_str_cu(gpu);
+  char* matrix_cores = get_str_matrix_cores(gpu);
   char* xcds = get_str_xcds(gpu);
   char* max_frequency = get_str_freq(gpu);
   char* bus_width = get_str_bus_width(gpu);
@@ -504,6 +507,7 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
   setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
   setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
   setAttribute(art, ATTRIBUTE_COMPUTE_UNITS, cus);
+  setAttribute(art, ATTRIBUTE_MATRIX_CORES, matrix_cores);
   if (xcds != NULL) {
     setAttribute(art, ATTRIBUTE_XCDS, xcds);
   }
diff --git a/src/hsa/hsa.cpp b/src/hsa/hsa.cpp
index 414e700..67dc7af 100644
--- a/src/hsa/hsa.cpp
+++ b/src/hsa/hsa.cpp
@@ -31,6 +31,7 @@ struct agent_info {
   uint32_t num_shader_engines;
   uint32_t simds_per_cu;
   uint32_t num_xcc;            // Acccelerator Complex Dies (XCDs)
+  uint32_t matrix_cores;       // Cores with WMMA/MFMA capabilities
 };
 
 #define RET_IF_HSA_ERR(err) { \
@@ -149,6 +150,9 @@ struct topology_h* get_topology_info(struct agent_info info) {
   topo->num_shader_engines = info.num_shader_engines; // not printed at the moment
   topo->simds_per_cu = info.simds_per_cu;             // not printed at the moment
   topo->num_xcc = info.num_xcc;
+  // NOTE: older GPUs (presumably GCN) might not have matrix cores, so this
+  // estimate (compute units * SIMDs per CU) is unverified for them.
+  topo->matrix_cores = topo->compute_units * topo->simds_per_cu;
 
   return topo;
 }
@@ -232,4 +236,9 @@ char* get_str_xcds(struct gpu_info* gpu) {
     return NULL;
   }
   return get_str_generic(gpu->topo_h->num_xcc);
+}
+
+char* get_str_matrix_cores(struct gpu_info* gpu) {
+  // Returns get_str_generic()'s rendering of topo_h->matrix_cores. TODO: Show XX (WMMA/MFMA)
+  return get_str_generic(gpu->topo_h->matrix_cores);
 }
\ No newline at end of file
diff --git a/src/hsa/hsa.hpp b/src/hsa/hsa.hpp
index c5bc387..45c3363 100644
--- a/src/hsa/hsa.hpp
+++ b/src/hsa/hsa.hpp
@@ -6,5 +6,6 @@
 struct gpu_info* get_gpu_info_hsa(int gpu_idx);
 char* get_str_cu(struct gpu_info* gpu);
 char* get_str_xcds(struct gpu_info* gpu);
+char* get_str_matrix_cores(struct gpu_info* gpu);
 
 #endif