Show chip name

These changes should go later in another commit - lets keep this HSA only
Include common uarch.h in all backends (fixes include issue)
2025-10-15 08:21:10 +02:00 · 2025-10-15 07:45:34 +02:00 · 2025-10-15 07:36:36 +02:00 · 2025-10-15 07:31:49 +02:00 · 2025-10-15 07:29:53 +02:00 · 2025-10-15 07:28:15 +02:00
7 changed files with 407 additions and 16 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -127,7 +127,7 @@ endif()
 if(ENABLE_HSA_BACKEND)
  target_compile_definitions(gpufetch PUBLIC BACKEND_HSA)

-  add_library(hsa_backend STATIC ${HSA_DIR}/hsa.cpp)
+  add_library(hsa_backend STATIC ${HSA_DIR}/hsa.cpp ${HSA_DIR}/uarch.cpp)

  if(NOT ${PCIUTILS_FOUND})
    add_dependencies(hsa_backend pciutils)
--- a/src/common/printer.cpp
+++ b/src/common/printer.cpp
@@ -11,6 +11,7 @@
 #include "../intel/uarch.hpp"
 #include "../intel/intel.hpp"
 #include "../hsa/hsa.hpp"
+#include "../hsa/uarch.hpp"
 #include "../cuda/cuda.hpp"
 #include "../cuda/uarch.hpp"

@@ -490,10 +491,18 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
    return false;

  char* gpu_name = get_str_gpu_name(gpu);
+  char* gpu_chip = get_str_chip(gpu->arch);
+  char* uarch = get_str_uarch_hsa(gpu->arch);
+  char* manufacturing_process = get_str_process(gpu->arch);
  char* sms = get_str_cu(gpu);
  char* max_frequency = get_str_freq(gpu);

  setAttribute(art, ATTRIBUTE_NAME, gpu_name);
+  if (gpu_chip != NULL) {
+    setAttribute(art, ATTRIBUTE_CHIP, gpu_chip);
+  }
+  setAttribute(art, ATTRIBUTE_UARCH, uarch);
+  setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
  setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
  setAttribute(art, ATTRIBUTE_STREAMINGMP, sms);

--- a/src/common/uarch.hpp
+++ b/src/common/uarch.hpp
@@ -16,6 +16,9 @@ struct uarch {
  int32_t cc_minor;
  int32_t compute_capability;

+  // HSA specific
+  int32_t llvm_target;
+
  // Intel specific
  int32_t gt;
  int32_t eu;
--- a/src/hsa/chips.hpp
+++ b/src/hsa/chips.hpp
@@ -0,0 +1,37 @@
+#ifndef __HSA_GPUCHIPS__
+#define __HSA_GPUCHIPS__
+
+// uint32_t is needed by the GPUCHIP typedef below. Include its header here so
+// this file does not depend on what the including file pulled in first.
+#include <cstdint>
+
+// Identifier of a concrete AMD GPU chip: one of the CHIP_* values below.
+typedef uint32_t GPUCHIP;
+
+// CHIP_UNKNOWN_HSA is the first enumerator and is the value used when no
+// chip could be identified.
+enum {
+  CHIP_UNKNOWN_HSA,
+  // VEGA (TODO)
+  // ...
+  // RDNA
+  CHIP_NAVI_10,
+  CHIP_NAVI_12,
+  CHIP_NAVI_14,
+  // RDNA2
+  // There are way more (eg Oberon)
+  // Maybe we'll add them in the future.
+  CHIP_NAVI_21,
+  CHIP_NAVI_22,
+  CHIP_NAVI_23,
+  CHIP_NAVI_24,
+  // RDNA3
+  // There are way more as well.
+  // Supporting Navi only for now.
+  CHIP_NAVI_31,
+  CHIP_NAVI_32,
+  CHIP_NAVI_33,
+  // RDNA4
+  CHIP_NAVI_44,
+  CHIP_NAVI_48,
+  // CDNA
+  CHIP_ARCTURUS,      // MI100 series
+  CHIP_ALDEBARAN,     // MI200 series
+  CHIP_AQUA_VANJARAM, // MI300 series
+  CHIP_CDNA_NEXT      // MI350 series
+};
+
+#endif
--- a/src/hsa/hsa.cpp
+++ b/src/hsa/hsa.cpp
@@ -12,6 +12,7 @@
 #include <hsa/hsa_ext_amd.h>

 #include "hsa.hpp"
+#include "uarch.hpp"
 #include "../common/pci.hpp"
 #include "../common/global.hpp"
 #include "../common/uarch.hpp"
@@ -34,9 +35,8 @@ struct agent_info {
      snprintf(&(err_val[0]), sizeof(err_val), "%#x", (uint32_t)err);         \
      err_str = &(err_val[0]);                                                \
    }                                                                         \
-    printErr("HSA failure at: %s:%d\n",                              \
-                      __FILE__, __LINE__);                           \
-    printErr("Call returned %s\n", err_str);                         \
+    printErr("HSA failure at: %s:%d\n", __FILE__, __LINE__);                  \
+    printErr("Call returned %s\n", err_str);                                  \
    return (err);                                                             \
  }                                                                           \
 }
@@ -52,7 +52,6 @@ hsa_status_t agent_callback(hsa_agent_t agent, void *data) {
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, info->gpu_name);
    RET_IF_HSA_ERR(err);

-    // TODO: What if vendor_name is not AMD?
    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, info->vendor_name);
    RET_IF_HSA_ERR(err);

@@ -92,11 +91,8 @@ struct gpu_info* get_gpu_info_hsa(struct pci_dev *devices, int gpu_idx) {
    return NULL;
  }

-  hsa_status_t status;
-
-  // Initialize the HSA runtime
-  status = hsa_init();
-  if (status != HSA_STATUS_SUCCESS) {
+  hsa_status_t err = hsa_init();
+  if (err != HSA_STATUS_SUCCESS) {
    printErr("Failed to initialize HSA runtime");
    return NULL;
  }
@@ -105,23 +101,35 @@ struct gpu_info* get_gpu_info_hsa(struct pci_dev *devices, int gpu_idx) {
  info.deviceId = gpu_idx;

  // Iterate over all agents in the system
-  status = hsa_iterate_agents(agent_callback, &info);
-  if (status != HSA_STATUS_SUCCESS) {
+  err = hsa_iterate_agents(agent_callback, &info);
+  if (err != HSA_STATUS_SUCCESS) {
    printErr("Failed to iterate HSA agents");
    hsa_shut_down();
    return NULL;
  }

-  gpu->freq = info.max_clock_freq;
+  // Every early return below must release the runtime that hsa_init()
+  // brought up, mirroring the hsa_iterate_agents failure path above.
+  if (strcmp(info.vendor_name, "AMD") != 0) {
+    printErr("HSA vendor name is: '%s'. Only AMD is supported!", info.vendor_name);
+    hsa_shut_down();
+    return NULL;
+  }
  gpu->vendor = GPU_VENDOR_AMD;
+
+  gpu->freq = info.max_clock_freq;
+  gpu->topo_h = get_topology_info(info);
  gpu->name = (char *) emalloc(sizeof(char) * (strlen(info.device_mkt_name) + 1));
  strcpy(gpu->name, info.device_mkt_name);
-  gpu->topo_h = get_topology_info(info);
+  gpu->arch = get_uarch_from_hsa(gpu, info.gpu_name);

-  // TODO: Use gpu_name for uarch detection
+  if (gpu->arch == NULL) {
+    // get_uarch_from_hsa has already printed the reason.
+    hsa_shut_down();
+    return NULL;
+  }

  // Shut down the HSA runtime
-  hsa_shut_down();
+  err = hsa_shut_down();
+  if (err != HSA_STATUS_SUCCESS) {
+    printErr("Failed to shutdown HSA runtime");
+    return NULL;
+  }
  return gpu;
 }

--- a/src/hsa/uarch.cpp
+++ b/src/hsa/uarch.cpp
@@ -0,0 +1,321 @@
+#include <cstdlib>
+#include <cstdint>
+#include <cstring>
+
+#include "../common/uarch.hpp"
+#include "../common/global.hpp"
+#include "../common/gpu.hpp"
+#include "chips.hpp"
+
+// MICROARCH values
+//
+// The enumerators are used as indices into uarch_str, so both lists must
+// keep the same order. is_uarch_valid treats UARCH_CDNA4 as the last valid
+// value: append new entries before updating that check.
+enum {
+  UARCH_UNKNOWN,
+  // GCN (Graphics Core Next)
+  // Empty for now
+  // ...
+  // RDNA (Radeon DNA)
+  UARCH_RDNA,
+  UARCH_RDNA2,
+  UARCH_RDNA3,
+  UARCH_RDNA4,
+  // CDNA (Compute DNA)
+  UARCH_CDNA,
+  UARCH_CDNA2,
+  UARCH_CDNA3,
+  UARCH_CDNA4
+};
+
+// Display name of every MICROARCH value, indexed by the enumerator itself.
+// The commented-out designators record which enumerator each slot is for.
+static const char *uarch_str[] = {
+  /*[UARCH_UNKNOWN]   = */ STRING_UNKNOWN,
+  /*[UARCH_RDNA]      = */ "RDNA",
+  /*[UARCH_RDNA2]     = */ "RDNA2",
+  /*[UARCH_RDNA3]     = */ "RDNA3",
+  /*[UARCH_RDNA4]     = */ "RDNA4",
+  /*[UARCH_CDNA]      = */ "CDNA",
+  /*[UARCH_CDNA2]     = */ "CDNA2",
+  /*[UARCH_CDNA3]     = */ "CDNA3",
+  /*[UARCH_CDNA4]     = */ "CDNA4",
+};
+
+// Sources: 
+// - https://rocm.docs.amd.com/en/latest/reference/gpu-arch-specs.html
+// - https://www.techpowerup.com
+//
+// This is sometimes referred to as LLVM target, but also shader ISA.
+//
+// LLVM target *usually* maps to a specific architecture. However there
+// are cases where this is not true:
+// MI8 is GCN3.0 with LLVM target gfx803
+// MI6 is GCN4.0 with LLVM target gfx803
+// or
+// Strix Point can be gfx1150 or gfx1151
+//
+// NOTE: GCN chips are stored for completeness, but they are
+// not actively supported.
+// TARGET_UNKNOWN_HSA is what get_llvm_target_from_str returns for a name it
+// does not recognize. Enumerators are grouped by family rather than by gfx
+// number, which is why the CDNA targets (gfx908, gfx90a, ...) come last.
+enum {
+  TARGET_UNKNOWN_HSA,
+  /// GCN (Graphics Core Next)
+  /// ------------------------
+  // GCN 1.0 
+  TARGET_GFX600,
+  TARGET_GFX601,
+  TARGET_GFX602,
+  // GCN 2.0
+  TARGET_GFX700,
+  TARGET_GFX701,
+  TARGET_GFX702,
+  TARGET_GFX703,
+  TARGET_GFX704,
+  TARGET_GFX705,
+  // GCN 3.0 / 4.0
+  TARGET_GFX801,
+  TARGET_GFX802,
+  TARGET_GFX803,
+  TARGET_GFX805,
+  TARGET_GFX810,
+  // GCN 5.0
+  TARGET_GFX900,
+  TARGET_GFX902,
+  TARGET_GFX904,
+  // GCN 5.1
+  TARGET_GFX906,
+  // ???
+  TARGET_GFX909,
+  TARGET_GFX90C,
+  /// RDNA (Radeon DNA)
+  /// -----------------
+  // RDNA1
+  TARGET_GFX1010,
+  TARGET_GFX1011,
+  TARGET_GFX1012,
+  // RDNA2
+  TARGET_GFX1013, // Oberon
+  TARGET_GFX1030,
+  TARGET_GFX1031,
+  TARGET_GFX1032,
+  TARGET_GFX1033,
+  TARGET_GFX1034,
+  TARGET_GFX1035, // ??
+  TARGET_GFX1036, // ??
+  // RDNA3
+  TARGET_GFX1100,
+  TARGET_GFX1101,
+  TARGET_GFX1102,
+  TARGET_GFX1103, // ???
+  // RDNA3.5
+  TARGET_GFX1150, // Strix Point
+  TARGET_GFX1151, // Strix Halo / Strix Point
+  TARGET_GFX1152, // Krackan Point
+  TARGET_GFX1153, // ???
+  // RDNA4
+  TARGET_GFX1200,
+  TARGET_GFX1201,
+  TARGET_GFX1250, // ???
+  TARGET_GFX1251, // ???
+  /// CDNA (Compute DNA)
+  /// ------------------
+  // CDNA
+  TARGET_GFX908,
+  // CDNA2
+  TARGET_GFX90A,
+  // CDNA3
+  TARGET_GFX942,
+  // CDNA4
+  TARGET_GFX950  
+};
+
+// Building blocks for the if / else-if chain in map_chip_to_uarch_hsa:
+//   CHECK_UARCH_START  opens the chain with a branch that is never taken,
+//   CHECK_UARCH        adds one "chip id -> name, uarch, process" entry,
+//   CHECK_UARCH_END    is the fallback for chip ids that have no entry.
+//
+// The fallback stays silent for CHIP_UNKNOWN_HSA: that is the value
+// get_chip_from_target_hsa returns for an unmapped target, and it has
+// already reported the problem by then. Any other id reaching the fallback
+// means an entry is missing from the chain, which is reported as a bug.
+#define CHECK_UARCH_START if (false) {}
+#define CHECK_UARCH(arch, chip_, str, uarch, process) \
+   else if (arch->chip == chip_) fill_uarch(arch, str, uarch, process);
+#define CHECK_UARCH_END else { if(arch->chip != CHIP_UNKNOWN_HSA) printBug("map_chip_to_uarch_hsa: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, UNK); }
+
+// Stores a freshly allocated copy of str in arch->chip_str and records the
+// microarchitecture and manufacturing process in arch.
+void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t process) {
+  // Size of the copy, terminator included.
+  size_t bytes = strlen(str) + 1;
+  char* name_copy = (char *) emalloc(sizeof(char) * bytes);
+  memcpy(name_copy, str, bytes);
+
+  arch->chip_str = name_copy;
+  arch->process = process;
+  arch->uarch = u;
+}
+
+// Fills chip_str, uarch and process of arch according to arch->chip, which
+// the caller must have set beforehand. Chip ids without an entry below get
+// STRING_UNKNOWN / UARCH_UNKNOWN / UNK from the CHECK_UARCH_END fallback.
+//
+// Each CHECK_UARCH line reads: chip id, chip name, microarchitecture,
+// manufacturing process (presumably in nm - confirm against get_str_process).
+//
+// On chiplet based chips (such as Navi31, Navi32, etc),
+// we have 2 different processes: The MCD process and the
+// rest of the chip process. They might be different and here
+// we just take one - let's take MCD process for now.
+//
+// TODO: Should we differentiate?
+void map_chip_to_uarch_hsa(struct uarch* arch) {
+  CHECK_UARCH_START
+
+  // RDNA (all generations, see the uarch column)
+  CHECK_UARCH(arch, CHIP_NAVI_10,  "Navi 10", UARCH_RDNA,  7)
+  CHECK_UARCH(arch, CHIP_NAVI_12,  "Navi 12", UARCH_RDNA,  7)
+  CHECK_UARCH(arch, CHIP_NAVI_14,  "Navi 14", UARCH_RDNA,  7)
+  CHECK_UARCH(arch, CHIP_NAVI_21,  "Navi 21", UARCH_RDNA2, 7)
+  CHECK_UARCH(arch, CHIP_NAVI_22,  "Navi 22", UARCH_RDNA2, 7)
+  CHECK_UARCH(arch, CHIP_NAVI_23,  "Navi 23", UARCH_RDNA2, 7)
+  CHECK_UARCH(arch, CHIP_NAVI_24,  "Navi 24", UARCH_RDNA2, 6)
+  CHECK_UARCH(arch, CHIP_NAVI_31,  "Navi 31", UARCH_RDNA3, 6)
+  CHECK_UARCH(arch, CHIP_NAVI_32,  "Navi 32", UARCH_RDNA3, 6)
+  CHECK_UARCH(arch, CHIP_NAVI_33,  "Navi 33", UARCH_RDNA3, 6)
+  CHECK_UARCH(arch, CHIP_NAVI_44,  "Navi 44", UARCH_RDNA4, 4)
+  CHECK_UARCH(arch, CHIP_NAVI_48,  "Navi 48", UARCH_RDNA4, 4)
+  // CDNA
+  // NOTE: We will not show chip name for CDNA, thus use empty str
+  CHECK_UARCH(arch, CHIP_ARCTURUS,        "", UARCH_CDNA,  7)
+  CHECK_UARCH(arch, CHIP_ALDEBARAN,       "", UARCH_CDNA2, 6)
+  CHECK_UARCH(arch, CHIP_AQUA_VANJARAM,   "", UARCH_CDNA3, 6)
+  CHECK_UARCH(arch, CHIP_CDNA_NEXT,       "", UARCH_CDNA4, 6) // big difference between MCD and rest of the chip process
+  
+  CHECK_UARCH_END
+}
+
+// Building blocks for the if / else-if chain in get_chip_from_target_hsa:
+//   CHECK_TGT_START  opens the chain with a branch that is never taken,
+//   CHECK_TGT        returns `chip` when `target` equals `llvm_target`,
+//   CHECK_TGT_END    reports a bug and returns CHIP_UNKNOWN_HSA.
+#define CHECK_TGT_START if (false) {}
+#define CHECK_TGT(target, llvm_target, chip) \
+  else if (target == llvm_target) return chip;
+#define CHECK_TGT_END else { printBug("LLVM target '%d' has no matching chip", target); return CHIP_UNKNOWN_HSA; }
+
+// Maps a TARGET_* value to the CHIP_* value it stands for. Targets without
+// an entry (including the commented-out TODO lines) end in CHECK_TGT_END.
+//
+// We have at least 2 choices to infer the chip:
+//
+// - LLVM target (e.g., gfx1101 is Navi 32)
+// - PCI ID (e.g., 0x7470 is Navi 32)
+//
+// For now we will use the first approach, which seems to have
+// some issues like mentioned in the enum.
+// However PCI detection is also not perfect, since it is
+// quite hard to find PCI ids from old hardware.
+GPUCHIP get_chip_from_target_hsa(int32_t target) {
+  CHECK_TGT_START
+  /// RDNA
+  /// -------------------------------------------
+  CHECK_TGT(target, TARGET_GFX1010, CHIP_NAVI_10)
+  CHECK_TGT(target, TARGET_GFX1011, CHIP_NAVI_12)
+  CHECK_TGT(target, TARGET_GFX1012, CHIP_NAVI_14)
+  // CHECK_TGT(target, TARGET_GFX1013, TODO)
+  /// RDNA2
+  /// -------------------------------------------
+  CHECK_TGT(target, TARGET_GFX1030, CHIP_NAVI_21)
+  CHECK_TGT(target, TARGET_GFX1031, CHIP_NAVI_22)
+  CHECK_TGT(target, TARGET_GFX1032, CHIP_NAVI_23)
+  // NOTE(review): gfx1033 shares CHIP_NAVI_21 with gfx1030. LLVM's AMDGPU
+  // processor table appears to list gfx1033 as an APU target instead -
+  // TODO confirm this mapping.
+  CHECK_TGT(target, TARGET_GFX1033, CHIP_NAVI_21)
+  CHECK_TGT(target, TARGET_GFX1034, CHIP_NAVI_24)
+  // CHECK_TGT(target, TARGET_GFX1035, TODO)
+  // CHECK_TGT(target, TARGET_GFX1036, TODO)
+  /// RDNA3
+  /// -------------------------------------------
+  CHECK_TGT(target, TARGET_GFX1100, CHIP_NAVI_31)
+  CHECK_TGT(target, TARGET_GFX1101, CHIP_NAVI_32)
+  CHECK_TGT(target, TARGET_GFX1102, CHIP_NAVI_33)
+  // CHECK_TGT(target, TARGET_GFX1103, TODO)
+  /// RDNA3.5
+  /// -------------------------------------------
+  // CHECK_TGT(target, TARGET_GFX1150, TODO)
+  // CHECK_TGT(target, TARGET_GFX1151, TODO)
+  // CHECK_TGT(target, TARGET_GFX1152, TODO)
+  // CHECK_TGT(target, TARGET_GFX1153, TODO)
+  /// RDNA4
+  /// -------------------------------------------
+  CHECK_TGT(target, TARGET_GFX1200, CHIP_NAVI_44)
+  CHECK_TGT(target, TARGET_GFX1201, CHIP_NAVI_48)
+  // CHECK_TGT(target, TARGET_GFX1250, TODO)
+  // CHECK_TGT(target, TARGET_GFX1251, TODO)
+  /// CDNA
+  /// -------------------------------------------
+  CHECK_TGT(target, TARGET_GFX908, CHIP_ARCTURUS)
+  /// CDNA2
+  /// -------------------------------------------
+  CHECK_TGT(target, TARGET_GFX90A, CHIP_ALDEBARAN)
+  /// CDNA3
+  /// -------------------------------------------
+  CHECK_TGT(target, TARGET_GFX942, CHIP_AQUA_VANJARAM)
+  /// CDNA4
+  /// -------------------------------------------
+  CHECK_TGT(target, TARGET_GFX950, CHIP_CDNA_NEXT)
+  CHECK_TGT_END
+}
+
+// Building blocks for the if / else-if chain in get_llvm_target_from_str:
+//   CHECK_TGT_STR_START  opens the chain with a branch that is never taken,
+//   CHECK_TGT_STR        returns `chip` (a TARGET_* value here) when the
+//                        string `target` equals the literal `llvm_target`,
+//   CHECK_TGT_STR_END    returns TARGET_UNKNOWN_HSA; unlike the other
+//                        chains in this file it prints nothing.
+#define CHECK_TGT_STR_START if (false) {}
+#define CHECK_TGT_STR(target, llvm_target, chip) \
+  else if (strcmp(target, llvm_target) == 0) return chip;
+#define CHECK_TGT_STR_END else { return TARGET_UNKNOWN_HSA; }
+
+// Maps the LLVM target string to the enum value
+//
+// The comparison is exact and case sensitive (strcmp), and target must not
+// be NULL. Names without an entry yield TARGET_UNKNOWN_HSA; note that the
+// GCN and RDNA3.5 (gfx115x) enumerators have no entry here yet.
+int32_t get_llvm_target_from_str(char* target) {
+  // TODO: Autogenerate this
+  // TODO: Add all, not only the ones we support in get_chip_from_target_hsa
+  CHECK_TGT_STR_START
+  CHECK_TGT_STR(target, "gfx1010", TARGET_GFX1010)
+  CHECK_TGT_STR(target, "gfx1011", TARGET_GFX1011)
+  CHECK_TGT_STR(target, "gfx1012", TARGET_GFX1012)
+  CHECK_TGT_STR(target, "gfx1013", TARGET_GFX1013)
+  CHECK_TGT_STR(target, "gfx1030", TARGET_GFX1030)
+  CHECK_TGT_STR(target, "gfx1031", TARGET_GFX1031)
+  CHECK_TGT_STR(target, "gfx1032", TARGET_GFX1032)
+  CHECK_TGT_STR(target, "gfx1033", TARGET_GFX1033)
+  CHECK_TGT_STR(target, "gfx1034", TARGET_GFX1034)
+  CHECK_TGT_STR(target, "gfx1035", TARGET_GFX1035)
+  CHECK_TGT_STR(target, "gfx1036", TARGET_GFX1036)
+  CHECK_TGT_STR(target, "gfx1100", TARGET_GFX1100)
+  CHECK_TGT_STR(target, "gfx1101", TARGET_GFX1101)
+  CHECK_TGT_STR(target, "gfx1102", TARGET_GFX1102)
+  CHECK_TGT_STR(target, "gfx1103", TARGET_GFX1103)
+  CHECK_TGT_STR(target, "gfx1200", TARGET_GFX1200)
+  CHECK_TGT_STR(target, "gfx1201", TARGET_GFX1201)
+  CHECK_TGT_STR(target, "gfx1250", TARGET_GFX1250)
+  CHECK_TGT_STR(target, "gfx1251", TARGET_GFX1251)
+  CHECK_TGT_STR(target, "gfx908",  TARGET_GFX908)
+  CHECK_TGT_STR(target, "gfx90a",  TARGET_GFX90A)
+  CHECK_TGT_STR(target, "gfx942",  TARGET_GFX942)
+  CHECK_TGT_STR(target, "gfx950",  TARGET_GFX950)
+  CHECK_TGT_STR_END
+}
+
+// Builds the uarch description for the GPU whose LLVM target name
+// (e.g. "gfx1101") is gpu_name.
+//
+// Returns a newly allocated struct uarch, or NULL (after printing an error)
+// when the target name is not recognized. The gpu parameter is currently
+// not used.
+struct uarch* get_uarch_from_hsa(struct gpu_info* gpu, char* gpu_name) {
+  // Resolve the target before allocating anything, so that the
+  // unknown-target exit does not leave an allocated struct behind.
+  int32_t llvm_target = get_llvm_target_from_str(gpu_name);
+  if (llvm_target == TARGET_UNKNOWN_HSA) {
+    printErr("Unknown LLVM target: '%s'", gpu_name);
+    return NULL;
+  }
+
+  struct uarch* arch = (struct uarch*) emalloc(sizeof(struct uarch));
+  arch->llvm_target = llvm_target;
+  arch->chip_str = NULL;
+  arch->chip = get_chip_from_target_hsa(arch->llvm_target);
+  // Fills chip_str, uarch and process; chips without an entry get the
+  // unknown values.
+  map_chip_to_uarch_hsa(arch);
+
+  return arch;
+}
+
+// Tells whether arch is non-NULL and holds one of the MICROARCH values
+// declared in this file. Reports a bug and returns false otherwise.
+bool is_uarch_valid(struct uarch* arch) {
+  if (arch == NULL) {
+    printBug("Invalid uarch: arch is NULL");
+    return false;
+  }
+
+  const bool in_range = !(arch->uarch < UARCH_UNKNOWN || arch->uarch > UARCH_CDNA4);
+  if (!in_range) {
+    printBug("Invalid uarch: %d", arch->uarch);
+  }
+  return in_range;
+}
+
+// True when arch holds one of the CDNA microarchitectures.
+// arch must not be NULL.
+bool is_cdna(struct uarch* arch) {
+  switch (arch->uarch) {
+    case UARCH_CDNA:
+    case UARCH_CDNA2:
+    case UARCH_CDNA3:
+    case UARCH_CDNA4:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Chip name to display, or NULL for CDNA parts: there each architecture
+// maps one to one to a chip, so showing the chip name would add no value.
+// The returned string is the one stored in arch (no copy is made).
+char* get_str_chip(struct uarch* arch) {
+  return is_cdna(arch) ? NULL : arch->chip_str;
+}
+
+// Display name of the microarchitecture stored in arch, or NULL when arch
+// fails is_uarch_valid (which reports the problem).
+//
+// The return type is char* so that this definition agrees with the
+// declaration in hsa/uarch.hpp and with the caller in printer.cpp, which
+// stores the result in a char*. The string lives in a constant table:
+// callers must neither modify nor free it.
+char* get_str_uarch_hsa(struct uarch* arch) {
+  if (!is_uarch_valid(arch)) {
+    return NULL;
+  }
+  return const_cast<char*>(uarch_str[arch->uarch]);
+}
--- a/src/hsa/uarch.hpp
+++ b/src/hsa/uarch.hpp
@@ -0,0 +1,13 @@
+// Public interface of the HSA (AMD) microarchitecture detection.
+#ifndef __HSA_UARCH__
+#define __HSA_UARCH__
+
+#include "../common/gpu.hpp"
+
+// Only pointers to struct uarch are used below, so a forward declaration
+// is enough here.
+struct uarch;
+
+// Builds the uarch description from the LLVM target name (e.g. "gfx1101").
+// Returns NULL when the target name is not recognized.
+struct uarch* get_uarch_from_hsa(struct gpu_info* gpu, char* gpu_name);
+// Display name of the microarchitecture, or NULL if arch is not valid.
+char* get_str_uarch_hsa(struct uarch* arch);
+// Not defined in hsa/uarch.cpp; only declared here.
+char* get_str_process(struct uarch* arch); // TODO: Shouldnt we define this in the cpp?
+// Chip name to display, or NULL for CDNA parts.
+char* get_str_chip(struct uarch* arch);
+
+#endif
Author	SHA1	Message	Date
Dr-Noob	27655dc601	Show chip name	2025-10-15 08:21:10 +02:00
Dr-Noob	abc21365b1	These changes should go later in another commit - lets keep this HSA only	2025-10-15 07:45:34 +02:00
Dr-Noob	3e8b87a888	Include common uarch.h in all backends (fixes include issue)	2025-10-15 07:36:36 +02:00
Dr-Noob	044a52aab7	Cleanup includes	2025-10-15 07:31:49 +02:00
Dr-Noob	f9d5ba3a1c	Move get_str_process to common	2025-10-15 07:29:53 +02:00
Dr-Noob	1337ebede4	Actually no one is calling this guy	2025-10-15 07:28:15 +02:00
Dr-Noob	fd038963f1	Make sure uarch is valid before attempting to access it	2025-10-15 07:26:15 +02:00
Dr-Noob	d83904e28e	Fixes	2025-10-14 08:54:25 +02:00
Dr-Noob	2d74d66f79	Push all code; needs testing + review	2025-10-14 08:39:17 +02:00