[v0.30] Add support for AMD GPUs

Adds very basic (experimental) support for AMD GPUs. The only install requirement is ROCm. Unlike NVIDIA, we don't need the CUDA equivalent (HIP) to make gpufetch work, which reduces the installation requirements quite significantly. Major changes: * CMakeLists: - Do not compile the CUDA backend by default (since we now may want to target AMD only) - Set build flags on the gpufetch CMake target instead of using "set(CMAKE_CXX_FLAGS ...)". This fixes a warning coming from ROCm. - Assume that the ROCm CMake files are installed (should be fixed later) * hsa folder: AMD support is implemented via HSA (Heterogeneous System Architecture) calls. Therefore, HSA is added as a new backend to gpufetch. We only print basic information for now, so more work may be needed in the future to fully support AMD GPUs. NOTE: This commit will probably break AUR packages since we used to build CUDA by default, which is no longer the case. The AUR package should be updated to use -DENABLE_CUDA_BACKEND or -DENABLE_HSA_BACKEND as appropriate.
2025-10-12 12:34:56 +02:00
parent 57caadf530
commit b29b17d14f
11 changed files with 344 additions and 21 deletions
--- a/src/hsa/hsa.cpp
+++ b/src/hsa/hsa.cpp
@@ -0,0 +1,130 @@
+#include <iostream>
+#include <hsa/hsa.h>
+#include <hsa/hsa_ext_amd.h>
+
+#include <cstring>
+#include <cstdlib>
+#include <cstdio>
+
+#include <iostream>
+#include <iomanip>
+#include <hsa/hsa.h>
+#include <hsa/hsa_ext_amd.h>
+
+#include "hsa.hpp"
+#include "../common/pci.hpp"
+#include "../common/global.hpp"
+#include "../common/uarch.hpp"
+
+// Attributes gathered for one HSA agent while iterating over the agents
+// reported by the HSA runtime (see agent_callback).
+struct agent_info {
+  unsigned int deviceId;     // Index of the GPU the caller asked for (set from gpu_idx)
+  char gpu_name[64];         // Filled from HSA_AGENT_INFO_NAME
+  char vendor_name[64];      // Filled from HSA_AGENT_INFO_VENDOR_NAME
+  char device_mkt_name[64];  // Filled from HSA_AMD_AGENT_INFO_PRODUCT_NAME
+  uint32_t max_clock_freq;   // Filled from HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY
+  uint32_t compute_unit;     // Filled from HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT
+};
+
+#define RET_IF_HSA_ERR(err) { \
+  if ((err) != HSA_STATUS_SUCCESS) { \
+    char err_val[12];                                                         \
+    char* err_str = NULL;                                                     \
+    if (hsa_status_string(err,                                                \
+            (const char**)&err_str) != HSA_STATUS_SUCCESS) {                  \
+      snprintf(&(err_val[0]), sizeof(err_val), "%#x", (uint32_t)err);         \
+      err_str = &(err_val[0]);                                                \
+    }                                                                         \
+    printErr("HSA failure at: %s:%d\n",                              \
+                      __FILE__, __LINE__);                           \
+    printErr("Call returned %s\n", err_str);                         \
+    return (err);                                                             \
+  }                                                                           \
+}
+
+// Callback for hsa_iterate_agents: records the attributes of the requested
+// GPU agent into the agent_info passed through `data`.
+//
+// info->deviceId is consumed as a countdown: on entry it holds how many GPU
+// agents must be skipped before the target (0 selects the first GPU agent the
+// runtime reports). Once the target has been recorded it is set to a sentinel
+// so that GPU agents visited later do not overwrite the recorded data;
+// previously every GPU agent overwrote `info`, so the last one enumerated won.
+//
+// Returns HSA_STATUS_SUCCESS to keep iterating, or the failing HSA status
+// (after logging it) when an attribute query fails.
+hsa_status_t agent_callback(hsa_agent_t agent, void *data) {
+  // Value stored in deviceId after the requested GPU has been recorded.
+  const unsigned TARGET_RECORDED = ~0u;
+
+  struct agent_info* info = static_cast<struct agent_info *>(data);
+
+  hsa_device_type_t type;
+  hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
+  RET_IF_HSA_ERR(err);
+
+  // Only GPU agents are of interest.
+  if (type != HSA_DEVICE_TYPE_GPU) {
+    return HSA_STATUS_SUCCESS;
+  }
+
+  // The requested GPU was already captured: ignore any further GPU agents.
+  if (info->deviceId == TARGET_RECORDED) {
+    return HSA_STATUS_SUCCESS;
+  }
+
+  // Not the requested GPU yet: skip it and keep counting down.
+  if (info->deviceId > 0) {
+    info->deviceId--;
+    return HSA_STATUS_SUCCESS;
+  }
+
+  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, info->gpu_name);
+  RET_IF_HSA_ERR(err);
+
+  // TODO: What if vendor_name is not AMD?
+  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, info->vendor_name);
+  RET_IF_HSA_ERR(err);
+
+  // The AMD-specific attributes live in a separate enum and must be cast to
+  // hsa_agent_info_t to go through the generic query entry point.
+  err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_PRODUCT_NAME, info->device_mkt_name);
+  RET_IF_HSA_ERR(err);
+
+  err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, &info->max_clock_freq);
+  RET_IF_HSA_ERR(err);
+
+  err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &info->compute_unit);
+  RET_IF_HSA_ERR(err);
+
+  info->deviceId = TARGET_RECORDED;
+  return HSA_STATUS_SUCCESS;
+}
+
+// Allocates a topology_h and stores the compute unit count taken from `info`
+// in it. Only the compute_units field is written here.
+struct topology_h* get_topology_info(struct agent_info info) {
+  struct topology_h* topology = (struct topology_h*) emalloc(sizeof(*topology));
+  topology->compute_units = info.compute_unit;
+  return topology;
+}
+
+// Queries the HSA runtime and builds the gpu_info for the GPU at `gpu_idx`.
+//
+// devices: PCI device list; not used by this function at the moment.
+// gpu_idx: index of the GPU to query. Only index 0 is supported for now.
+//
+// Returns a newly allocated gpu_info, or NULL when the index is negative or
+// unsupported, when the HSA runtime fails, or when the runtime reported no
+// GPU agent. Nothing is allocated on the failure paths.
+struct gpu_info* get_gpu_info_hsa(struct pci_dev *devices, int gpu_idx) {
+  (void) devices;
+
+  // Validate the index before allocating anything so that the early returns
+  // below cannot leak memory.
+  if(gpu_idx < 0) {
+    printErr("GPU index must be equal or greater than zero");
+    return NULL;
+  }
+
+  if(gpu_idx > 0) {
+    // Currently we only support fetching GPU 0.
+    return NULL;
+  }
+
+  // Initialize the HSA runtime
+  if (hsa_init() != HSA_STATUS_SUCCESS) {
+    printErr("Failed to initialize HSA runtime");
+    return NULL;
+  }
+
+  // Zero-initialize: if the runtime reports no GPU agent the callback never
+  // writes these fields, and they must not be read while indeterminate.
+  struct agent_info info = {};
+  info.deviceId = gpu_idx;
+
+  // Iterate over all agents in the system
+  hsa_status_t status = hsa_iterate_agents(agent_callback, &info);
+
+  // Everything we need has been copied into `info`, so the runtime can be
+  // shut down on both the success and the failure path.
+  hsa_shut_down();
+
+  if (status != HSA_STATUS_SUCCESS) {
+    printErr("Failed to iterate HSA agents");
+    return NULL;
+  }
+
+  // The callback fills these fields only for GPU agents; if both are still
+  // zero no GPU was found.
+  if (info.gpu_name[0] == '\0' && info.compute_unit == 0) {
+    printErr("No GPU agent reported by the HSA runtime");
+    return NULL;
+  }
+
+  // Guard against a name that fills the whole buffer without a terminator.
+  info.device_mkt_name[sizeof(info.device_mkt_name) - 1] = '\0';
+  size_t name_len = strlen(info.device_mkt_name);
+
+  struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
+  gpu->pci = NULL;
+  gpu->idx = gpu_idx;
+  gpu->freq = info.max_clock_freq;
+  gpu->vendor = GPU_VENDOR_AMD;
+  gpu->name = (char *) emalloc(sizeof(char) * (name_len + 1));
+  memcpy(gpu->name, info.device_mkt_name, name_len + 1);
+  gpu->topo_h = get_topology_info(info);
+
+  // TODO: Use gpu_name for uarch detection
+
+  return gpu;
+}
+
+// Returns the string produced by get_str_generic for the compute unit count
+// stored in the GPU's HSA topology.
+char* get_str_cu(struct gpu_info* gpu) {
+  struct topology_h* topology = gpu->topo_h;
+  return get_str_generic(topology->compute_units);
+}