7 Commits

Author SHA1 Message Date
Dr-Noob
47091fded5 Improve build script 2025-10-17 07:39:02 +02:00
Dr-Noob
82ea16fc3d [v0.30] Fix warning in printer 2025-10-16 20:01:14 +02:00
Dr-Noob
6589de9717 [v0.30] Reorganize attributes in printer and add CUs attr for AMD 2025-10-16 19:53:48 +02:00
Dr-Noob
0950b97393 [v0.30] Build pciutils only if neccesary
If only HSA is enabled we dont need pciutils since AMD detection does
not rely on it. Therefore we change CMakeLists.txt to build pciutils
only if required.

This commit has some side-effects:
1. We now don't build Intel backend by default. In other words, no
   backend is built by default, the user must specify which backend
   to use.
2. There were some issues with includes and wrongly used defines and
   variables. This commit fixes all that.
2025-10-16 08:26:42 +02:00
Dr-Noob
8794cd322d [v0.30] Add support for building on AMD where rocm-cmake is not installed 2025-10-16 07:24:45 +02:00
Dr-Noob
5df85aea2c [v0.30] Add uarch detection to AMD GPUs
Similarly to NVIDIA and Intel GPUs, we now detect microarchitecture,
also with manufacturing process and specific chip name. We infer all
of this from the gfx name (in the code we use the term llvm_target),
altough it's not clear yet that this method is completely reliable (see
comments for more details). In the future we might want to replace that
with a better way. Once we have the gfx name, we *should* be able to
infer the specific chip, and from the chip we can easily infer the
microarchitecture.

This commit also includes some refactorings and code improvements on
the HSA backend.
2025-10-15 08:23:28 +02:00
Dr-Noob
b29b17d14f [v0.30] Add support for AMD GPUs
Adds very basic support for AMD (experimental). The only install
requirement is ROCm. Unlike NVIDIA, we don't need the CUDA equivalent
(HIP) to make gpufetch work, which reduces the installation
requirements quite significantly.

Major changes:

* CMakeLists:
  - Make CUDA not compiled by default (since we now may want to target
    AMD only)
  - Set build flags on gpufetch cmake target instead of doing
    "set(CMAKE_CXX_FLAGS". This fixes a warning coming from ROCm.
  - Assumes that the ROCm CMake files are installed (should be fixed
    later)

* hsa folder: AMD support is implemented via HSA (Heterogeneous System
  Architecture) calls. Therefore, HSA is added as a new backend to
  gpufetch. We only print basic stuff for now, so we may need more
  things in the future to give full support for AMD GPUs.

NOTE: This commit will probably break AUR packages since we used to
build CUDA by default, which is no longer the case. The AUR package
should be updated and use -DENABLE_CUDA_BACKEND or -DENABLE_HSA_BACKEND
as appropriate.
2025-10-12 12:34:56 +02:00
15 changed files with 668 additions and 145 deletions

View File

@@ -10,9 +10,10 @@ set(CUDA_DIR "${SRC_DIR}/cuda")
set(HSA_DIR "${SRC_DIR}/hsa")
set(INTEL_DIR "${SRC_DIR}/intel")
# Enable Intel backend by default
if(NOT DEFINED ENABLE_INTEL_BACKEND)
set(ENABLE_INTEL_BACKEND true)
# Make sure that at least one backend is enabled.
# It does not make sense that the user has not specified any backend.
if(NOT ENABLE_INTEL_BACKEND AND NOT ENABLE_CUDA_BACKEND AND NOT ENABLE_HSA_BACKEND)
message(FATAL_ERROR "No backend was enabled! Please enable at least one backend with -DENABLE_XXX_BACKEND")
endif()
if(ENABLE_CUDA_BACKEND)
@@ -27,8 +28,7 @@ if(ENABLE_CUDA_BACKEND)
endif()
if(ENABLE_HSA_BACKEND)
# TODO: Needs rocm-cmake, what if its not insalled?
find_package(ROCmCMakeBuildTools)
find_package(ROCmCMakeBuildTools QUIET)
if (ROCmCMakeBuildTools_FOUND)
find_package(hsa-runtime64 1.0 REQUIRED)
link_directories(hsa_backend hsa-runtime64::hsa-runtime64)
@@ -48,15 +48,51 @@ if(ENABLE_HSA_BACKEND)
message(STATUS "${BoldYellow}HSA not found, disabling HSA backend${ColorReset}")
set(ENABLE_HSA_BACKEND false)
endif()
else()
# rocm-cmake is not installed, try to manually find neccesary files.
message(STATUS "${BoldYellow}Could NOT find HSA automatically, running manual search...${ColorReset}")
if (NOT DEFINED ROCM_PATH)
set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to ROCm")
endif()
find_path(HSA_INCLUDE_DIR hsa/hsa.h HINTS ${ROCM_PATH}/include)
find_library(HSA_LIBRARY hsa-runtime64 HINTS ${ROCM_PATH}/lib ${ROCM_PATH}/lib64)
if (HSA_INCLUDE_DIR AND HSA_LIBRARY)
message(STATUS "${BoldYellow}HSA was found manually${ColorReset}")
else()
set(ENABLE_HSA_BACKEND false)
message(STATUS "${BoldYellow}ROCm not found${ColorReset}")
message(STATUS "${BoldYellow}HSA was not found manually${ColorReset}")
endif()
endif()
endif()
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")
find_package(PCIUTILS)
if(NOT ${PCIUTILS_FOUND})
set(GPUFECH_COMMON
${COMMON_DIR}/main.cpp
${COMMON_DIR}/args.cpp
${COMMON_DIR}/gpu.cpp
${COMMON_DIR}/global.cpp
${COMMON_DIR}/printer.cpp
${COMMON_DIR}/master.cpp
${COMMON_DIR}/uarch.cpp
)
set(GPUFETCH_LINK_TARGETS z)
if(NOT(ENABLE_HSA_BACKEND AND NOT ENABLE_CUDA_BACKEND AND NOT ENABLE_INTEL_BACKEND))
# Look for pciutils only if not building HSA only.
#
# This has the (intented) secondary effect that if only HSA backend is enabled
# by the user, but ROCm cannot be found, pciutils will still be compiled in
# order to show the list of GPUs available on the system, so that the user will
# get at least some feedback even if HSA is not found.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")
list(APPEND GPUFECH_COMMON ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/sort.cpp)
list(APPEND GPUFETCH_LINK_TARGETS pci)
set(CMAKE_ENABLE_PCIUTILS ON)
find_package(PCIUTILS)
if(NOT ${PCIUTILS_FOUND})
message(STATUS "${BoldYellow}pciutils not found, downloading and building a local copy...${ColorReset}")
# Download and build pciutils
@@ -71,18 +107,23 @@ if(NOT ${PCIUTILS_FOUND})
include_directories(${PCIUTILS_INSTALL_LOCATION}/include)
link_directories(${PCIUTILS_INSTALL_LOCATION}/lib)
else()
else()
include_directories(${PCIUTILS_INCLUDE_DIR})
link_libraries(${PCIUTILS_LIBRARIES})
# Needed for linking libpci in FreeBSD
link_directories(/usr/local/lib/)
endif()
endif()
add_executable(gpufetch ${COMMON_DIR}/main.cpp ${COMMON_DIR}/args.cpp ${COMMON_DIR}/gpu.cpp ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/sort.cpp ${COMMON_DIR}/global.cpp ${COMMON_DIR}/printer.cpp ${COMMON_DIR}/master.cpp ${COMMON_DIR}/uarch.cpp)
add_executable(gpufetch ${GPUFECH_COMMON})
set(SANITY_FLAGS -Wfloat-equal -Wshadow -Wpointer-arith -Wall -Wextra -pedantic -fstack-protector-all -pedantic)
target_compile_features(gpufetch PRIVATE cxx_std_11)
target_compile_options(gpufetch PRIVATE ${SANITY_FLAGS})
if (CMAKE_ENABLE_PCIUTILS)
target_compile_definitions(gpufetch PUBLIC BACKEND_USE_PCI)
endif()
if(ENABLE_INTEL_BACKEND)
target_compile_definitions(gpufetch PUBLIC BACKEND_INTEL)
@@ -127,20 +168,24 @@ endif()
if(ENABLE_HSA_BACKEND)
target_compile_definitions(gpufetch PUBLIC BACKEND_HSA)
add_library(hsa_backend STATIC ${HSA_DIR}/hsa.cpp)
add_library(hsa_backend STATIC ${HSA_DIR}/hsa.cpp ${HSA_DIR}/uarch.cpp)
if(NOT ${PCIUTILS_FOUND})
add_dependencies(hsa_backend pciutils)
endif()
target_include_directories(hsa_backend PRIVATE "${HSA_INCLUDE_DIR}")
message(STATUS "Found HSA: ${HSA_INCLUDE_DIR}")
if (HSA_LIBRARY)
target_link_libraries(hsa_backend PRIVATE ${HSA_LIBRARY})
else()
target_link_libraries(hsa_backend PRIVATE hsa-runtime64::hsa-runtime64)
endif()
target_link_libraries(gpufetch hsa_backend)
endif()
target_link_libraries(gpufetch pci z)
target_link_libraries(gpufetch ${GPUFETCH_LINK_TARGETS})
install(TARGETS gpufetch DESTINATION bin)
if(NOT WIN32)

View File

@@ -1,5 +1,24 @@
#!/bin/bash
print_help() {
cat << EOF
Usage: $0 <backends> [build_type]
<backends> MANDATORY. Comma-separated list of
backends to enable.
Valid options: hsa, intel, cuda
Example: hsa,cuda
[build_type] OPTIONAL. Build type. Valid options:
debug, release (default: release)
Examples:
$0 hsa,intel debug
$0 cuda
$0 hsa,intel,cuda release
EOF
}
# gpufetch build script
set -e
@@ -7,19 +26,79 @@ rm -rf build/ gpufetch
mkdir build/
cd build/
if [ "$1" == "debug" ]
if [ "$1" == "--help" ]
then
BUILD_TYPE="Debug"
else
BUILD_TYPE="Release"
echo "gpufetch build script"
echo
print_help
exit 0
fi
if [[ $# -lt 1 ]]; then
echo "ERROR: At least one backend must be specified."
echo
print_help
exit 1
fi
# Determine if last argument is build type
LAST_ARG="${!#}"
if [[ "$LAST_ARG" == "debug" || "$LAST_ARG" == "release" ]]; then
BUILD_TYPE="$LAST_ARG"
BACKEND_ARG="${1}"
else
BUILD_TYPE="release"
BACKEND_ARG="${1}"
fi
# Split comma-separated backends into an array
IFS=',' read -r -a BACKENDS <<< "$BACKEND_ARG"
# Validate build type
if [[ "$BUILD_TYPE" != "debug" && "$BUILD_TYPE" != "release" ]]
then
echo "Error: Invalid build type '$BUILD_TYPE'."
echo "Valid options are: debug, release"
exit 1
fi
# From lower to upper case
CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=${BUILD_TYPE^}"
# Validate backends
VALID_BACKENDS=("hsa" "intel" "cuda")
for BACKEND in "${BACKENDS[@]}"; do
case "$BACKEND" in
hsa)
CMAKE_FLAGS+=" -DENABLE_HSA_BACKEND=ON"
;;
intel)
CMAKE_FLAGS+=" -DENABLE_INTEL_BACKEND=ON"
;;
cuda)
CMAKE_FLAGS+=" -DENABLE_CUDA_BACKEND=ON"
;;
*)
echo "ERROR: Invalid backend '$BACKEND'."
echo "Valid options: ${VALID_BACKENDS[*]}"
exit 1
;;
esac
done
# You can also manually specify the compilation flags.
# If you need to, just run the cmake command directly
# instead of using this script.
#
# Here you will find some help:
#
# In case you have CUDA installed but it is not detected,
# - set CMAKE_CUDA_COMPILER to your nvcc binary:
# - set CMAKE_CUDA_COMPILER_TOOLKIT_ROOT to the CUDA root dir
# for example:
# cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=/usr/local/cuda/ ..
#
# In case you want to explicitely disable a backend, you can:
# Disable CUDA backend:
# cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DENABLE_CUDA_BACKEND=OFF ..
@@ -28,7 +107,9 @@ fi
# Disable Intel backend:
# cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DENABLE_INTEL_BACKEND=OFF ..
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE ..
echo "$0: Running cmake $CMAKE_FLAGS"
echo
cmake $CMAKE_FLAGS ..
os=$(uname)
if [ "$os" == 'Linux' ]; then

View File

@@ -3,8 +3,6 @@
#include <cstdint>
#include "../cuda/pci.hpp"
#define UNKNOWN_FREQ -1
enum {

View File

@@ -8,7 +8,11 @@
#include "../cuda/cuda.hpp"
#include "../cuda/uarch.hpp"
static const char* VERSION = "0.25";
#ifdef BACKEND_USE_PCI
#include "pci.hpp"
#endif
static const char* VERSION = "0.30";
void print_help(char *argv[]) {
const char **t = args_str;
@@ -79,8 +83,12 @@ int main(int argc, char* argv[]) {
}
if(get_num_gpus_available(list) == 0) {
#ifdef BACKEND_USE_PCI
printErr("No GPU was detected! Available GPUs are:");
print_gpus_list_pci();
#else
printErr("No GPU was detected!");
#endif
printf("Please, make sure that the appropiate backend is enabled:\n");
print_enabled_backends();
printf("Visit https://github.com/Dr-Noob/gpufetch#2-backends for more information\n");

View File

@@ -1,7 +1,10 @@
#include <cstdlib>
#include <cstdio>
#include "pci.hpp"
#ifdef BACKEND_USE_PCI
#include "pci.hpp"
#endif
#include "global.hpp"
#include "colors.hpp"
#include "master.hpp"
@@ -19,7 +22,9 @@ struct gpu_list {
struct gpu_list* get_gpu_list() {
int idx = 0;
#ifdef BACKEND_USE_PCI
struct pci_dev *devices = get_pci_devices_from_pciutils();
#endif
struct gpu_list* list = (struct gpu_list*) malloc(sizeof(struct gpu_list));
list->num_gpus = 0;
list->gpus = (struct gpu_info**) malloc(sizeof(struct info*) * MAX_GPUS);
@@ -40,7 +45,7 @@ struct gpu_list* get_gpu_list() {
bool valid = true;
while(valid) {
list->gpus[idx] = get_gpu_info_hsa(devices, idx);
list->gpus[idx] = get_gpu_info_hsa(idx);
if(list->gpus[idx] != NULL) idx++;
else valid = false;
}

View File

@@ -11,6 +11,7 @@
#include "../intel/uarch.hpp"
#include "../intel/intel.hpp"
#include "../hsa/hsa.hpp"
#include "../hsa/uarch.hpp"
#include "../cuda/cuda.hpp"
#include "../cuda/uarch.hpp"
@@ -31,64 +32,54 @@
#define MAX_ATTRIBUTES 100
#define MAX_TERM_SIZE 1024
typedef struct {
int id;
const char *name;
const char *shortname;
} AttributeField;
// AttributeField IDs
// Used by
enum {
ATTRIBUTE_NAME,
ATTRIBUTE_CHIP,
ATTRIBUTE_UARCH,
ATTRIBUTE_TECHNOLOGY,
ATTRIBUTE_GT,
ATTRIBUTE_FREQUENCY,
ATTRIBUTE_STREAMINGMP,
ATTRIBUTE_CORESPERMP,
ATTRIBUTE_CUDA_CORES,
ATTRIBUTE_TENSOR_CORES,
ATTRIBUTE_EUS,
ATTRIBUTE_L2,
ATTRIBUTE_MEMORY,
ATTRIBUTE_MEMORY_FREQ,
ATTRIBUTE_BUS_WIDTH,
ATTRIBUTE_PEAK,
ATTRIBUTE_PEAK_TENSOR,
ATTRIBUTE_NAME, // ALL
ATTRIBUTE_CHIP, // ALL
ATTRIBUTE_UARCH, // ALL
ATTRIBUTE_TECHNOLOGY, // ALL
ATTRIBUTE_FREQUENCY, // ALL
ATTRIBUTE_PEAK, // ALL
ATTRIBUTE_COMPUTE_UNITS, // HSA
ATTRIBUTE_STREAMINGMP, // CUDA
ATTRIBUTE_CORESPERMP, // CUDA
ATTRIBUTE_CUDA_CORES, // CUDA
ATTRIBUTE_TENSOR_CORES, // CUDA
ATTRIBUTE_L2, // CUDA
ATTRIBUTE_MEMORY, // CUDA
ATTRIBUTE_MEMORY_FREQ, // CUDA
ATTRIBUTE_BUS_WIDTH, // CUDA
ATTRIBUTE_PEAK_TENSOR, // CUDA
ATTRIBUTE_EUS, // Intel
ATTRIBUTE_GT, // Intel
};
static const char* ATTRIBUTE_FIELDS [] = {
"Name:",
"GPU processor:",
"Microarchitecture:",
"Technology:",
"Graphics Tier:",
"Max Frequency:",
"SMs:",
"Cores/SM:",
"CUDA Cores:",
"Tensor Cores:",
"Execution Units:",
"L2 Size:",
"Memory:",
"Memory frequency:",
"Bus width:",
"Peak Performance:",
"Peak Performance (MMA):",
};
static const char* ATTRIBUTE_FIELDS_SHORT [] = {
"Name:",
"Processor:",
"uArch:",
"Technology:",
"GT:",
"Max Freq.:",
"SMs:",
"Cores/SM:",
"CUDA Cores:",
"Tensor Cores:",
"EUs:",
"L2 Size:",
"Memory:",
"Memory freq.:",
"Bus width:",
"Peak Perf.:",
"Peak Perf.(MMA):",
static const AttributeField ATTRIBUTE_INFO[] = {
{ ATTRIBUTE_NAME, "Name:", "Name:" },
{ ATTRIBUTE_CHIP, "GPU processor:", "Processor:" },
{ ATTRIBUTE_UARCH, "Microarchitecture:", "uArch:" },
{ ATTRIBUTE_TECHNOLOGY, "Technology:", "Technology:" },
{ ATTRIBUTE_FREQUENCY, "Max Frequency:", "Max Freq.:" },
{ ATTRIBUTE_PEAK, "Peak Performance:", "Peak Perf.:" },
{ ATTRIBUTE_COMPUTE_UNITS, "Compute Units (CUs):", "CUs" },
{ ATTRIBUTE_STREAMINGMP, "SMs:", "SMs:" },
{ ATTRIBUTE_CORESPERMP, "Cores/SM:", "Cores/SM:" },
{ ATTRIBUTE_CUDA_CORES, "CUDA Cores:", "CUDA Cores:" },
{ ATTRIBUTE_TENSOR_CORES, "Tensor Cores:", "Tensor Cores:" },
{ ATTRIBUTE_L2, "L2 Size:", "L2 Size:" },
{ ATTRIBUTE_MEMORY, "Memory:", "Memory:" },
{ ATTRIBUTE_MEMORY_FREQ, "Memory frequency:", "Memory freq.:" },
{ ATTRIBUTE_BUS_WIDTH, "Bus width:", "Bus width:" },
{ ATTRIBUTE_PEAK_TENSOR, "Peak Performance (MMA):", "Peak Perf.(MMA):" },
{ ATTRIBUTE_EUS, "Execution Units:", "EUs:" },
{ ATTRIBUTE_GT, "Graphics Tier:", "GT:" },
};
struct terminal {
@@ -206,8 +197,6 @@ bool ascii_fits_screen(int termw, struct ascii_logo logo, int lf) {
void replace_bgbyfg_color(struct ascii_logo* logo) {
// Replace background by foreground color
for(int i=0; i < 2; i++) {
if(logo->color_ascii[i] == NULL) break;
if(strcmp(logo->color_ascii[i], C_BG_BLACK) == 0) strcpy(logo->color_ascii[i], C_FG_BLACK);
else if(strcmp(logo->color_ascii[i], C_BG_RED) == 0) strcpy(logo->color_ascii[i], C_FG_RED);
else if(strcmp(logo->color_ascii[i], C_BG_GREEN) == 0) strcpy(logo->color_ascii[i], C_FG_GREEN);
@@ -275,13 +264,14 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
}
}
uint32_t longest_attribute_length(struct ascii* art, const char** attribute_fields) {
uint32_t longest_attribute_length(struct ascii* art, bool use_short) {
uint32_t max = 0;
uint64_t len = 0;
for(uint32_t i=0; i < art->n_attributes_set; i++) {
if(art->attributes[i]->value != NULL) {
len = strlen(attribute_fields[art->attributes[i]->type]);
const char* str = use_short ? ATTRIBUTE_INFO[art->attributes[i]->type].shortname : ATTRIBUTE_INFO[art->attributes[i]->type].name;
len = strlen(str);
if(len > max) max = len;
}
}
@@ -305,7 +295,7 @@ uint32_t longest_field_length(struct ascii* art, int la) {
return max;
}
void print_ascii_generic(struct ascii* art, uint32_t la, int32_t text_space, const char** attribute_fields) {
void print_ascii_generic(struct ascii* art, uint32_t la, int32_t text_space, bool use_short) {
struct ascii_logo* logo = art->art;
int attr_to_print = 0;
int attr_type;
@@ -349,11 +339,13 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t text_space, con
attr_value = art->attributes[attr_to_print]->value;
attr_to_print++;
space_right = 1 + (la - strlen(attribute_fields[attr_type]));
const char* attr_str = use_short ? ATTRIBUTE_INFO[attr_type].shortname : ATTRIBUTE_INFO[attr_type].name;
space_right = 1 + (la - strlen(attr_str));
current_space = max(0, text_space);
printf("%s%.*s%s", logo->color_text[0], current_space, attribute_fields[attr_type], art->reset);
current_space = max(0, current_space - (int) strlen(attribute_fields[attr_type]));
printf("%s%.*s%s", logo->color_text[0], current_space, attr_str, art->reset);
current_space = max(0, current_space - (int) strlen(attr_str));
printf("%*s", min(current_space, space_right), "");
current_space = max(0, current_space - min(current_space, space_right));
printf("%s%.*s%s", logo->color_text[1], current_space, attr_value, art->reset);
@@ -387,19 +379,19 @@ bool print_gpufetch_intel(struct gpu_info* gpu, STYLE s, struct color** cs, stru
setAttribute(art, ATTRIBUTE_EUS, eus);
setAttribute(art, ATTRIBUTE_PEAK, pp);
const char** attribute_fields = ATTRIBUTE_FIELDS;
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
bool use_short = false;
uint32_t longest_attribute = longest_attribute_length(art, use_short);
uint32_t longest_field = longest_field_length(art, longest_attribute);
choose_ascii_art(art, cs, term, longest_field);
if(!ascii_fits_screen(term->w, *art->art, longest_field)) {
// Despite of choosing the smallest logo, the output does not fit
// Choose the shorter field names and recalculate the longest attr
attribute_fields = ATTRIBUTE_FIELDS_SHORT;
longest_attribute = longest_attribute_length(art, attribute_fields);
use_short = true;
longest_attribute = longest_attribute_length(art, use_short);
}
print_ascii_generic(art, longest_attribute, term->w - art->art->width, attribute_fields);
print_ascii_generic(art, longest_attribute, term->w - art->art->width, use_short);
return true;
}
@@ -456,19 +448,19 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor);
}
const char** attribute_fields = ATTRIBUTE_FIELDS;
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
bool use_short = false;
uint32_t longest_attribute = longest_attribute_length(art, use_short);
uint32_t longest_field = longest_field_length(art, longest_attribute);
choose_ascii_art(art, cs, term, longest_field);
if(!ascii_fits_screen(term->w, *art->art, longest_field)) {
// Despite of choosing the smallest logo, the output does not fit
// Choose the shorter field names and recalculate the longest attr
attribute_fields = ATTRIBUTE_FIELDS_SHORT;
longest_attribute = longest_attribute_length(art, attribute_fields);
use_short = true;
longest_attribute = longest_attribute_length(art, use_short);
}
print_ascii_generic(art, longest_attribute, term->w - art->art->width, attribute_fields);
print_ascii_generic(art, longest_attribute, term->w - art->art->width, use_short);
free(manufacturing_process);
free(max_frequency);
@@ -490,26 +482,34 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
return false;
char* gpu_name = get_str_gpu_name(gpu);
char* sms = get_str_cu(gpu);
char* gpu_chip = get_str_chip(gpu->arch);
char* uarch = get_str_uarch_hsa(gpu->arch);
char* manufacturing_process = get_str_process(gpu->arch);
char* cus = get_str_cu(gpu);
char* max_frequency = get_str_freq(gpu);
setAttribute(art, ATTRIBUTE_NAME, gpu_name);
if (gpu_chip != NULL) {
setAttribute(art, ATTRIBUTE_CHIP, gpu_chip);
}
setAttribute(art, ATTRIBUTE_UARCH, uarch);
setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
setAttribute(art, ATTRIBUTE_STREAMINGMP, sms);
setAttribute(art, ATTRIBUTE_COMPUTE_UNITS, cus);
const char** attribute_fields = ATTRIBUTE_FIELDS;
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
bool use_short = false;
uint32_t longest_attribute = longest_attribute_length(art, use_short);
uint32_t longest_field = longest_field_length(art, longest_attribute);
choose_ascii_art(art, cs, term, longest_field);
if(!ascii_fits_screen(term->w, *art->art, longest_field)) {
// Despite of choosing the smallest logo, the output does not fit
// Choose the shorter field names and recalculate the longest attr
attribute_fields = ATTRIBUTE_FIELDS_SHORT;
longest_attribute = longest_attribute_length(art, attribute_fields);
use_short = true;
longest_attribute = longest_attribute_length(art, use_short);
}
print_ascii_generic(art, longest_attribute, term->w - art->art->width, attribute_fields);
print_ascii_generic(art, longest_attribute, term->w - art->art->width, use_short);
free(art->attributes);
free(art);

View File

@@ -16,6 +16,9 @@ struct uarch {
int32_t cc_minor;
int32_t compute_capability;
// HSA specific
int32_t llvm_target;
// Intel specific
int32_t gt;
int32_t eu;

View File

@@ -5,8 +5,8 @@
#include "cuda.hpp"
#include "uarch.hpp"
#include "pci.hpp"
#include "gpufetch_helper_cuda.hpp"
#include "../common/pci.hpp"
#include "../common/global.hpp"
#include "../common/uarch.hpp"
@@ -33,10 +33,8 @@ int get_tensor_cores(struct uarch* arch, int sm, int major) {
if(major == 7) {
// TU116 does not have tensor cores!
// https://www.anandtech.com/show/13973/nvidia-gtx-1660-ti-review-feat-evga-xc-gaming/2
if(arch->chip == CHIP_TU116 || arch->chip == CHIP_TU116BM ||
arch->chip == CHIP_TU116GL || arch->chip == CHIP_TU116M) {
if (is_chip_TU116(arch))
return 0;
}
return sm * 8;
}
else if(major == 8) return sm * 4;

View File

@@ -8,6 +8,7 @@
#include "../common/uarch.hpp"
#include "../common/global.hpp"
#include "../common/gpu.hpp"
#include "pci.hpp"
#include "chips.hpp"
// Any clock multiplier
@@ -361,3 +362,8 @@ void free_uarch_struct(struct uarch* arch) {
free(arch->chip_str);
free(arch);
}
bool is_chip_TU116(struct uarch* arch) {
return arch->chip == CHIP_TU116 || arch->chip == CHIP_TU116BM ||
arch->chip == CHIP_TU116GL || arch->chip == CHIP_TU116M;
}

View File

@@ -13,5 +13,6 @@ char* get_str_cc(struct uarch* arch);
char* get_str_chip(struct uarch* arch);
char* get_str_process(struct uarch* arch);
void free_uarch_struct(struct uarch* arch);
bool is_chip_TU116(struct uarch* arch);
#endif

37
src/hsa/chips.hpp Normal file
View File

@@ -0,0 +1,37 @@
#ifndef __HSA_GPUCHIPS__
#define __HSA_GPUCHIPS__
typedef uint32_t GPUCHIP;
enum {
CHIP_UNKNOWN_HSA,
// VEGA (TODO)
// ...
// RDNA
CHIP_NAVI_10,
CHIP_NAVI_12,
CHIP_NAVI_14,
// RDNA2
// There are way more (eg Oberon)
// Maybe we'll add them in the future.
CHIP_NAVI_21,
CHIP_NAVI_22,
CHIP_NAVI_23,
CHIP_NAVI_24,
// RDNA3
// There are way more as well.
// Supporting Navi only for now.
CHIP_NAVI_31,
CHIP_NAVI_32,
CHIP_NAVI_33,
// RDNA4
CHIP_NAVI_44,
CHIP_NAVI_48,
// CDNA
CHIP_ARCTURUS, // MI100 series
CHIP_ALDEBARAN, // MI200 series
CHIP_AQUA_VANJARAM, // MI300 series
CHIP_CDNA_NEXT // MI350 series
};
#endif

View File

@@ -12,7 +12,7 @@
#include <hsa/hsa_ext_amd.h>
#include "hsa.hpp"
#include "../common/pci.hpp"
#include "uarch.hpp"
#include "../common/global.hpp"
#include "../common/uarch.hpp"
@@ -34,8 +34,7 @@ struct agent_info {
snprintf(&(err_val[0]), sizeof(err_val), "%#x", (uint32_t)err); \
err_str = &(err_val[0]); \
} \
printErr("HSA failure at: %s:%d\n", \
__FILE__, __LINE__); \
printErr("HSA failure at: %s:%d\n", __FILE__, __LINE__); \
printErr("Call returned %s\n", err_str); \
return (err); \
} \
@@ -52,7 +51,6 @@ hsa_status_t agent_callback(hsa_agent_t agent, void *data) {
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, info->gpu_name);
RET_IF_HSA_ERR(err);
// TODO: What if vendor_name is not AMD?
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, info->vendor_name);
RET_IF_HSA_ERR(err);
@@ -77,7 +75,7 @@ struct topology_h* get_topology_info(struct agent_info info) {
return topo;
}
struct gpu_info* get_gpu_info_hsa(struct pci_dev *devices, int gpu_idx) {
struct gpu_info* get_gpu_info_hsa(int gpu_idx) {
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
gpu->pci = NULL;
gpu->idx = gpu_idx;
@@ -92,11 +90,8 @@ struct gpu_info* get_gpu_info_hsa(struct pci_dev *devices, int gpu_idx) {
return NULL;
}
hsa_status_t status;
// Initialize the HSA runtime
status = hsa_init();
if (status != HSA_STATUS_SUCCESS) {
hsa_status_t err = hsa_init();
if (err != HSA_STATUS_SUCCESS) {
printErr("Failed to initialize HSA runtime");
return NULL;
}
@@ -105,23 +100,35 @@ struct gpu_info* get_gpu_info_hsa(struct pci_dev *devices, int gpu_idx) {
info.deviceId = gpu_idx;
// Iterate over all agents in the system
status = hsa_iterate_agents(agent_callback, &info);
if (status != HSA_STATUS_SUCCESS) {
err = hsa_iterate_agents(agent_callback, &info);
if (err != HSA_STATUS_SUCCESS) {
printErr("Failed to iterate HSA agents");
hsa_shut_down();
return NULL;
}
gpu->freq = info.max_clock_freq;
if (strcmp(info.vendor_name, "AMD") != 0) {
printErr("HSA vendor name is: '%s'. Only AMD is supported!", info.vendor_name);
return NULL;
}
gpu->vendor = GPU_VENDOR_AMD;
gpu->freq = info.max_clock_freq;
gpu->topo_h = get_topology_info(info);
gpu->name = (char *) emalloc(sizeof(char) * (strlen(info.device_mkt_name) + 1));
strcpy(gpu->name, info.device_mkt_name);
gpu->topo_h = get_topology_info(info);
gpu->arch = get_uarch_from_hsa(gpu, info.gpu_name);
// TODO: Use gpu_name for uarch detection
if (gpu->arch == NULL) {
return NULL;
}
// Shut down the HSA runtime
hsa_shut_down();
err = hsa_shut_down();
if (err != HSA_STATUS_SUCCESS) {
printErr("Failed to shutdown HSA runtime");
return NULL;
}
return gpu;
}

View File

@@ -3,7 +3,7 @@
#include "../common/gpu.hpp"
struct gpu_info* get_gpu_info_hsa(struct pci_dev *devices, int gpu_idx);
struct gpu_info* get_gpu_info_hsa(int gpu_idx);
char* get_str_cu(struct gpu_info* gpu);
#endif

321
src/hsa/uarch.cpp Normal file
View File

@@ -0,0 +1,321 @@
#include <cstdlib>
#include <cstdint>
#include <cstring>
#include "../common/uarch.hpp"
#include "../common/global.hpp"
#include "../common/gpu.hpp"
#include "chips.hpp"
// MICROARCH values
enum {
UARCH_UNKNOWN,
// GCN (Graphics Core Next)
// Empty for now
// ...
// RDNA (Radeon DNA)
UARCH_RDNA,
UARCH_RDNA2,
UARCH_RDNA3,
UARCH_RDNA4,
// CDNA (Compute DNA)
UARCH_CDNA,
UARCH_CDNA2,
UARCH_CDNA3,
UARCH_CDNA4
};
static const char *uarch_str[] = {
/*[ARCH_UNKNOWN] = */ STRING_UNKNOWN,
/*[UARCH_RDNA] = */ "RDNA",
/*[UARCH_RDNA2] = */ "RDNA2",
/*[UARCH_RDNA3] = */ "RDNA3",
/*[UARCH_RDNA4] = */ "RDNA4",
/*[UARCH_CDNA] = */ "CDNA",
/*[UARCH_CDNA2] = */ "CDNA2",
/*[UARCH_CDNA3] = */ "CDNA3",
/*[UARCH_CDNA4] = */ "CDNA4",
};
// Sources:
// - https://rocm.docs.amd.com/en/latest/reference/gpu-arch-specs.html
// - https://www.techpowerup.com
//
// This is sometimes refered to as LLVM target, but also shader ISA.
//
// LLVM target *usually* maps to a specific architecture. However there
// are case where this is not true:
// MI8 is GCN3.0 with LLVM target gfx803
// MI6 is GCN4.0 with LLVM target gfx803
// or
// Strix Point can be gfx1150 or gfx1151
//
// NOTE: GCN chips are stored for completeness, but they are
// not actively supported.
enum {
TARGET_UNKNOWN_HSA,
/// GCN (Graphics Core Next)
/// ------------------------
// GCN 1.0
TARGET_GFX600,
TARGET_GFX601,
TARGET_GFX602,
// GCN 2.0
TARGET_GFX700,
TARGET_GFX701,
TARGET_GFX702,
TARGET_GFX703,
TARGET_GFX704,
TARGET_GFX705,
// GCN 3.0 / 4.0
TARGET_GFX801,
TARGET_GFX802,
TARGET_GFX803,
TARGET_GFX805,
TARGET_GFX810,
// GCN 5.0
TARGET_GFX900,
TARGET_GFX902,
TARGET_GFX904,
// GCN 5.1
TARGET_GFX906,
// ???
TARGET_GFX909,
TARGET_GFX90C,
/// RDNA (Radeon DNA)
/// -----------------
// RDNA1
TARGET_GFX1010,
TARGET_GFX1011,
TARGET_GFX1012,
// RDNA2
TARGET_GFX1013, // Oberon
TARGET_GFX1030,
TARGET_GFX1031,
TARGET_GFX1032,
TARGET_GFX1033,
TARGET_GFX1034,
TARGET_GFX1035, // ??
TARGET_GFX1036, // ??
// RDNA3
TARGET_GFX1100,
TARGET_GFX1101,
TARGET_GFX1102,
TARGET_GFX1103, // ???
// RDNA3.5
TARGET_GFX1150, // Strix Point
TARGET_GFX1151, // Strix Halo / Strix Point
TARGET_GFX1152, // Krackan Point
TARGET_GFX1153, // ???
// RDNA4
TARGET_GFX1200,
TARGET_GFX1201,
TARGET_GFX1250, // ???
TARGET_GFX1251, // ???
/// CDNA (Compute DNA)
/// ------------------
// CDNA
TARGET_GFX908,
// CDNA2
TARGET_GFX90A,
// CDNA3
TARGET_GFX942,
// CDNA4
TARGET_GFX950
};
#define CHECK_UARCH_START if (false) {}
#define CHECK_UARCH(arch, chip_, str, uarch, process) \
else if (arch->chip == chip_) fill_uarch(arch, str, uarch, process);
#define CHECK_UARCH_END else { if(arch->chip != CHIP_UNKNOWN_HSA) printBug("map_chip_to_uarch_hsa: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, UNK); }
void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t process) {
arch->chip_str = (char *) emalloc(sizeof(char) * (strlen(str)+1));
strcpy(arch->chip_str, str);
arch->uarch = u;
arch->process = process;
}
// On chiplet based chips (such as Navi31, Navi32, etc),
// we have 2 different processes: The MCD process and the
// rest of the chip process. They might be different and here
// we just take one - let's take MCD process for now.
//
// TODO: Should we differentiate?
void map_chip_to_uarch_hsa(struct uarch* arch) {
CHECK_UARCH_START
// RDNA
CHECK_UARCH(arch, CHIP_NAVI_10, "Navi 10", UARCH_RDNA, 7)
CHECK_UARCH(arch, CHIP_NAVI_12, "Navi 12", UARCH_RDNA, 7)
CHECK_UARCH(arch, CHIP_NAVI_14, "Navi 14", UARCH_RDNA, 7)
CHECK_UARCH(arch, CHIP_NAVI_21, "Navi 21", UARCH_RDNA2, 7)
CHECK_UARCH(arch, CHIP_NAVI_22, "Navi 22", UARCH_RDNA2, 7)
CHECK_UARCH(arch, CHIP_NAVI_23, "Navi 23", UARCH_RDNA2, 7)
CHECK_UARCH(arch, CHIP_NAVI_24, "Navi 24", UARCH_RDNA2, 6)
CHECK_UARCH(arch, CHIP_NAVI_31, "Navi 31", UARCH_RDNA3, 6)
CHECK_UARCH(arch, CHIP_NAVI_32, "Navi 32", UARCH_RDNA3, 6)
CHECK_UARCH(arch, CHIP_NAVI_33, "Navi 33", UARCH_RDNA3, 6)
CHECK_UARCH(arch, CHIP_NAVI_44, "Navi 44", UARCH_RDNA4, 4)
CHECK_UARCH(arch, CHIP_NAVI_48, "Navi 48", UARCH_RDNA4, 4)
// CDNA
// NOTE: We will not show chip name for CDNA, thus use empty str
CHECK_UARCH(arch, CHIP_ARCTURUS, "", UARCH_CDNA, 7)
CHECK_UARCH(arch, CHIP_ALDEBARAN, "", UARCH_CDNA2, 6)
CHECK_UARCH(arch, CHIP_AQUA_VANJARAM, "", UARCH_CDNA3, 6)
CHECK_UARCH(arch, CHIP_CDNA_NEXT, "", UARCH_CDNA4, 6) // big difference between MCD and rest of the chip process
CHECK_UARCH_END
}
#define CHECK_TGT_START if (false) {}
#define CHECK_TGT(target, llvm_target, chip) \
else if (target == llvm_target) return chip;
#define CHECK_TGT_END else { printBug("LLVM target '%d' has no matching chip", target); return CHIP_UNKNOWN_HSA; }
// We have at least 2 choices to infer the chip:
//
// - LLVM target (e.g., gfx1101 is Navi 32)
// - PCI ID (e.g., 0x7470 is Navi 32)
//
// For now we will use the first approach, which seems to have
// some issues like mentioned in the enum.
// However PCI detection is also not perfect, since it is
// quite hard to find PCI ids from old hardware.
GPUCHIP get_chip_from_target_hsa(int32_t target) {
CHECK_TGT_START
/// RDNA
/// -------------------------------------------
CHECK_TGT(target, TARGET_GFX1010, CHIP_NAVI_10)
CHECK_TGT(target, TARGET_GFX1011, CHIP_NAVI_12)
CHECK_TGT(target, TARGET_GFX1012, CHIP_NAVI_14)
// CHECK_TGT(target, TARGET_GFX1013, TODO)
/// RDNA2
/// -------------------------------------------
CHECK_TGT(target, TARGET_GFX1030, CHIP_NAVI_21)
CHECK_TGT(target, TARGET_GFX1031, CHIP_NAVI_22)
CHECK_TGT(target, TARGET_GFX1032, CHIP_NAVI_23)
CHECK_TGT(target, TARGET_GFX1033, CHIP_NAVI_21)
CHECK_TGT(target, TARGET_GFX1034, CHIP_NAVI_24)
// CHECK_TGT(target, TARGET_GFX1035, TODO)
// CHECK_TGT(target, TARGET_GFX1036, TODO)
/// RDNA3
/// -------------------------------------------
CHECK_TGT(target, TARGET_GFX1100, CHIP_NAVI_31)
CHECK_TGT(target, TARGET_GFX1101, CHIP_NAVI_32)
CHECK_TGT(target, TARGET_GFX1102, CHIP_NAVI_33)
// CHECK_TGT(target, TARGET_GFX1103, TODO)
/// RDNA3.5
/// -------------------------------------------
// CHECK_TGT(target, TARGET_GFX1150, TODO)
// CHECK_TGT(target, TARGET_GFX1151, TODO)
// CHECK_TGT(target, TARGET_GFX1152, TODO)
// CHECK_TGT(target, TARGET_GFX1153, TODO)
/// RDNA4
/// -------------------------------------------
CHECK_TGT(target, TARGET_GFX1200, CHIP_NAVI_44)
CHECK_TGT(target, TARGET_GFX1201, CHIP_NAVI_48)
// CHECK_TGT(target, TARGET_GFX1250, TODO)
// CHECK_TGT(target, TARGET_GFX1251, TODO)
/// CDNA
/// -------------------------------------------
CHECK_TGT(target, TARGET_GFX908, CHIP_ARCTURUS)
/// CDNA2
/// -------------------------------------------
CHECK_TGT(target, TARGET_GFX90A, CHIP_ALDEBARAN)
/// CDNA3
/// -------------------------------------------
CHECK_TGT(target, TARGET_GFX942, CHIP_AQUA_VANJARAM)
/// CDNA4
/// -------------------------------------------
CHECK_TGT(target, TARGET_GFX950, CHIP_CDNA_NEXT)
CHECK_TGT_END
}
#define CHECK_TGT_STR_START if (false) {}
#define CHECK_TGT_STR(target, llvm_target, chip) \
else if (strcmp(target, llvm_target) == 0) return chip;
#define CHECK_TGT_STR_END else { return TARGET_UNKNOWN_HSA; }
// Maps the LLVM target string to the enum value
int32_t get_llvm_target_from_str(char* target) {
// TODO: Autogenerate this
// TODO: Add all, not only the ones we support in get_chip_from_target_hsa
CHECK_TGT_STR_START
CHECK_TGT_STR(target, "gfx1010", TARGET_GFX1010)
CHECK_TGT_STR(target, "gfx1011", TARGET_GFX1011)
CHECK_TGT_STR(target, "gfx1012", TARGET_GFX1012)
CHECK_TGT_STR(target, "gfx1013", TARGET_GFX1013)
CHECK_TGT_STR(target, "gfx1030", TARGET_GFX1030)
CHECK_TGT_STR(target, "gfx1031", TARGET_GFX1031)
CHECK_TGT_STR(target, "gfx1032", TARGET_GFX1032)
CHECK_TGT_STR(target, "gfx1033", TARGET_GFX1033)
CHECK_TGT_STR(target, "gfx1034", TARGET_GFX1034)
CHECK_TGT_STR(target, "gfx1035", TARGET_GFX1035)
CHECK_TGT_STR(target, "gfx1036", TARGET_GFX1036)
CHECK_TGT_STR(target, "gfx1100", TARGET_GFX1100)
CHECK_TGT_STR(target, "gfx1101", TARGET_GFX1101)
CHECK_TGT_STR(target, "gfx1102", TARGET_GFX1102)
CHECK_TGT_STR(target, "gfx1103", TARGET_GFX1103)
CHECK_TGT_STR(target, "gfx1200", TARGET_GFX1200)
CHECK_TGT_STR(target, "gfx1201", TARGET_GFX1201)
CHECK_TGT_STR(target, "gfx1250", TARGET_GFX1250)
CHECK_TGT_STR(target, "gfx1251", TARGET_GFX1251)
CHECK_TGT_STR(target, "gfx908", TARGET_GFX908)
CHECK_TGT_STR(target, "gfx90a", TARGET_GFX90A)
CHECK_TGT_STR(target, "gfx942", TARGET_GFX942)
CHECK_TGT_STR(target, "gfx950", TARGET_GFX950)
CHECK_TGT_STR_END
}
struct uarch* get_uarch_from_hsa(struct gpu_info* gpu, char* gpu_name) {
struct uarch* arch = (struct uarch*) emalloc(sizeof(struct uarch));
arch->llvm_target = get_llvm_target_from_str(gpu_name);
if (arch->llvm_target == TARGET_UNKNOWN_HSA) {
printErr("Unknown LLVM target: '%s'", gpu_name);
return NULL;
}
arch->chip_str = NULL;
arch->chip = get_chip_from_target_hsa(arch->llvm_target);
map_chip_to_uarch_hsa(arch);
return arch;
}
bool is_uarch_valid(struct uarch* arch) {
if (arch == NULL) {
printBug("Invalid uarch: arch is NULL");
return false;
}
if (arch->uarch >= UARCH_UNKNOWN && arch->uarch <= UARCH_CDNA4) {
return true;
}
else {
printBug("Invalid uarch: %d", arch->uarch);
return false;
}
}
bool is_cdna(struct uarch* arch) {
return arch->uarch == UARCH_CDNA ||
arch->uarch == UARCH_CDNA2 ||
arch->uarch == UARCH_CDNA3 ||
arch->uarch == UARCH_CDNA4;
}
char* get_str_chip(struct uarch* arch) {
// We dont want to show CDNA chip names as they add
// no value, since each architecture maps one to one
// to a chip.
if (is_cdna(arch)) return NULL;
return arch->chip_str;
}
const char* get_str_uarch_hsa(struct uarch* arch) {
if (!is_uarch_valid(arch)) {
return NULL;
}
return uarch_str[arch->uarch];
}

13
src/hsa/uarch.hpp Normal file
View File

@@ -0,0 +1,13 @@
#ifndef __HSA_UARCH__
#define __HSA_UARCH__
#include "../common/gpu.hpp"
struct uarch;
struct uarch* get_uarch_from_hsa(struct gpu_info* gpu, char* gpu_name);
char* get_str_uarch_hsa(struct uarch* arch);
char* get_str_process(struct uarch* arch); // TODO: Shouldnt we define this in the cpp?
char* get_str_chip(struct uarch* arch);
#endif