1 Commits

Author SHA1 Message Date
Dr-Noob
47091fded5 Improve build script 2025-10-17 07:39:02 +02:00
6 changed files with 89 additions and 150 deletions

View File

@@ -1,5 +1,24 @@
#!/bin/bash #!/bin/bash
print_help() {
cat << EOF
Usage: $0 <backends> [build_type]
<backends> MANDATORY. Comma-separated list of
backends to enable.
Valid options: hsa, intel, cuda
Example: hsa,cuda
[build_type] OPTIONAL. Build type. Valid options:
debug, release (default: release)
Examples:
$0 hsa,intel debug
$0 cuda
$0 hsa,intel,cuda release
EOF
}
# gpufetch build script # gpufetch build script
set -e set -e
@@ -7,19 +26,79 @@ rm -rf build/ gpufetch
mkdir build/ mkdir build/
cd build/ cd build/
if [ "$1" == "debug" ] if [ "$1" == "--help" ]
then then
BUILD_TYPE="Debug" echo "gpufetch build script"
else echo
BUILD_TYPE="Release" print_help
exit 0
fi fi
if [[ $# -lt 1 ]]; then
echo "ERROR: At least one backend must be specified."
echo
print_help
exit 1
fi
# Determine if last argument is build type
LAST_ARG="${!#}"
if [[ "$LAST_ARG" == "debug" || "$LAST_ARG" == "release" ]]; then
BUILD_TYPE="$LAST_ARG"
BACKEND_ARG="${1}"
else
BUILD_TYPE="release"
BACKEND_ARG="${1}"
fi
# Split comma-separated backends into an array
IFS=',' read -r -a BACKENDS <<< "$BACKEND_ARG"
# Validate build type
if [[ "$BUILD_TYPE" != "debug" && "$BUILD_TYPE" != "release" ]]
then
echo "Error: Invalid build type '$BUILD_TYPE'."
echo "Valid options are: debug, release"
exit 1
fi
# From lower to upper case
CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=${BUILD_TYPE^}"
# Validate backends
VALID_BACKENDS=("hsa" "intel" "cuda")
for BACKEND in "${BACKENDS[@]}"; do
case "$BACKEND" in
hsa)
CMAKE_FLAGS+=" -DENABLE_HSA_BACKEND=ON"
;;
intel)
CMAKE_FLAGS+=" -DENABLE_INTEL_BACKEND=ON"
;;
cuda)
CMAKE_FLAGS+=" -DENABLE_CUDA_BACKEND=ON"
;;
*)
echo "ERROR: Invalid backend '$BACKEND'."
echo "Valid options: ${VALID_BACKENDS[*]}"
exit 1
;;
esac
done
# You can also manually specify the compilation flags.
# If you need to, just run the cmake command directly
# instead of using this script.
#
# Here you will find some help:
#
# In case you have CUDA installed but it is not detected, # In case you have CUDA installed but it is not detected,
# - set CMAKE_CUDA_COMPILER to your nvcc binary: # - set CMAKE_CUDA_COMPILER to your nvcc binary:
# - set CMAKE_CUDA_COMPILER_TOOLKIT_ROOT to the CUDA root dir # - set CMAKE_CUDA_COMPILER_TOOLKIT_ROOT to the CUDA root dir
# for example: # for example:
# cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=/usr/local/cuda/ .. # cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=/usr/local/cuda/ ..
#
# In case you want to explicitely disable a backend, you can: # In case you want to explicitely disable a backend, you can:
# Disable CUDA backend: # Disable CUDA backend:
# cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DENABLE_CUDA_BACKEND=OFF .. # cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DENABLE_CUDA_BACKEND=OFF ..
@@ -28,7 +107,9 @@ fi
# Disable Intel backend: # Disable Intel backend:
# cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DENABLE_INTEL_BACKEND=OFF .. # cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DENABLE_INTEL_BACKEND=OFF ..
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE .. echo "$0: Running cmake $CMAKE_FLAGS"
echo
cmake $CMAKE_FLAGS ..
os=$(uname) os=$(uname)
if [ "$os" == 'Linux' ]; then if [ "$os" == 'Linux' ]; then

View File

@@ -101,17 +101,6 @@ char* get_str_bus_width(struct gpu_info* gpu) {
return string; return string;
} }
char* get_str_lds_size(struct gpu_info* gpu) {
// TODO: Show XX KB (XX MB Total) like in cpufetch
uint32_t size = 3+1+3+1;
assert(strlen(STRING_UNKNOWN)+1 <= size);
char* string = (char *) ecalloc(size, sizeof(char));
sprintf(string, "%d KB", gpu->mem->lds_size / 1024);
return string;
}
char* get_str_memory_clock(struct gpu_info* gpu) { char* get_str_memory_clock(struct gpu_info* gpu) {
return get_freq_as_str_mhz(gpu->mem->freq); return get_freq_as_str_mhz(gpu->mem->freq);
} }

View File

@@ -46,10 +46,6 @@ struct topology_c {
// HSA topology // HSA topology
struct topology_h { struct topology_h {
int32_t compute_units; int32_t compute_units;
int32_t num_shader_engines;
int32_t simds_per_cu;
int32_t num_xcc;
int32_t matrix_cores;
}; };
// Intel topology // Intel topology
@@ -65,7 +61,6 @@ struct memory {
int32_t bus_width; int32_t bus_width;
int32_t freq; int32_t freq;
int32_t clk_mul; // clock multiplier int32_t clk_mul; // clock multiplier
int32_t lds_size; // HSA specific for now
}; };
struct gpu_info { struct gpu_info {
@@ -93,7 +88,6 @@ char* get_str_freq(struct gpu_info* gpu);
char* get_str_memory_size(struct gpu_info* gpu); char* get_str_memory_size(struct gpu_info* gpu);
char* get_str_memory_type(struct gpu_info* gpu); char* get_str_memory_type(struct gpu_info* gpu);
char* get_str_bus_width(struct gpu_info* gpu); char* get_str_bus_width(struct gpu_info* gpu);
char* get_str_lds_size(struct gpu_info* gpu);
char* get_str_memory_clock(struct gpu_info* gpu); char* get_str_memory_clock(struct gpu_info* gpu);
char* get_str_l2(struct gpu_info* gpu); char* get_str_l2(struct gpu_info* gpu);
char* get_str_peak_performance(struct gpu_info* gpu); char* get_str_peak_performance(struct gpu_info* gpu);

View File

@@ -48,17 +48,14 @@ enum {
ATTRIBUTE_FREQUENCY, // ALL ATTRIBUTE_FREQUENCY, // ALL
ATTRIBUTE_PEAK, // ALL ATTRIBUTE_PEAK, // ALL
ATTRIBUTE_COMPUTE_UNITS, // HSA ATTRIBUTE_COMPUTE_UNITS, // HSA
ATTRIBUTE_MATRIX_CORES, // HSA
ATTRIBUTE_XCDS, // HSA
ATTRIBUTE_LDS_SIZE, // HSA
ATTRIBUTE_STREAMINGMP, // CUDA ATTRIBUTE_STREAMINGMP, // CUDA
ATTRIBUTE_CORESPERMP, // CUDA ATTRIBUTE_CORESPERMP, // CUDA
ATTRIBUTE_CUDA_CORES, // CUDA ATTRIBUTE_CUDA_CORES, // CUDA
ATTRIBUTE_TENSOR_CORES, // CUDA ATTRIBUTE_TENSOR_CORES, // CUDA
ATTRIBUTE_L2, // CUDA ATTRIBUTE_L2, // CUDA
ATTRIBUTE_MEMORY, // CUDA,HSA ATTRIBUTE_MEMORY, // CUDA
ATTRIBUTE_MEMORY_FREQ, // CUDA ATTRIBUTE_MEMORY_FREQ, // CUDA
ATTRIBUTE_BUS_WIDTH, // CUDA,HSA ATTRIBUTE_BUS_WIDTH, // CUDA
ATTRIBUTE_PEAK_TENSOR, // CUDA ATTRIBUTE_PEAK_TENSOR, // CUDA
ATTRIBUTE_EUS, // Intel ATTRIBUTE_EUS, // Intel
ATTRIBUTE_GT, // Intel ATTRIBUTE_GT, // Intel
@@ -72,9 +69,6 @@ static const AttributeField ATTRIBUTE_INFO[] = {
{ ATTRIBUTE_FREQUENCY, "Max Frequency:", "Max Freq.:" }, { ATTRIBUTE_FREQUENCY, "Max Frequency:", "Max Freq.:" },
{ ATTRIBUTE_PEAK, "Peak Performance:", "Peak Perf.:" }, { ATTRIBUTE_PEAK, "Peak Performance:", "Peak Perf.:" },
{ ATTRIBUTE_COMPUTE_UNITS, "Compute Units (CUs):", "CUs" }, { ATTRIBUTE_COMPUTE_UNITS, "Compute Units (CUs):", "CUs" },
{ ATTRIBUTE_MATRIX_CORES, "Matrix Cores: ", "Matrix Cores:" },
{ ATTRIBUTE_XCDS, "XCDs:", "XCDs" },
{ ATTRIBUTE_LDS_SIZE, "LDS size:", "LDS:" },
{ ATTRIBUTE_STREAMINGMP, "SMs:", "SMs:" }, { ATTRIBUTE_STREAMINGMP, "SMs:", "SMs:" },
{ ATTRIBUTE_CORESPERMP, "Cores/SM:", "Cores/SM:" }, { ATTRIBUTE_CORESPERMP, "Cores/SM:", "Cores/SM:" },
{ ATTRIBUTE_CUDA_CORES, "CUDA Cores:", "CUDA Cores:" }, { ATTRIBUTE_CUDA_CORES, "CUDA Cores:", "CUDA Cores:" },
@@ -492,12 +486,7 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
char* uarch = get_str_uarch_hsa(gpu->arch); char* uarch = get_str_uarch_hsa(gpu->arch);
char* manufacturing_process = get_str_process(gpu->arch); char* manufacturing_process = get_str_process(gpu->arch);
char* cus = get_str_cu(gpu); char* cus = get_str_cu(gpu);
char* matrix_cores = get_str_matrix_cores(gpu);
char* xcds = get_str_xcds(gpu);
char* max_frequency = get_str_freq(gpu); char* max_frequency = get_str_freq(gpu);
char* bus_width = get_str_bus_width(gpu);
char* mem_size = get_str_memory_size(gpu);
char* lds_size = get_str_lds_size(gpu);
setAttribute(art, ATTRIBUTE_NAME, gpu_name); setAttribute(art, ATTRIBUTE_NAME, gpu_name);
if (gpu_chip != NULL) { if (gpu_chip != NULL) {
@@ -507,13 +496,6 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process); setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency); setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
setAttribute(art, ATTRIBUTE_COMPUTE_UNITS, cus); setAttribute(art, ATTRIBUTE_COMPUTE_UNITS, cus);
setAttribute(art, ATTRIBUTE_MATRIX_CORES, matrix_cores);
if (xcds != NULL) {
setAttribute(art, ATTRIBUTE_XCDS, xcds);
}
setAttribute(art, ATTRIBUTE_LDS_SIZE, lds_size);
setAttribute(art, ATTRIBUTE_MEMORY, mem_size);
setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
bool use_short = false; bool use_short = false;
uint32_t longest_attribute = longest_attribute_length(art, use_short); uint32_t longest_attribute = longest_attribute_length(art, use_short);

View File

@@ -22,16 +22,7 @@ struct agent_info {
char vendor_name[64]; char vendor_name[64];
char device_mkt_name[64]; char device_mkt_name[64];
uint32_t max_clock_freq; uint32_t max_clock_freq;
// Memory
uint32_t bus_width;
uint32_t lds_size;
uint64_t global_size;
// Topology
uint32_t compute_unit; uint32_t compute_unit;
uint32_t num_shader_engines;
uint32_t simds_per_cu;
uint32_t num_xcc; // Acccelerator Complex Dies (XCDs)
uint32_t matrix_cores; // Cores with WMMA/MFMA capabilities
}; };
#define RET_IF_HSA_ERR(err) { \ #define RET_IF_HSA_ERR(err) { \
@@ -49,51 +40,6 @@ struct agent_info {
} \ } \
} }
hsa_status_t memory_pool_callback(hsa_amd_memory_pool_t pool, void* data) {
struct agent_info* info = reinterpret_cast<struct agent_info *>(data);
hsa_amd_segment_t segment;
hsa_status_t err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
RET_IF_HSA_ERR(err);
if (segment == HSA_AMD_SEGMENT_GROUP) {
// LDS memory
// We want to make sure that this memory pool is not repeated.
if (info->lds_size != 0) {
printErr("Found HSA_AMD_SEGMENT_GROUP twice!");
return HSA_STATUS_ERROR;
}
uint32_t size = 0;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size);
RET_IF_HSA_ERR(err);
info->lds_size = size;
}
else if (segment == HSA_AMD_SEGMENT_GLOBAL) {
// Global memory
uint32_t global_flags = 0;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flags);
RET_IF_HSA_ERR(err);
if (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED) {
if (info->global_size != 0) {
printErr("Found HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED twice!");
return HSA_STATUS_ERROR;
}
uint64_t size = 0;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size);
RET_IF_HSA_ERR(err);
info->global_size = size;
}
}
return HSA_STATUS_SUCCESS;
}
hsa_status_t agent_callback(hsa_agent_t agent, void *data) { hsa_status_t agent_callback(hsa_agent_t agent, void *data) {
struct agent_info* info = reinterpret_cast<struct agent_info *>(data); struct agent_info* info = reinterpret_cast<struct agent_info *>(data);
@@ -116,26 +62,6 @@ hsa_status_t agent_callback(hsa_agent_t agent, void *data) {
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &info->compute_unit); err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &info->compute_unit);
RET_IF_HSA_ERR(err); RET_IF_HSA_ERR(err);
// According to the documentation, this is deprecated. But what should I be using then?
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MEMORY_WIDTH, &info->bus_width);
RET_IF_HSA_ERR(err);
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES, &info->num_shader_engines);
RET_IF_HSA_ERR(err);
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU, &info->simds_per_cu);
RET_IF_HSA_ERR(err);
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_NUM_XCC, &info->num_xcc);
RET_IF_HSA_ERR(err);
// We will check against zero to see if it was set beforehand.
info->global_size = 0;
info->lds_size = 0;
// This will fill global_size and lds_size.
err = hsa_amd_agent_iterate_memory_pools(agent, memory_pool_callback, data);
RET_IF_HSA_ERR(err);
} }
return HSA_STATUS_SUCCESS; return HSA_STATUS_SUCCESS;
@@ -145,26 +71,10 @@ struct topology_h* get_topology_info(struct agent_info info) {
struct topology_h* topo = (struct topology_h*) emalloc(sizeof(struct topology_h)); struct topology_h* topo = (struct topology_h*) emalloc(sizeof(struct topology_h));
topo->compute_units = info.compute_unit; topo->compute_units = info.compute_unit;
topo->num_shader_engines = info.num_shader_engines; // not printed at the moment
topo->simds_per_cu = info.simds_per_cu; // not printed at the moment
topo->num_xcc = info.num_xcc;
// Old GPUs (GCN I guess) might not have matrix cores.
// Not sure what would happen here?
topo->matrix_cores = topo->compute_units * topo->simds_per_cu;
return topo; return topo;
} }
struct memory* get_memory_info(struct gpu_info* gpu, struct agent_info info) {
struct memory* mem = (struct memory*) emalloc(sizeof(struct memory));
mem->bus_width = info.bus_width;
mem->lds_size = info.lds_size;
mem->size_bytes = info.global_size;
return mem;
}
struct gpu_info* get_gpu_info_hsa(int gpu_idx) { struct gpu_info* get_gpu_info_hsa(int gpu_idx) {
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info)); struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
gpu->pci = NULL; gpu->pci = NULL;
@@ -208,7 +118,6 @@ struct gpu_info* get_gpu_info_hsa(int gpu_idx) {
gpu->name = (char *) emalloc(sizeof(char) * (strlen(info.device_mkt_name) + 1)); gpu->name = (char *) emalloc(sizeof(char) * (strlen(info.device_mkt_name) + 1));
strcpy(gpu->name, info.device_mkt_name); strcpy(gpu->name, info.device_mkt_name);
gpu->arch = get_uarch_from_hsa(gpu, info.gpu_name); gpu->arch = get_uarch_from_hsa(gpu, info.gpu_name);
gpu->mem = get_memory_info(gpu, info);
if (gpu->arch == NULL) { if (gpu->arch == NULL) {
return NULL; return NULL;
@@ -226,17 +135,3 @@ struct gpu_info* get_gpu_info_hsa(int gpu_idx) {
char* get_str_cu(struct gpu_info* gpu) { char* get_str_cu(struct gpu_info* gpu) {
return get_str_generic(gpu->topo_h->compute_units); return get_str_generic(gpu->topo_h->compute_units);
} }
char* get_str_xcds(struct gpu_info* gpu) {
// If there is a single XCD, then we dont want to
// print it.
if (gpu->topo_h->num_xcc == 1) {
return NULL;
}
return get_str_generic(gpu->topo_h->num_xcc);
}
char* get_str_matrix_cores(struct gpu_info* gpu) {
// TODO: Show XX (WMMA/MFMA)
return get_str_generic(gpu->topo_h->matrix_cores);
}

View File

@@ -5,7 +5,5 @@
struct gpu_info* get_gpu_info_hsa(int gpu_idx); struct gpu_info* get_gpu_info_hsa(int gpu_idx);
char* get_str_cu(struct gpu_info* gpu); char* get_str_cu(struct gpu_info* gpu);
char* get_str_xcds(struct gpu_info* gpu);
char* get_str_matrix_cores(struct gpu_info* gpu);
#endif #endif