1 Commits

Author SHA1 Message Date
Dr-Noob
47091fded5 Improve build script 2025-10-17 07:39:02 +02:00
5 changed files with 89 additions and 91 deletions

View File

@@ -1,5 +1,24 @@
#!/bin/bash #!/bin/bash
print_help() {
cat << EOF
Usage: $0 <backends> [build_type]
<backends> MANDATORY. Comma-separated list of
backends to enable.
Valid options: hsa, intel, cuda
Example: hsa,cuda
[build_type] OPTIONAL. Build type. Valid options:
debug, release (default: release)
Examples:
$0 hsa,intel debug
$0 cuda
$0 hsa,intel,cuda release
EOF
}
# gpufetch build script # gpufetch build script
set -e set -e
@@ -7,19 +26,79 @@ rm -rf build/ gpufetch
mkdir build/ mkdir build/
cd build/ cd build/
if [ "$1" == "debug" ] if [ "$1" == "--help" ]
then then
BUILD_TYPE="Debug" echo "gpufetch build script"
else echo
BUILD_TYPE="Release" print_help
exit 0
fi fi
if [[ $# -lt 1 ]]; then
echo "ERROR: At least one backend must be specified."
echo
print_help
exit 1
fi
# Determine if last argument is build type
LAST_ARG="${!#}"
if [[ "$LAST_ARG" == "debug" || "$LAST_ARG" == "release" ]]; then
BUILD_TYPE="$LAST_ARG"
BACKEND_ARG="${1}"
else
BUILD_TYPE="release"
BACKEND_ARG="${1}"
fi
# Split comma-separated backends into an array
IFS=',' read -r -a BACKENDS <<< "$BACKEND_ARG"
# Validate build type
if [[ "$BUILD_TYPE" != "debug" && "$BUILD_TYPE" != "release" ]]
then
echo "Error: Invalid build type '$BUILD_TYPE'."
echo "Valid options are: debug, release"
exit 1
fi
# From lower to upper case
CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=${BUILD_TYPE^}"
# Validate backends
VALID_BACKENDS=("hsa" "intel" "cuda")
for BACKEND in "${BACKENDS[@]}"; do
case "$BACKEND" in
hsa)
CMAKE_FLAGS+=" -DENABLE_HSA_BACKEND=ON"
;;
intel)
CMAKE_FLAGS+=" -DENABLE_INTEL_BACKEND=ON"
;;
cuda)
CMAKE_FLAGS+=" -DENABLE_CUDA_BACKEND=ON"
;;
*)
echo "ERROR: Invalid backend '$BACKEND'."
echo "Valid options: ${VALID_BACKENDS[*]}"
exit 1
;;
esac
done
# You can also manually specify the compilation flags.
# If you need to, just run the cmake command directly
# instead of using this script.
#
# Here you will find some help:
#
# In case you have CUDA installed but it is not detected, # In case you have CUDA installed but it is not detected,
# - set CMAKE_CUDA_COMPILER to your nvcc binary: # - set CMAKE_CUDA_COMPILER to your nvcc binary:
# - set CMAKE_CUDA_COMPILER_TOOLKIT_ROOT to the CUDA root dir # - set CMAKE_CUDA_COMPILER_TOOLKIT_ROOT to the CUDA root dir
# for example: # for example:
# cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=/usr/local/cuda/ .. # cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=/usr/local/cuda/ ..
#
# In case you want to explicitely disable a backend, you can: # In case you want to explicitely disable a backend, you can:
# Disable CUDA backend: # Disable CUDA backend:
# cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DENABLE_CUDA_BACKEND=OFF .. # cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DENABLE_CUDA_BACKEND=OFF ..
@@ -28,7 +107,9 @@ fi
# Disable Intel backend: # Disable Intel backend:
# cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DENABLE_INTEL_BACKEND=OFF .. # cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DENABLE_INTEL_BACKEND=OFF ..
cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE .. echo "$0: Running cmake $CMAKE_FLAGS"
echo
cmake $CMAKE_FLAGS ..
os=$(uname) os=$(uname)
if [ "$os" == 'Linux' ]; then if [ "$os" == 'Linux' ]; then

View File

@@ -101,17 +101,6 @@ char* get_str_bus_width(struct gpu_info* gpu) {
return string; return string;
} }
char* get_str_lds_size(struct gpu_info* gpu) {
// TODO: Show XX KB (XX MB Total) like in cpufetch
uint32_t size = 3+1+3+1;
assert(strlen(STRING_UNKNOWN)+1 <= size);
char* string = (char *) ecalloc(size, sizeof(char));
sprintf(string, "%d KB", gpu->mem->lds_size / 1024);
return string;
}
char* get_str_memory_clock(struct gpu_info* gpu) { char* get_str_memory_clock(struct gpu_info* gpu) {
return get_freq_as_str_mhz(gpu->mem->freq); return get_freq_as_str_mhz(gpu->mem->freq);
} }

View File

@@ -61,7 +61,6 @@ struct memory {
int32_t bus_width; int32_t bus_width;
int32_t freq; int32_t freq;
int32_t clk_mul; // clock multiplier int32_t clk_mul; // clock multiplier
int32_t lds_size; // HSA specific for now
}; };
struct gpu_info { struct gpu_info {
@@ -89,7 +88,6 @@ char* get_str_freq(struct gpu_info* gpu);
char* get_str_memory_size(struct gpu_info* gpu); char* get_str_memory_size(struct gpu_info* gpu);
char* get_str_memory_type(struct gpu_info* gpu); char* get_str_memory_type(struct gpu_info* gpu);
char* get_str_bus_width(struct gpu_info* gpu); char* get_str_bus_width(struct gpu_info* gpu);
char* get_str_lds_size(struct gpu_info* gpu);
char* get_str_memory_clock(struct gpu_info* gpu); char* get_str_memory_clock(struct gpu_info* gpu);
char* get_str_l2(struct gpu_info* gpu); char* get_str_l2(struct gpu_info* gpu);
char* get_str_peak_performance(struct gpu_info* gpu); char* get_str_peak_performance(struct gpu_info* gpu);

View File

@@ -48,15 +48,14 @@ enum {
ATTRIBUTE_FREQUENCY, // ALL ATTRIBUTE_FREQUENCY, // ALL
ATTRIBUTE_PEAK, // ALL ATTRIBUTE_PEAK, // ALL
ATTRIBUTE_COMPUTE_UNITS, // HSA ATTRIBUTE_COMPUTE_UNITS, // HSA
ATTRIBUTE_LDS_SIZE, // HSA
ATTRIBUTE_STREAMINGMP, // CUDA ATTRIBUTE_STREAMINGMP, // CUDA
ATTRIBUTE_CORESPERMP, // CUDA ATTRIBUTE_CORESPERMP, // CUDA
ATTRIBUTE_CUDA_CORES, // CUDA ATTRIBUTE_CUDA_CORES, // CUDA
ATTRIBUTE_TENSOR_CORES, // CUDA ATTRIBUTE_TENSOR_CORES, // CUDA
ATTRIBUTE_L2, // CUDA ATTRIBUTE_L2, // CUDA
ATTRIBUTE_MEMORY, // CUDA,HSA ATTRIBUTE_MEMORY, // CUDA
ATTRIBUTE_MEMORY_FREQ, // CUDA ATTRIBUTE_MEMORY_FREQ, // CUDA
ATTRIBUTE_BUS_WIDTH, // CUDA,HSA ATTRIBUTE_BUS_WIDTH, // CUDA
ATTRIBUTE_PEAK_TENSOR, // CUDA ATTRIBUTE_PEAK_TENSOR, // CUDA
ATTRIBUTE_EUS, // Intel ATTRIBUTE_EUS, // Intel
ATTRIBUTE_GT, // Intel ATTRIBUTE_GT, // Intel
@@ -70,7 +69,6 @@ static const AttributeField ATTRIBUTE_INFO[] = {
{ ATTRIBUTE_FREQUENCY, "Max Frequency:", "Max Freq.:" }, { ATTRIBUTE_FREQUENCY, "Max Frequency:", "Max Freq.:" },
{ ATTRIBUTE_PEAK, "Peak Performance:", "Peak Perf.:" }, { ATTRIBUTE_PEAK, "Peak Performance:", "Peak Perf.:" },
{ ATTRIBUTE_COMPUTE_UNITS, "Compute Units (CUs):", "CUs" }, { ATTRIBUTE_COMPUTE_UNITS, "Compute Units (CUs):", "CUs" },
{ ATTRIBUTE_LDS_SIZE, "LDS size:", "LDS:" },
{ ATTRIBUTE_STREAMINGMP, "SMs:", "SMs:" }, { ATTRIBUTE_STREAMINGMP, "SMs:", "SMs:" },
{ ATTRIBUTE_CORESPERMP, "Cores/SM:", "Cores/SM:" }, { ATTRIBUTE_CORESPERMP, "Cores/SM:", "Cores/SM:" },
{ ATTRIBUTE_CUDA_CORES, "CUDA Cores:", "CUDA Cores:" }, { ATTRIBUTE_CUDA_CORES, "CUDA Cores:", "CUDA Cores:" },
@@ -489,9 +487,6 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
char* manufacturing_process = get_str_process(gpu->arch); char* manufacturing_process = get_str_process(gpu->arch);
char* cus = get_str_cu(gpu); char* cus = get_str_cu(gpu);
char* max_frequency = get_str_freq(gpu); char* max_frequency = get_str_freq(gpu);
char* bus_width = get_str_bus_width(gpu);
char* mem_size = get_str_memory_size(gpu);
char* lds_size = get_str_lds_size(gpu);
setAttribute(art, ATTRIBUTE_NAME, gpu_name); setAttribute(art, ATTRIBUTE_NAME, gpu_name);
if (gpu_chip != NULL) { if (gpu_chip != NULL) {
@@ -501,9 +496,6 @@ bool print_gpufetch_amd(struct gpu_info* gpu, STYLE s, struct color** cs, struct
setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process); setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency); setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
setAttribute(art, ATTRIBUTE_COMPUTE_UNITS, cus); setAttribute(art, ATTRIBUTE_COMPUTE_UNITS, cus);
setAttribute(art, ATTRIBUTE_LDS_SIZE, lds_size);
setAttribute(art, ATTRIBUTE_MEMORY, mem_size);
setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
bool use_short = false; bool use_short = false;
uint32_t longest_attribute = longest_attribute_length(art, use_short); uint32_t longest_attribute = longest_attribute_length(art, use_short);

View File

@@ -23,9 +23,6 @@ struct agent_info {
char device_mkt_name[64]; char device_mkt_name[64];
uint32_t max_clock_freq; uint32_t max_clock_freq;
uint32_t compute_unit; uint32_t compute_unit;
uint32_t bus_width;
uint32_t lds_size;
uint64_t global_size;
}; };
#define RET_IF_HSA_ERR(err) { \ #define RET_IF_HSA_ERR(err) { \
@@ -43,46 +40,6 @@ struct agent_info {
} \ } \
} }
hsa_status_t memory_pool_callback(hsa_amd_memory_pool_t pool, void* data) {
struct agent_info* info = reinterpret_cast<struct agent_info *>(data);
hsa_amd_segment_t segment;
hsa_status_t err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
RET_IF_HSA_ERR(err);
if (segment == HSA_AMD_SEGMENT_GROUP) {
// LDS memory
uint32_t size = 0;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size);
RET_IF_HSA_ERR(err);
info->lds_size = size;
}
else if (segment == HSA_AMD_SEGMENT_GLOBAL) {
// Global memory
uint32_t global_flags = 0;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flags);
RET_IF_HSA_ERR(err);
if (global_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED) {
if (info->global_size != 0) {
printErr("Found HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED twice!");
return HSA_STATUS_ERROR;
}
uint64_t size = 0;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size);
RET_IF_HSA_ERR(err);
info->global_size = size;
}
}
return HSA_STATUS_SUCCESS;
}
hsa_status_t agent_callback(hsa_agent_t agent, void *data) { hsa_status_t agent_callback(hsa_agent_t agent, void *data) {
struct agent_info* info = reinterpret_cast<struct agent_info *>(data); struct agent_info* info = reinterpret_cast<struct agent_info *>(data);
@@ -105,14 +62,6 @@ hsa_status_t agent_callback(hsa_agent_t agent, void *data) {
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &info->compute_unit); err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &info->compute_unit);
RET_IF_HSA_ERR(err); RET_IF_HSA_ERR(err);
// According to the documentation, this is deprecated. But what should I be using then?
err = hsa_agent_get_info(agent, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MEMORY_WIDTH, &info->bus_width);
RET_IF_HSA_ERR(err);
info->global_size = 0;
err = hsa_amd_agent_iterate_memory_pools(agent, memory_pool_callback, data);
RET_IF_HSA_ERR(err);
} }
return HSA_STATUS_SUCCESS; return HSA_STATUS_SUCCESS;
@@ -126,16 +75,6 @@ struct topology_h* get_topology_info(struct agent_info info) {
return topo; return topo;
} }
struct memory* get_memory_info(struct gpu_info* gpu, struct agent_info info) {
struct memory* mem = (struct memory*) emalloc(sizeof(struct memory));
mem->bus_width = info.bus_width;
mem->lds_size = info.lds_size;
mem->size_bytes = info.global_size;
return mem;
}
struct gpu_info* get_gpu_info_hsa(int gpu_idx) { struct gpu_info* get_gpu_info_hsa(int gpu_idx) {
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info)); struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
gpu->pci = NULL; gpu->pci = NULL;
@@ -179,7 +118,6 @@ struct gpu_info* get_gpu_info_hsa(int gpu_idx) {
gpu->name = (char *) emalloc(sizeof(char) * (strlen(info.device_mkt_name) + 1)); gpu->name = (char *) emalloc(sizeof(char) * (strlen(info.device_mkt_name) + 1));
strcpy(gpu->name, info.device_mkt_name); strcpy(gpu->name, info.device_mkt_name);
gpu->arch = get_uarch_from_hsa(gpu, info.gpu_name); gpu->arch = get_uarch_from_hsa(gpu, info.gpu_name);
gpu->mem = get_memory_info(gpu, info);
if (gpu->arch == NULL) { if (gpu->arch == NULL) {
return NULL; return NULL;