9 Commits
v0.10 ... v0.11

21 changed files with 395 additions and 194 deletions

CMakeLists.txt (new file)

@@ -0,0 +1,81 @@
cmake_minimum_required(VERSION 3.10)
include(CheckLanguage)
include(ExternalProject)
project(gpufetch CXX)
set(SRC_DIR "src")
set(COMMON_DIR "${SRC_DIR}/common")
set(CUDA_DIR "${SRC_DIR}/cuda")
if(NOT WIN32)
string(ASCII 27 Esc)
set(ColorReset "${Esc}[m")
set(ColorBold "${Esc}[1m")
set(Red "${Esc}[31m")
set(Green "${Esc}[32m")
set(BoldRed "${Esc}[1;31m")
set(BoldGreen "${Esc}[1;32m")
set(BoldYellow "${Esc}[1;33m")
endif()
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
enable_language(CUDA)
else()
message(FATAL_ERROR "${BoldRed}[ERROR]${ColorReset} Unable to find CUDA compiler. You may use -DCMAKE_CUDA_COMPILER and -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT if CUDA is installed but not detected by CMake")
endif()
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")
find_package(PCIUTILS)
if(NOT ${PCIUTILS_FOUND})
message(STATUS "${BoldYellow}pciutils not found, downloading and building a local copy...${ColorReset}")
# Download and build pciutils
set(PCIUTILS_INSTALL_LOCATION ${CMAKE_BINARY_DIR}/pciutils-install)
ExternalProject_Add(pciutils
GIT_REPOSITORY https://github.com/pciutils/pciutils
CONFIGURE_COMMAND ""
BUILD_COMMAND make SHARED=no
BUILD_IN_SOURCE true
INSTALL_COMMAND make PREFIX=${PCIUTILS_INSTALL_LOCATION} install-lib
)
include_directories(${PCIUTILS_INSTALL_LOCATION}/include)
link_directories(${PCIUTILS_INSTALL_LOCATION}/lib)
else()
include_directories(${PCIUTILS_INCLUDE_DIR})
link_libraries(${PCIUTILS_LIBRARIES})
endif()
set(SANITY_FLAGS "-Wfloat-equal -Wshadow -Wpointer-arith")
set(CMAKE_CXX_FLAGS "${SANITY_FLAGS} -Wall -Wextra -pedantic -fstack-protector-all")
# https://en.wikipedia.org/w/index.php?title=CUDA&section=5#GPUs_supported
# https://raw.githubusercontent.com/PointCloudLibrary/pcl/master/cmake/pcl_find_cuda.cmake
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0")
set(CMAKE_CUDA_ARCHITECTURES 35 37 50 52 53 60 61 62 70 72 75 80 86)
elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.0")
set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72 75)
elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "9.0")
set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72)
elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "8.0")
set(CMAKE_CUDA_ARCHITECTURES 20 21 30 32 35 37 50 52 53 60 61 62)
endif()
link_directories(${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/lib)
add_library(cuda_backend STATIC ${CUDA_DIR}/cuda.cpp ${CUDA_DIR}/uarch.cpp ${CUDA_DIR}/pci.cpp)
add_executable(gpufetch ${COMMON_DIR}/main.cpp ${COMMON_DIR}/args.cpp ${COMMON_DIR}/gpu.cpp ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/global.cpp ${COMMON_DIR}/printer.cpp)
if(NOT ${PCIUTILS_FOUND})
add_dependencies(cuda_backend pciutils)
add_dependencies(gpufetch pciutils)
endif()
target_include_directories(cuda_backend PUBLIC ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/samples/common/inc ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/include)
target_link_libraries(cuda_backend cudart)
target_link_libraries(gpufetch cuda_backend pci z)
install(TARGETS gpufetch DESTINATION bin)

Makefile (deleted)

@@ -1,53 +0,0 @@
CXX ?= g++
CUDA_PATH ?= /usr/local/cuda/
PREFIX ?= /usr
CXXFLAGS+=-Wall -Wextra -pedantic -fstack-protector-all -pedantic
SANITY_FLAGS=-Wfloat-equal -Wshadow -Wpointer-arith
SRC_COMMON=src/common/
SRC_CUDA=src/cuda/
COMMON_SRC = $(SRC_COMMON)main.cpp $(SRC_COMMON)gpu.cpp $(SRC_COMMON)args.cpp $(SRC_COMMON)global.cpp $(SRC_COMMON)printer.cpp
COMMON_HDR = $(SRC_COMMON)ascii.hpp $(SRC_COMMON)gpu.hpp $(SRC_COMMON)args.hpp $(SRC_COMMON)global.hpp $(SRC_COMMON)printer.hpp
CUDA_SRC = $(SRC_CUDA)cuda.cpp $(SRC_CUDA)uarch.cpp $(SRC_CUDA)pci.cpp $(SRC_CUDA)nvmlb.cpp
CUDA_HDR = $(SRC_CUDA)cuda.hpp $(SRC_CUDA)uarch.hpp $(SRC_CUDA)pci.hpp $(SRC_CUDA)nvmlb.hpp $(SRC_CUDA)chips.hpp
SOURCE += $(COMMON_SRC) $(CUDA_SRC)
HEADERS += $(COMMON_HDR) $(CUDA_HDR)
OUTPUT=gpufetch
CXXFLAGS+= -I $(CUDA_PATH)/samples/common/inc -I $(CUDA_PATH)/targets/x86_64-linux/include -L $(CUDA_PATH)/targets/x86_64-linux/lib -lcudart -lnvidia-ml
all: CXXFLAGS += -O3
all: $(OUTPUT)
debug: CXXFLAGS += -g -O0
debug: $(OUTPUT)
static: CXXFLAGS += -static -O3
static: $(OUTPUT)
strict: CXXFLAGS += -O3 -Werror -fsanitize=undefined -D_FORTIFY_SOURCE=2
strict: $(OUTPUT)
$(OUTPUT): Makefile $(SOURCE) $(HEADERS)
$(CXX) $(CXXFLAGS) $(SANITY_FLAGS) $(SOURCE) -o $(OUTPUT)
run: $(OUTPUT)
./$(OUTPUT)
clean:
@rm -f $(OUTPUT)
install: $(OUTPUT)
install -Dm755 "gpufetch" "$(DESTDIR)$(PREFIX)/bin/gpufetch"
install -Dm644 "LICENSE" "$(DESTDIR)$(PREFIX)/share/licenses/gpufetch-git/LICENSE"
install -Dm644 "gpufetch.1" "$(DESTDIR)$(PREFIX)/share/man/man1/gpufetch.1.gz"
uninstall:
rm -f "$(DESTDIR)$(PREFIX)/bin/gpufetch"
rm -f "$(DESTDIR)$(PREFIX)/share/licenses/gpufetch-git/LICENSE"
rm -f "$(DESTDIR)$(PREFIX)/share/man/man1/gpufetch.1.gz"

README.md

@@ -31,18 +31,28 @@
gpufetch supports NVIDIA GPUs under Linux only.
# 2. Installation (building from source)
You will need a C++ compiler (e.g, `g++`), `make` and CUDA to compile `gpufetch`. To do so, just clone the repo and run `make`:
You will need:
- C++ compiler (e.g., `g++`)
- `cmake`
- `make`
- CUDA (NVIDIA backend)
- pciutils (optional)
To build gpufetch, just clone the repo and run `./build.sh`:
```
git clone https://github.com/Dr-Noob/gpufetch
cd gpufetch
make
./build.sh
./gpufetch
```
When building gpufetch, you may encounter an error telling you that it cannot find some CUDA header files. In this case, is very likely that the Makefile is unable to find your CUDA installation. This can be solved by setting `CUDA_PATH` to the correct CUDA installation path. For example:
- NOTE 1: It is recommended to install the `pciutils` development package, which gpufetch needs. If it is not installed, a local copy will be downloaded and built automatically just to compile gpufetch.
- NOTE 2: When building gpufetch, cmake may fail if it is unable to find the CUDA installation. If CUDA is installed but CMake does not find it, you need to pass the CUDA path to cmake. You can do this easily by editing the `build.sh` script directly. For example:
```
CUDA_PATH=/opt/cuda make
cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=/usr/local/cuda/ ..
```
# 3. Colors and style

build.sh (new executable file)

@@ -0,0 +1,19 @@
#!/bin/bash
# gpufetch build script
set -e
rm -rf build/ gpufetch
mkdir build/
cd build/
# In case you have CUDA installed but it is not detected,
# - set CMAKE_CUDA_COMPILER to your nvcc binary
# - set CMAKE_CUDA_COMPILER_TOOLKIT_ROOT to the CUDA root dir
# for example:
# cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=/usr/local/cuda/ ..
cmake ..
make -j$(nproc)
cd -
ln -s build/gpufetch .

cmake/FindPCIUTILS.cmake (new file)

@@ -0,0 +1,29 @@
# - Try to find the pciutils library
# Once done this will define
#
# PCIUTILS_FOUND - system has PCIUtils
# PCIUTILS_INCLUDE_DIR - the PCIUTILS include directory
# PCIUTILS_LIBRARIES - The libraries needed to use PCIUtils
if(PCIUTILS_INCLUDE_DIR AND PCIUTILS_LIBRARIES)
set(PCIUTILS_FIND_QUIETLY TRUE)
endif(PCIUTILS_INCLUDE_DIR AND PCIUTILS_LIBRARIES)
FIND_PATH(PCIUTILS_INCLUDE_DIR pci/pci.h)
FIND_LIBRARY(PCIUTILS_LIBRARY NAMES pci)
if(PCIUTILS_LIBRARY)
FIND_LIBRARY(RESOLV_LIBRARY NAMES resolv)
if(RESOLV_LIBRARY)
set(PCIUTILS_LIBRARIES ${PCIUTILS_LIBRARY} ${RESOLV_LIBRARY})
else(RESOLV_LIBRARY)
set(PCIUTILS_LIBRARIES ${PCIUTILS_LIBRARY})
endif(RESOLV_LIBRARY)
endif(PCIUTILS_LIBRARY)
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(PCIUTILS DEFAULT_MSG PCIUTILS_LIBRARIES PCIUTILS_INCLUDE_DIR)
MARK_AS_ADVANCED(PCIUTILS_INCLUDE_DIR PCIUTILS_LIBRARIES)

gpufetch.1 (new file)

@@ -0,0 +1,47 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.48.3.
.TH GPUFETCH "1" "August 2021" "gpufetch v0.10" "User Commands"
.SH NAME
gpufetch
.SH SYNOPSIS
.B gpufetch
[\fI\,OPTION\/\fR]...
.SH DESCRIPTION
Simple yet fancy GPU architecture fetching tool
.SH OPTIONS
.TP
\fB\-c\fR, \fB\-\-color\fR
Sets the color scheme (by default, gpufetch uses the system color scheme). See the COLORS section for a more detailed explanation
.TP
\fB\-g\fR, \fB\-\-gpu\fR
Selects the GPU to use (default: 0)
.TP
\fB\-h\fR, \fB\-\-help\fR
Prints this help and exit
.TP
\fB\-V\fR, \fB\-\-version\fR
Prints gpufetch version and exit
.SS "COLORS:"
.IP
Color scheme can be set using a predefined color scheme or a custom one:
1. To use a predefined color scheme, the name of the scheme must be provided. Possible values are:
* "nvidia": Use NVIDIA default color scheme
2. To use a custom color scheme, 4 colors must be given in RGB with the format: R,G,B:R,G,B:...
The first 2 colors are the GPU art colors and the following 2 colors are the text colors
.SS "EXAMPLES:"
.IP
Run gpufetch with NVIDIA color scheme:
.IP
\&./gpufetch \fB\-\-color\fR nvidia
.IP
Run gpufetch with a custom color scheme:
.IP
\&./gpufetch \fB\-\-color\fR 239,90,45:210,200,200:100,200,45:0,200,200
.SS "BUGS:"
.IP
Report bugs to https://github.com/Dr\-Noob/gpufetch/issues
.SS "NOTE:"
.IP
Peak performance information is NOT accurate. gpufetch computes peak performance using the max
frequency. However, to properly compute peak performance, you need to know the frequency of the
GPU running real code.
For peak performance measurement see: https://github.com/Dr\-Noob/peakperf
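For reference, the figure this NOTE refers to is the usual FMA-based estimate; this mirrors `get_peak_performance` in `src/cuda/cuda.cpp` later in this diff, with $f_{\max}$ the max clock in Hz:

$$\mathrm{peak}_{FP32} = 2 \times f_{\max} \times N_{\mathrm{CUDA\,cores}}$$

which is exactly why plugging in $f_{\max}$ instead of the sustained clock under real load overestimates the achievable figure.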

src/common/args.cpp

@@ -19,6 +19,7 @@
struct args_struct {
bool help_flag;
bool version_flag;
bool list_gpus;
int gpu_idx;
STYLE style;
struct color** colors;
@@ -28,17 +29,19 @@ int errn = 0;
static struct args_struct args;
const char args_chr[] = {
/* [ARG_CHAR_COLOR] = */ 'c',
/* [ARG_CHAR_GPU] = */ 'g',
/* [ARG_CHAR_HELP] = */ 'h',
/* [ARG_CHAR_VERSION] = */ 'V',
/* [ARG_COLOR] = */ 'c',
/* [ARG_GPU] = */ 'g',
/* [ARG_LIST] = */ 'l',
/* [ARG_HELP] = */ 'h',
/* [ARG_VERSION] = */ 'V',
};
const char *args_str[] = {
/* [ARG_CHAR_COLOR] = */ "color",
/* [ARG_CHAR_GPU] = */ "gpu",
/* [ARG_CHAR_HELP] = */ "help",
/* [ARG_CHAR_VERSION] = */ "version",
/* [ARG_COLOR] = */ "color",
/* [ARG_GPU] = */ "gpu",
/* [ARG_LIST] = */ "list-gpus",
/* [ARG_HELP] = */ "help",
/* [ARG_VERSION] = */ "version",
};
int getarg_int(char* str) {
@@ -100,6 +103,10 @@ bool show_help() {
return args.help_flag;
}
bool list_gpus() {
return args.list_gpus;
}
bool show_version() {
return args.version_flag;
}
@@ -119,8 +126,9 @@ char* build_short_options() {
char* str = (char *) emalloc(sizeof(char) * (len*2 + 1));
memset(str, 0, sizeof(char) * (len*2 + 1));
sprintf(str, "%c:%c:%c%c", c[ARG_GPU],
c[ARG_COLOR], c[ARG_HELP], c[ARG_VERSION]);
sprintf(str, "%c:%c:%c%c%c", c[ARG_GPU],
c[ARG_COLOR], c[ARG_HELP], c[ARG_LIST],
c[ARG_VERSION]);
return str;
}
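For readers unfamiliar with getopt syntax: with the argument characters defined above, the generated string is "g:c:hlV", where a ':' after a letter marks an option that requires an argument. A minimal sketch of how such a string drives getopt — a hypothetical standalone program, not part of gpufetch:

```cpp
#include <getopt.h>
#include <cstdio>

int main(int argc, char* argv[]) {
  int opt;
  // "g:c:hlV": -g and -c take a required argument; -h, -l, -V are plain flags.
  while ((opt = getopt(argc, argv, "g:c:hlV")) != -1) {
    switch (opt) {
      case 'g': printf("gpu index: %s\n", optarg); break;  // value in optarg
      case 'c': printf("colors:    %s\n", optarg); break;  // value in optarg
      case 'h': case 'l': case 'V': printf("flag: -%c\n", opt); break;
      default:  return 1;  // getopt already printed an error for '?'
    }
  }
  return 0;
}
```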
@@ -185,12 +193,14 @@ bool parse_args(int argc, char* argv[]) {
args.version_flag = false;
args.help_flag = false;
args.list_gpus = false;
args.gpu_idx = 0;
args.colors = NULL;
const struct option long_options[] = {
{args_str[ARG_COLOR], required_argument, 0, args_chr[ARG_COLOR] },
{args_str[ARG_GPU], required_argument, 0, args_chr[ARG_GPU] },
{args_str[ARG_LIST], no_argument, 0, args_chr[ARG_LIST] },
{args_str[ARG_HELP], no_argument, 0, args_chr[ARG_HELP] },
{args_str[ARG_VERSION], no_argument, 0, args_chr[ARG_VERSION] },
{0, 0, 0, 0}
@@ -199,7 +209,7 @@ bool parse_args(int argc, char* argv[]) {
char* short_options = build_short_options();
opt = getopt_long(argc, argv, short_options, long_options, &option_index);
while (!args.help_flag && !args.version_flag && opt != -1) {
while (!args.help_flag && !args.version_flag && !args.list_gpus && opt != -1) {
if(opt == args_chr[ARG_COLOR]) {
args.colors = (struct color **) emalloc(sizeof(struct color *) * NUM_COLORS);
if(!parse_color(optarg, &args.colors)) {
@@ -215,8 +225,11 @@ bool parse_args(int argc, char* argv[]) {
return false;
}
}
else if(opt == args_chr[ARG_LIST]) {
args.list_gpus = true;
}
else if(opt == args_chr[ARG_HELP]) {
args.help_flag = true;
}
else if(opt == args_chr[ARG_VERSION]) {
args.version_flag = true;

src/common/args.hpp

@@ -21,6 +21,7 @@ enum {
enum {
ARG_COLOR,
ARG_GPU,
ARG_LIST,
ARG_HELP,
ARG_VERSION
};
@@ -33,6 +34,7 @@ extern const char *args_str[];
int max_arg_str_length();
bool parse_args(int argc, char* argv[]);
bool show_help();
bool list_gpus();
bool show_version();
void free_colors_struct(struct color** cs);
int get_gpu_idx();

src/common/global.hpp

@@ -2,7 +2,6 @@
#define __GLOBAL__
#include <stdbool.h>
#include <stddef.h>
#include <cstddef>
#define STRING_UNKNOWN "Unknown"

src/common/gpu.cpp

@@ -116,17 +116,17 @@ char* get_str_l2(struct gpu_info* gpu) {
return string;
}
char* get_str_peak_performance(struct gpu_info* gpu) {
char* get_str_peak_performance_generic(int64_t pp) {
char* str;
if(gpu->peak_performance == -1) {
if(pp == -1) {
str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
return str;
}
// 7 for digits (e.g., XXXX.XX), 7 for XFLOP/s
double flopsd = (double) gpu->peak_performance;
double flopsd = (double) pp;
uint32_t max_size = 7+1+7+1;
str = (char *) ecalloc(max_size, sizeof(char));
@@ -139,3 +139,12 @@ char* get_str_peak_performance(struct gpu_info* gpu) {
return str;
}
char* get_str_peak_performance(struct gpu_info* gpu) {
return get_str_peak_performance_generic(gpu->peak_performance);
}
char* get_str_peak_performance_tensor(struct gpu_info* gpu) {
return get_str_peak_performance_generic(gpu->peak_performance_t);
}

src/common/gpu.hpp

@@ -4,7 +4,6 @@
#include <stdint.h>
#include <stdbool.h>
#include "../cuda/nvmlb.hpp"
#include "../cuda/pci.hpp"
#define UNKNOWN_FREQ -1
@@ -41,6 +40,7 @@ struct topology {
int32_t streaming_mp;
int32_t cores_per_mp;
int32_t cuda_cores;
int32_t tensor_cores;
};
struct memory {
@@ -57,11 +57,11 @@ struct gpu_info {
char* name;
int64_t freq;
struct pci* pci;
struct nvml_data* nvmld;
struct topology* topo;
struct memory* mem;
struct cache* cach;
int64_t peak_performance;
int64_t peak_performance_t;
int32_t idx;
};
@@ -74,5 +74,6 @@ char* get_str_bus_width(struct gpu_info* gpu);
char* get_str_memory_clock(struct gpu_info* gpu);
char* get_str_l2(struct gpu_info* gpu);
char* get_str_peak_performance(struct gpu_info* gpu);
char* get_str_peak_performance_tensor(struct gpu_info* gpu);
#endif

src/common/main.cpp

@@ -7,7 +7,7 @@
#include "../cuda/cuda.hpp"
#include "../cuda/uarch.hpp"
static const char* VERSION = "0.10";
static const char* VERSION = "0.11";
void print_help(char *argv[]) {
const char **t = args_str;
@@ -18,10 +18,11 @@ void print_help(char *argv[]) {
printf("Simple yet fancy GPU architecture fetching tool\n\n");
printf("Options: \n");
printf(" -%c, --%s %*s Sets the color scheme (by default, gpufetch uses the system color scheme) See COLORS section for a more detailed explanation\n", c[ARG_COLOR], t[ARG_COLOR], (int) (max_len-strlen(t[ARG_COLOR])), "");
printf(" -%c, --%s %*s Selects the GPU to use (default: 0)\n", c[ARG_GPU], t[ARG_GPU], (int) (max_len-strlen(t[ARG_GPU])), "");
printf(" -%c, --%s %*s Prints this help and exit\n", c[ARG_HELP], t[ARG_HELP], (int) (max_len-strlen(t[ARG_HELP])), "");
printf(" -%c, --%s %*s Prints gpufetch version and exit\n", c[ARG_VERSION], t[ARG_VERSION], (int) (max_len-strlen(t[ARG_VERSION])), "");
printf(" -%c, --%s %*s Set the color scheme (by default, gpufetch uses the system color scheme) See COLORS section for a more detailed explanation\n", c[ARG_COLOR], t[ARG_COLOR], (int) (max_len-strlen(t[ARG_COLOR])), "");
printf(" -%c, --%s %*s List the available GPUs in the system\n", c[ARG_LIST], t[ARG_LIST], (int) (max_len-strlen(t[ARG_LIST])), "");
printf(" -%c, --%s %*s Select the GPU to use (default: 0)\n", c[ARG_GPU], t[ARG_GPU], (int) (max_len-strlen(t[ARG_GPU])), "");
printf(" -%c, --%s %*s Print this help and exit\n", c[ARG_HELP], t[ARG_HELP], (int) (max_len-strlen(t[ARG_HELP])), "");
printf(" -%c, --%s %*s Print gpufetch version and exit\n", c[ARG_VERSION], t[ARG_VERSION], (int) (max_len-strlen(t[ARG_VERSION])), "");
printf("\nCOLORS: \n");
printf(" Color scheme can be set using a predefined color scheme or a custom one:\n");
@@ -64,6 +65,10 @@ int main(int argc, char* argv[]) {
return EXIT_SUCCESS;
}
if(list_gpus()) {
return print_gpus_list();
}
set_log_level(true);
printWarn("gpufetch is in beta. The provided information may be incomplete or wrong.\n\

src/common/pci.cpp (new file)

@@ -0,0 +1,45 @@
#include "global.hpp"
#include "pci.hpp"
#include <cstddef>
/*
* doc: https://wiki.osdev.org/PCI#Class_Codes
* https://pci-ids.ucw.cz/read/PC
*/
#define VENDOR_ID_NVIDIA 0x10de
#define CLASS_VGA_CONTROLLER 0x0300
uint16_t pciutils_get_pci_vendor_id(struct pci_dev *devices) {
for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
if(dev->vendor_id == VENDOR_ID_NVIDIA && dev->device_class == CLASS_VGA_CONTROLLER) {
return dev->vendor_id;
}
}
printErr("Unable to find a CUDA device using pciutils");
return 0;
}
uint16_t pciutils_get_pci_device_id(struct pci_dev *devices) {
for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
if(dev->vendor_id == VENDOR_ID_NVIDIA && dev->device_class == CLASS_VGA_CONTROLLER) {
return dev->device_id;
}
}
printErr("Unable to find a CUDA device using pciutils");
return 0;
}
struct pci_dev *get_pci_devices_from_pciutils() {
struct pci_access *pacc;
struct pci_dev *dev;
pacc = pci_alloc();
pci_init(pacc);
pci_scan_bus(pacc);
for (dev=pacc->devices; dev; dev=dev->next) {
pci_fill_info(dev, PCI_FILL_IDENT | PCI_FILL_BASES | PCI_FILL_CLASS);
}
return pacc->devices;
}

src/common/pci.hpp (new file)

@@ -0,0 +1,13 @@
#ifndef __GPUFETCH_PCI__
#define __GPUFETCH_PCI__
#include <cstdint>
extern "C" {
#include <pci/pci.h>
}
uint16_t pciutils_get_pci_vendor_id(struct pci_dev *devices);
uint16_t pciutils_get_pci_device_id(struct pci_dev *devices);
struct pci_dev *get_pci_devices_from_pciutils();
#endif
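Two notes on this header: the `extern "C"` block is needed because pciutils ships plain C headers, so the declarations must not be name-mangled by the C++ compiler; and together with src/common/pci.cpp above it forms the full PCI path that replaces the NVML backend. A minimal usage sketch — a hypothetical standalone program, assuming libpci is linked:

```cpp
#include <cstdio>
#include "pci.hpp"  // the header above; pulls in <pci/pci.h> inside extern "C"

int main() {
  // Scan the PCI bus once, then query the first NVIDIA VGA controller.
  struct pci_dev *devices = get_pci_devices_from_pciutils();
  uint16_t vendor = pciutils_get_pci_vendor_id(devices);  // 0x10de if found
  uint16_t device = pciutils_get_pci_device_id(devices);  // chip-specific ID
  if (vendor == 0) return 1;  // helpers print an error and return 0 on failure
  printf("PCI ID: %04x:%04x\n", vendor, device);
  return 0;
}
```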

src/common/printer.cpp

@@ -38,11 +38,13 @@ enum {
ATTRIBUTE_STREAMINGMP,
ATTRIBUTE_CORESPERMP,
ATTRIBUTE_CUDA_CORES,
ATTRIBUTE_TENSOR_CORES,
ATTRIBUTE_L2,
ATTRIBUTE_MEMORY,
ATTRIBUTE_MEMORY_FREQ,
ATTRIBUTE_BUS_WIDTH,
ATTRIBUTE_PEAK
ATTRIBUTE_PEAK,
ATTRIBUTE_PEAK_TENSOR,
};
static const char* ATTRIBUTE_FIELDS [] = {
@@ -53,12 +55,14 @@ static const char* ATTRIBUTE_FIELDS [] = {
"Max Frequency:",
"SMs:",
"Cores/SM:",
"CUDA cores:",
"CUDA Cores:",
"Tensor Cores:",
"L2 Size:",
"Memory:",
"Memory frequency:",
"Bus width:",
"Peak Performance:",
"Peak Performance (TC):",
};
static const char* ATTRIBUTE_FIELDS_SHORT [] = {
@@ -69,12 +73,14 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
"Max Freq.:",
"SMs:",
"Cores/SM:",
"CUDA cores:",
"CUDA Cores:",
"Tensor Cores:",
"L2 Size:",
"Memory:",
"Memory freq.:",
"Bus width:",
"Peak Perf.:",
"Peak Perf.(TC):",
};
struct terminal {
@@ -350,6 +356,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
char* sms = get_str_sm(gpu);
char* corespersm = get_str_cores_sm(gpu);
char* cores = get_str_cuda_cores(gpu);
char* tensorc = get_str_tensor_cores(gpu);
char* max_frequency = get_str_freq(gpu);
char* l2 = get_str_l2(gpu);
char* mem_size = get_str_memory_size(gpu);
@@ -357,6 +364,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
char* mem_freq = get_str_memory_clock(gpu);
char* bus_width = get_str_bus_width(gpu);
char* pp = get_str_peak_performance(gpu);
char* pp_tensor = get_str_peak_performance_tensor(gpu);
char* mem = (char *) emalloc(sizeof(char) * (strlen(mem_size) + strlen(mem_type) + 2));
sprintf(mem, "%s %s", mem_size, mem_type);
@@ -372,11 +380,17 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
setAttribute(art, ATTRIBUTE_STREAMINGMP, sms);
setAttribute(art, ATTRIBUTE_CORESPERMP, corespersm);
setAttribute(art, ATTRIBUTE_CUDA_CORES, cores);
if(gpu->topo->tensor_cores >= 0) {
setAttribute(art, ATTRIBUTE_TENSOR_CORES, tensorc);
}
setAttribute(art, ATTRIBUTE_MEMORY, mem);
setAttribute(art, ATTRIBUTE_MEMORY_FREQ, mem_freq);
setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
setAttribute(art, ATTRIBUTE_L2, l2);
setAttribute(art, ATTRIBUTE_PEAK, pp);
if(gpu->topo->tensor_cores >= 0) {
setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor);
}
const char** attribute_fields = ATTRIBUTE_FIELDS;
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);

src/cuda/cuda.cpp

@@ -2,10 +2,46 @@
#include <cuda_runtime.h>
#include "cuda.hpp"
#include "nvmlb.hpp"
#include "uarch.hpp"
#include "../common/pci.hpp"
#include "../common/global.hpp"
int print_gpus_list() {
cudaError_t err = cudaSuccess;
int num_gpus = -1;
if ((err = cudaGetDeviceCount(&num_gpus)) != cudaSuccess) {
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
return EXIT_FAILURE;
}
printf("CUDA GPUs available: %d\n", num_gpus);
if(num_gpus > 0) {
cudaDeviceProp deviceProp;
int max_len = 0;
for(int idx=0; idx < num_gpus; idx++) {
if ((err = cudaGetDeviceProperties(&deviceProp, idx)) != cudaSuccess) {
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
return EXIT_FAILURE;
}
max_len = max(max_len, (int) strlen(deviceProp.name));
}
for(int i=0; i < max_len + 32; i++) putchar('-');
putchar('\n');
for(int idx=0; idx < num_gpus; idx++) {
if ((err = cudaGetDeviceProperties(&deviceProp, idx)) != cudaSuccess) {
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
return EXIT_FAILURE;
}
printf("GPU %d: %s (Compute Capability %d.%d)\n", idx, deviceProp.name, deviceProp.major, deviceProp.minor);
}
}
return EXIT_SUCCESS;
}
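With this routine, the new `gpufetch -l` / `--list-gpus` option prints one line per device under a separator whose width tracks the longest device name. A hypothetical example — the device is made up; the format follows directly from the printf calls above:

```
CUDA GPUs available: 1
-------------------------------------------------------
GPU 0: NVIDIA GeForce RTX 3060 (Compute Capability 8.6)
```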
struct cache* get_cache_info(cudaDeviceProp prop) {
struct cache* cach = (struct cache*) emalloc(sizeof(struct cache));
@@ -17,12 +53,19 @@ struct cache* get_cache_info(cudaDeviceProp prop) {
return cach;
}
int get_tensor_cores(int sm, int major) {
if(major == 7) return sm * 8;
else if(major == 8) return sm * 4;
else return 0;
}
struct topology* get_topology_info(cudaDeviceProp prop) {
struct topology* topo = (struct topology*) emalloc(sizeof(struct topology));
topo->streaming_mp = prop.multiProcessorCount;
topo->cores_per_mp = _ConvertSMVer2Cores(prop.major, prop.minor);
topo->cuda_cores = topo->streaming_mp * topo->cores_per_mp;
topo->tensor_cores = get_tensor_cores(topo->streaming_mp, prop.major);
return topo;
}
@@ -60,10 +103,16 @@ struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) {
return mem;
}
// Compute peak performance when using CUDA cores
int64_t get_peak_performance(struct gpu_info* gpu) {
return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2;
}
// Compute peak performance when using tensor cores
int64_t get_peak_performance_t(struct gpu_info* gpu) {
return gpu->freq * 1000000 * 4 * 4 * 8 * gpu->topo->tensor_cores;
}
struct gpu_info* get_gpu_info(int gpu_idx) {
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
gpu->pci = NULL;
@@ -106,38 +155,39 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
gpu->name = (char *) emalloc(sizeof(char) * (strlen(deviceProp.name) + 1));
strcpy(gpu->name, deviceProp.name);
gpu->nvmld = nvml_init();
if(nvml_get_pci_info(gpu->idx, gpu->nvmld)) {
gpu->pci = get_pci_from_nvml(gpu->nvmld);
}
struct pci_dev *devices = get_pci_devices_from_pciutils();
gpu->pci = get_pci_from_pciutils(devices);
gpu->arch = get_uarch_from_cuda(gpu);
gpu->cach = get_cache_info(deviceProp);
gpu->mem = get_memory_info(gpu, deviceProp);
gpu->topo = get_topology_info(deviceProp);
gpu->peak_performance = get_peak_performance(gpu);
gpu->peak_performance_t = get_peak_performance_t(gpu);
return gpu;
}
char* get_str_sm(struct gpu_info* gpu) {
uint32_t max_size = 10;
char* get_str_generic(int32_t data) {
// Largest int32 has 10 digits, +1 for a possible sign, +1 for the null terminator
uint32_t max_size = 12;
char* dummy = (char *) ecalloc(max_size, sizeof(char));
snprintf(dummy, max_size, "%d", gpu->topo->streaming_mp);
snprintf(dummy, max_size, "%d", data);
return dummy;
}
char* get_str_sm(struct gpu_info* gpu) {
return get_str_generic(gpu->topo->streaming_mp);
}
char* get_str_cores_sm(struct gpu_info* gpu) {
uint32_t max_size = 10;
char* dummy = (char *) ecalloc(max_size, sizeof(char));
snprintf(dummy, max_size, "%d", gpu->topo->cores_per_mp);
return dummy;
return get_str_generic(gpu->topo->cores_per_mp);
}
char* get_str_cuda_cores(struct gpu_info* gpu) {
uint32_t max_size = 10;
char* dummy = (char *) ecalloc(max_size, sizeof(char));
snprintf(dummy, max_size, "%d", gpu->topo->cuda_cores);
return dummy;
return get_str_generic(gpu->topo->cuda_cores);
}
char* get_str_tensor_cores(struct gpu_info* gpu) {
return get_str_generic(gpu->topo->tensor_cores);
}
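As a sanity check on the two peak-performance helpers added above, here is a small standalone sketch with made-up numbers (the clock and SM count are illustrative, not taken from any real chip):

```cpp
#include <cstdio>
#include <cstdint>

int main() {
  // Hypothetical Ampere-class device (major == 8); values are illustrative only.
  int64_t freq_mhz = 1500;          // boost clock in MHz
  int32_t sms = 40;                 // streaming multiprocessors
  int32_t cuda_cores = sms * 128;   // 128 FP32 cores per SM on this class
  int32_t tensor_cores = sms * 4;   // get_tensor_cores(): major == 8 -> 4 per SM

  // CUDA-core peak: 2 FLOP (one FMA) per core per cycle.
  int64_t pp  = freq_mhz * 1000000 * cuda_cores * 2;
  // Tensor-core peak: 4*4*8 = 128 FLOP per tensor core per cycle.
  int64_t ppt = freq_mhz * 1000000 * 4 * 4 * 8 * (int64_t) tensor_cores;

  printf("FP32 peak:   %.2f TFLOP/s\n", pp  / 1e12);  // 15.36 TFLOP/s
  printf("Tensor peak: %.2f TFLOP/s\n", ppt / 1e12);  // 30.72 TFLOP/s
  return 0;
}
```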

src/cuda/cuda.hpp

@@ -4,8 +4,10 @@
#include "../common/gpu.hpp"
struct gpu_info* get_gpu_info(int gpu_idx);
int print_gpus_list();
char* get_str_sm(struct gpu_info* gpu);
char* get_str_cores_sm(struct gpu_info* gpu);
char* get_str_cuda_cores(struct gpu_info* gpu);
char* get_str_tensor_cores(struct gpu_info* gpu);
#endif

src/cuda/nvmlb.cpp (deleted)

@@ -1,70 +0,0 @@
#include <nvml.h>
#include "nvmlb.hpp"
#include "../common/global.hpp"
struct nvml_data {
bool nvml_started;
nvmlPciInfo_t pci;
};
struct nvml_data* nvml_init() {
struct nvml_data* data = (struct nvml_data*) emalloc(sizeof(struct nvml_data));
data->nvml_started = false;
nvmlReturn_t result;
if ((result = nvmlInit()) != NVML_SUCCESS) {
printErr("nvmlInit: %s\n", nvmlErrorString(result));
return NULL;
}
data->nvml_started = true;
return data;
}
bool nvml_get_pci_info(int gpu_idx, struct nvml_data* data) {
nvmlReturn_t result;
nvmlDevice_t device;
if(!data->nvml_started) {
printErr("nvml_get_pci_info: nvml was not started");
return false;
}
if ((result = nvmlDeviceGetHandleByIndex(gpu_idx, &device)) != NVML_SUCCESS) {
printErr("nvmlDeviceGetHandleByIndex: %s\n", nvmlErrorString(result));
return false;
}
if ((result = nvmlDeviceGetPciInfo(device, &data->pci)) != NVML_SUCCESS) {
printErr("nvmlDeviceGetPciInfo: %s\n", nvmlErrorString(result));
return false;
}
return true;
}
uint16_t nvml_get_pci_vendor_id(struct nvml_data* data) {
return data->pci.pciDeviceId & 0x0000FFFF;
}
uint16_t nvml_get_pci_device_id(struct nvml_data* data) {
return (data->pci.pciDeviceId & 0xFFFF0000) >> 16;
}
bool nvml_shutdown(struct nvml_data* data) {
nvmlReturn_t result;
if(!data->nvml_started) {
printWarn("nvml_get_pci_info: nvml was not started");
return true;
}
if ((result = nvmlShutdown()) != NVML_SUCCESS) {
printErr("nvmlShutdown: %s\n", nvmlErrorString(result));
return false;
}
return true;
}

src/cuda/nvmlb.hpp (deleted)

@@ -1,16 +0,0 @@
// NVML Backend
#ifndef __NVMLB__
#define __NVMLB__
#include <stdbool.h>
#include <stdint.h>
struct nvml_data;
struct nvml_data* nvml_init();
bool nvml_get_pci_info(int dev, struct nvml_data* data);
uint16_t nvml_get_pci_vendor_id(struct nvml_data* data);
uint16_t nvml_get_pci_device_id(struct nvml_data* data);
bool nvml_shutdown(struct nvml_data* data);
#endif

src/cuda/pci.cpp

@@ -1,9 +1,9 @@
#include <stdio.h>
#include "pci.hpp"
#include "nvmlb.hpp"
#include "chips.hpp"
#include "../common/global.hpp"
#include "../common/pci.hpp"
#define CHECK_PCI_START if (false) {}
#define CHECK_PCI(pci, id, chip) \
@@ -15,11 +15,11 @@ struct pci {
uint16_t device_id;
};
struct pci* get_pci_from_nvml(struct nvml_data* data) {
struct pci* get_pci_from_pciutils(struct pci_dev *devices) {
struct pci* pci = (struct pci*) emalloc(sizeof(struct pci));
pci->vendor_id = nvml_get_pci_vendor_id(data);
pci->device_id = nvml_get_pci_device_id(data);
pci->vendor_id = pciutils_get_pci_vendor_id(devices);
pci->device_id = pciutils_get_pci_device_id(devices);
return pci;
}

src/cuda/pci.hpp

@@ -1,13 +1,14 @@
#ifndef __PCI__
#define __PCI__
#ifndef __PCI_CUDA__
#define __PCI_CUDA__
#include <stdint.h>
#include "nvmlb.hpp"
#include "../common/pci.hpp"
#include "chips.hpp"
struct pci;
struct pci* get_pci_from_nvml(struct nvml_data* data);
struct pci* get_pci_from_pciutils(struct pci_dev *devices);
GPUCHIP get_chip_from_pci(struct pci* pci);
#endif