Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
32b2c59b50 | ||
|
|
8bf0276aae | ||
|
|
821b6e760e | ||
|
|
f212fb88d4 | ||
|
|
81607151dc | ||
|
|
bdf9eb0079 | ||
|
|
039e7c350d | ||
|
|
4b4d1bc030 | ||
|
|
d00e3f183d |
81
CMakeLists.txt
Normal file
81
CMakeLists.txt
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
# Top-level build description for gpufetch.
cmake_minimum_required(VERSION 3.10)

# ExternalProject supplies ExternalProject_Add(); CheckLanguage supplies check_language().
include(ExternalProject)
include(CheckLanguage)

project(gpufetch LANGUAGES CXX)

# Layout of the source tree, relative to this file.
set(SRC_DIR    "src")
set(CUDA_DIR   "${SRC_DIR}/cuda")
set(COMMON_DIR "${SRC_DIR}/common")
|
||||||
|
|
||||||
|
# ANSI escape sequences used to colorize configure-time messages.
# They are only defined outside Windows; this block leaves them unset there.
if(NOT WIN32)
  string(ASCII 27 Esc)

  # Attribute-only sequences.
  set(ColorReset "${Esc}[m")
  set(ColorBold  "${Esc}[1m")

  # Plain colors.
  set(Red   "${Esc}[31m")
  set(Green "${Esc}[32m")

  # Bold colors.
  set(BoldRed    "${Esc}[1;31m")
  set(BoldGreen  "${Esc}[1;32m")
  set(BoldYellow "${Esc}[1;33m")
endif()
|
||||||
|
|
||||||
|
# CUDA is mandatory: stop the configuration right away when no CUDA compiler
# can be located, otherwise enable the language.
check_language(CUDA)
if(NOT CMAKE_CUDA_COMPILER)
  message(FATAL_ERROR "${BoldRed}[ERROR]${ColorReset} Unable to find CUDA compiler. You may use -DCMAKE_CUDA_COMPILER and -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT if CUDA is installed but not detected by CMake")
endif()
enable_language(CUDA)
|
||||||
|
|
||||||
|
# Make the find modules shipped in ./cmake visible, then look for pciutils.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")
find_package(PCIUTILS)

if(PCIUTILS_FOUND)
  # A system copy exists: use its headers and libraries for every target.
  include_directories(${PCIUTILS_INCLUDE_DIR})
  link_libraries(${PCIUTILS_LIBRARIES})
else()
  message(STATUS "${BoldYellow}pciutils not found, downloading and building a local copy...${ColorReset}")

  # No system copy: fetch the sources and build a static library that is
  # installed inside the build tree.
  set(PCIUTILS_INSTALL_LOCATION ${CMAKE_BINARY_DIR}/pciutils-install)
  ExternalProject_Add(pciutils
    GIT_REPOSITORY    https://github.com/pciutils/pciutils
    BUILD_IN_SOURCE   true
    CONFIGURE_COMMAND ""
    BUILD_COMMAND     make SHARED=no
    INSTALL_COMMAND   make PREFIX=${PCIUTILS_INSTALL_LOCATION} install-lib
  )

  # Point every target at the locally installed headers and library directory.
  include_directories(${PCIUTILS_INSTALL_LOCATION}/include)
  link_directories(${PCIUTILS_INSTALL_LOCATION}/lib)
endif()
|
||||||
|
|
||||||
|
# Extra diagnostics on top of the generic warning set.
set(SANITY_FLAGS "-Wfloat-equal -Wshadow -Wpointer-arith")

# Project-wide C++ flags. The incoming CMAKE_CXX_FLAGS value (CXXFLAGS from the
# environment or -DCMAKE_CXX_FLAGS on the command line) is kept and placed last,
# so user-supplied flags are no longer discarded and can override the project
# defaults. The duplicated -pedantic has been dropped.
set(CMAKE_CXX_FLAGS "${SANITY_FLAGS} -Wall -Wextra -pedantic -fstack-protector-all ${CMAKE_CXX_FLAGS}")
|
||||||
|
|
||||||
|
# https://en.wikipedia.org/w/index.php?title=CUDA&section=5#GPUs_supported
|
||||||
|
# https://raw.githubusercontent.com/PointCloudLibrary/pcl/master/cmake/pcl_find_cuda.cmake
|
||||||
|
# Select the compute capabilities to target according to the CUDA compiler
# version. The branches are ordered from newest to oldest toolkit, so the first
# matching lower bound wins.
#
# CMAKE_CUDA_COMPILER_VERSION normally carries a patch component (for example
# 8.0.61), so the oldest branch uses a lower bound as well: an exact
# VERSION_EQUAL "8.0" comparison would not match such a value.
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.0")
  set(CMAKE_CUDA_ARCHITECTURES 35 37 50 52 53 60 61 62 70 72 75 80 86)
elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "10.0")
  set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72 75)
elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "9.0")
  set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72)
elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "8.0")
  set(CMAKE_CUDA_ARCHITECTURES 20 21 30 32 35 37 50 52 53 60 61 62)
endif()
|
||||||
|
|
||||||
|
# Directory holding the CUDA runtime library.
# NOTE(review): the targets/x86_64-linux sub-directory is hard-coded here and in
# the include path below; other host architectures would need another path.
link_directories(${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/lib)

# CUDA-specific code lives in its own static library.
add_library(cuda_backend STATIC
  ${CUDA_DIR}/cuda.cpp
  ${CUDA_DIR}/uarch.cpp
  ${CUDA_DIR}/pci.cpp
)

# Backend-independent front end.
add_executable(gpufetch
  ${COMMON_DIR}/main.cpp
  ${COMMON_DIR}/args.cpp
  ${COMMON_DIR}/gpu.cpp
  ${COMMON_DIR}/pci.cpp
  ${COMMON_DIR}/global.cpp
  ${COMMON_DIR}/printer.cpp
)

# When pciutils is built through ExternalProject, make sure it is built and
# installed before any source that includes <pci/pci.h> is compiled.
# The variable is tested by name so that an empty value cannot break if().
if(NOT PCIUTILS_FOUND)
  add_dependencies(cuda_backend pciutils)
  add_dependencies(gpufetch pciutils)
endif()

target_include_directories(cuda_backend PUBLIC
  ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/samples/common/inc
  ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/include
)

# Explicit visibility keywords replace the legacy keyword-less signature.
# cudart stays PUBLIC so it is still propagated to gpufetch through the static
# cuda_backend library.
target_link_libraries(cuda_backend PUBLIC cudart)
target_link_libraries(gpufetch PRIVATE cuda_backend pci z)

install(TARGETS gpufetch DESTINATION bin)
|
||||||
53
Makefile
53
Makefile
@@ -1,53 +0,0 @@
|
|||||||
CXX ?= g++
|
|
||||||
CUDA_PATH ?= /usr/local/cuda/
|
|
||||||
PREFIX ?= /usr
|
|
||||||
|
|
||||||
CXXFLAGS+=-Wall -Wextra -pedantic -fstack-protector-all -pedantic
|
|
||||||
SANITY_FLAGS=-Wfloat-equal -Wshadow -Wpointer-arith
|
|
||||||
|
|
||||||
SRC_COMMON=src/common/
|
|
||||||
SRC_CUDA=src/cuda/
|
|
||||||
|
|
||||||
COMMON_SRC = $(SRC_COMMON)main.cpp $(SRC_COMMON)gpu.cpp $(SRC_COMMON)args.cpp $(SRC_COMMON)global.cpp $(SRC_COMMON)printer.cpp
|
|
||||||
COMMON_HDR = $(SRC_COMMON)ascii.hpp $(SRC_COMMON)gpu.hpp $(SRC_COMMON)args.hpp $(SRC_COMMON)global.hpp $(SRC_COMMON)printer.hpp
|
|
||||||
|
|
||||||
CUDA_SRC = $(SRC_CUDA)cuda.cpp $(SRC_CUDA)uarch.cpp $(SRC_CUDA)pci.cpp $(SRC_CUDA)nvmlb.cpp
|
|
||||||
CUDA_HDR = $(SRC_CUDA)cuda.hpp $(SRC_CUDA)uarch.hpp $(SRC_CUDA)pci.hpp $(SRC_CUDA)nvmlb.hpp $(SRC_CUDA)chips.hpp
|
|
||||||
|
|
||||||
SOURCE += $(COMMON_SRC) $(CUDA_SRC)
|
|
||||||
HEADERS += $(COMMON_HDR) $(CUDA_HDR)
|
|
||||||
|
|
||||||
OUTPUT=gpufetch
|
|
||||||
|
|
||||||
CXXFLAGS+= -I $(CUDA_PATH)/samples/common/inc -I $(CUDA_PATH)/targets/x86_64-linux/include -L $(CUDA_PATH)/targets/x86_64-linux/lib -lcudart -lnvidia-ml
|
|
||||||
|
|
||||||
all: CXXFLAGS += -O3
|
|
||||||
all: $(OUTPUT)
|
|
||||||
|
|
||||||
debug: CXXFLAGS += -g -O0
|
|
||||||
debug: $(OUTPUT)
|
|
||||||
|
|
||||||
static: CXXFLAGS += -static -O3
|
|
||||||
static: $(OUTPUT)
|
|
||||||
|
|
||||||
strict: CXXFLAGS += -O3 -Werror -fsanitize=undefined -D_FORTIFY_SOURCE=2
|
|
||||||
strict: $(OUTPUT)
|
|
||||||
|
|
||||||
$(OUTPUT): Makefile $(SOURCE) $(HEADERS)
|
|
||||||
$(CXX) $(CXXFLAGS) $(SANITY_FLAGS) $(SOURCE) -o $(OUTPUT)
|
|
||||||
|
|
||||||
run: $(OUTPUT)
|
|
||||||
./$(OUTPUT)
|
|
||||||
|
|
||||||
clean:
|
|
||||||
@rm -f $(OUTPUT)
|
|
||||||
|
|
||||||
install: $(OUTPUT)
|
|
||||||
install -Dm755 "gpufetch" "$(DESTDIR)$(PREFIX)/bin/gpufetch"
|
|
||||||
install -Dm644 "LICENSE" "$(DESTDIR)$(PREFIX)/share/licenses/gpufetch-git/LICENSE"
|
|
||||||
install -Dm644 "gpufetch.1" "$(DESTDIR)$(PREFIX)/share/man/man1/gpufetch.1.gz"
|
|
||||||
|
|
||||||
uninstall:
|
|
||||||
rm -f "$(DESTDIR)$(PREFIX)/bin/gpufetch"
|
|
||||||
rm -f "$(DESTDIR)$(PREFIX)/share/licenses/gpufetch-git/LICENSE"
|
|
||||||
rm -f "$(DESTDIR)$(PREFIX)/share/man/man1/gpufetch.1.gz"
|
|
||||||
18
README.md
18
README.md
@@ -31,18 +31,28 @@
|
|||||||
gpufetch supports NVIDIA GPUs under Linux only.
|
gpufetch supports NVIDIA GPUs under Linux only.
|
||||||
|
|
||||||
# 2. Installation (building from source)
|
# 2. Installation (building from source)
|
||||||
You will need a C++ compiler (e.g, `g++`), `make` and CUDA to compile `gpufetch`. To do so, just clone the repo and run `make`:
|
You will need:
|
||||||
|
|
||||||
|
- C++ compiler (e.g., `g++`)
|
||||||
|
- `cmake`
|
||||||
|
- `make`
|
||||||
|
- CUDA (NVIDIA backend)
|
||||||
|
- pciutils (optional)
|
||||||
|
|
||||||
|
To build gpufetch, just clone the repo and run `./build.sh`:
|
||||||
|
|
||||||
```
|
```
|
||||||
git clone https://github.com/Dr-Noob/gpufetch
|
git clone https://github.com/Dr-Noob/gpufetch
|
||||||
cd gpufetch
|
cd gpufetch
|
||||||
make
|
./build.sh
|
||||||
./gpufetch
|
./gpufetch
|
||||||
```
|
```
|
||||||
When building gpufetch, you may encounter an error telling you that it cannot find some CUDA header files. In this case, is very likely that the Makefile is unable to find your CUDA installation. This can be solved by setting `CUDA_PATH` to the correct CUDA installation path. For example:
|
|
||||||
|
- NOTE 1: It is recommended to install the `pciutils` development package, which is needed by gpufetch. If it is not installed, it will be downloaded and built automatically just to compile gpufetch.
|
||||||
|
- NOTE 2: When building gpufetch, cmake may fail if it is unable to find the CUDA installation. If CUDA is installed but CMake does not find it, you need to pass the CUDA path to cmake. You can do this easily by directly editing the `build.sh` script. For example:
|
||||||
|
|
||||||
```
|
```
|
||||||
CUDA_PATH=/opt/cuda make
|
cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=/usr/local/cuda/ ..
|
||||||
```
|
```
|
||||||
|
|
||||||
# 3. Colors and style
|
# 3. Colors and style
|
||||||
|
|||||||
19
build.sh
Executable file
19
build.sh
Executable file
@@ -0,0 +1,19 @@
|
|||||||
|
#!/bin/bash

# Builds gpufetch out of tree in ./build and links the resulting binary into
# the current directory. Any failing command aborts the script.
set -e

# Start from scratch: remove the previous build tree and the previous
# gpufetch entry in this directory.
rm -rf build/ gpufetch
mkdir build/
cd build/

# If CUDA is installed but CMake cannot locate it, pass its location explicitly:
#  - CMAKE_CUDA_COMPILER: path to the nvcc binary
#  - CMAKE_CUDA_COMPILER_TOOLKIT_ROOT: CUDA root directory
# For instance:
#  cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=/usr/local/cuda/ ..

cmake ..
make -j"$(nproc)"

# Back to the starting directory, where the binary is exposed as ./gpufetch.
cd -
ln -s build/gpufetch ./gpufetch
|
||||||
29
cmake/FindPCIUTILS.cmake
Normal file
29
cmake/FindPCIUTILS.cmake
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
# - Try to find the pciutils library
|
||||||
|
# Once done this will define
|
||||||
|
#
|
||||||
|
# PCIUTILS_FOUND - system has PCIUtils
|
||||||
|
# PCIUTILS_INCLUDE_DIR - the PCIUTILS include directory
|
||||||
|
# PCIUTILS_LIBRARIES - The libraries needed to use PCIUtils
|
||||||
|
|
||||||
|
# Both search results are already known (cached by an earlier run or supplied
# by the user): do not print the search messages again.
# The check uses PCIUTILS_LIBRARY, the variable find_library() actually caches.
if(PCIUTILS_INCLUDE_DIR AND PCIUTILS_LIBRARY)
  set(PCIUTILS_FIND_QUIETLY TRUE)
endif()

find_path(PCIUTILS_INCLUDE_DIR pci/pci.h)
find_library(PCIUTILS_LIBRARY NAMES pci)

# PCIUTILS_LIBRARIES is only defined when libpci itself was found; libresolv is
# added to the list when it is available.
if(PCIUTILS_LIBRARY)
  set(PCIUTILS_LIBRARIES ${PCIUTILS_LIBRARY})
  find_library(RESOLV_LIBRARY NAMES resolv)
  if(RESOLV_LIBRARY)
    list(APPEND PCIUTILS_LIBRARIES ${RESOLV_LIBRARY})
  endif()
endif()

# Sets PCIUTILS_FOUND and reports the result in the standard way.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(PCIUTILS DEFAULT_MSG PCIUTILS_LIBRARIES PCIUTILS_INCLUDE_DIR)

# Hide the cache entries created by find_path()/find_library() above.
# PCIUTILS_LIBRARIES is a plain variable, not a cache entry, so it is not listed.
mark_as_advanced(PCIUTILS_INCLUDE_DIR PCIUTILS_LIBRARY RESOLV_LIBRARY)
|
||||||
|
|
||||||
47
gpufetch.1
Normal file
47
gpufetch.1
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.48.3.
|
||||||
|
.TH GPUFETCH "1" "August 2021" "gpufetch v0.10" "User Commands"
|
||||||
|
.SH NAME
|
||||||
|
gpufetch
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.B gpufetch
|
||||||
|
[\fI\,OPTION\/\fR]...
|
||||||
|
.SH DESCRIPTION
|
||||||
|
Simple yet fancy GPU architecture fetching tool
|
||||||
|
.SH OPTIONS
|
||||||
|
.TP
|
||||||
|
\fB\-c\fR, \fB\-\-color\fR
|
||||||
|
Sets the color scheme (by default, gpufetch uses the system color scheme) See COLORS section for a more detailed explanation
|
||||||
|
.TP
|
||||||
|
\fB\-g\fR, \fB\-\-gpu\fR
|
||||||
|
Selects the GPU to use (default: 0)
|
||||||
|
.TP
|
||||||
|
\fB\-h\fR, \fB\-\-help\fR
|
||||||
|
Prints this help and exit
|
||||||
|
.TP
|
||||||
|
\fB\-V\fR, \fB\-\-version\fR
|
||||||
|
Prints gpufetch version and exit
|
||||||
|
.SS "COLORS:"
|
||||||
|
.IP
|
||||||
|
Color scheme can be set using a predefined color scheme or a custom one:
|
||||||
|
1. To use a predefined color scheme, the name of the scheme must be provided. Possible values are:
|
||||||
|
* "nvidia": Use NVIDIA default color scheme
|
||||||
|
2. To use a custom color scheme, 4 colors must be given in RGB with the format: R,G,B:R,G,B:...
|
||||||
|
The first 2 colors are the GPU art color and the following 2 colors are the text colors
|
||||||
|
.SS "EXAMPLES:"
|
||||||
|
.IP
|
||||||
|
Run gpufetch with NVIDIA color scheme:
|
||||||
|
.IP
|
||||||
|
\&./gpufetch \fB\-\-color\fR nvidia
|
||||||
|
.IP
|
||||||
|
Run gpufetch with a custom color scheme:
|
||||||
|
.IP
|
||||||
|
\&./gpufetch \fB\-\-color\fR 239,90,45:210,200,200:100,200,45:0,200,200
|
||||||
|
.SS "BUGS:"
|
||||||
|
.IP
|
||||||
|
Report bugs to https://github.com/Dr\-Noob/gpufetch/issues
|
||||||
|
.SS "NOTE:"
|
||||||
|
.IP
|
||||||
|
Peak performance information is NOT accurate. gpufetch computes peak performance using the max
|
||||||
|
frequency. However, to properly compute peak performance, you need to know the frequency of the
|
||||||
|
GPU running real code.
|
||||||
|
For peak performance measurement see: https://github.com/Dr\-Noob/peakperf
|
||||||
@@ -19,6 +19,7 @@
|
|||||||
struct args_struct {
|
struct args_struct {
|
||||||
bool help_flag;
|
bool help_flag;
|
||||||
bool version_flag;
|
bool version_flag;
|
||||||
|
bool list_gpus;
|
||||||
int gpu_idx;
|
int gpu_idx;
|
||||||
STYLE style;
|
STYLE style;
|
||||||
struct color** colors;
|
struct color** colors;
|
||||||
@@ -28,17 +29,19 @@ int errn = 0;
|
|||||||
static struct args_struct args;
|
static struct args_struct args;
|
||||||
|
|
||||||
const char args_chr[] = {
|
const char args_chr[] = {
|
||||||
/* [ARG_CHAR_COLOR] = */ 'c',
|
/* [ARG_COLOR] = */ 'c',
|
||||||
/* [ARG_CHAR_GPU] = */ 'g',
|
/* [ARG_GPU] = */ 'g',
|
||||||
/* [ARG_CHAR_HELP] = */ 'h',
|
/* [ARG_LIST] = */ 'l',
|
||||||
/* [ARG_CHAR_VERSION] = */ 'V',
|
/* [ARG_HELP] = */ 'h',
|
||||||
|
/* [ARG_VERSION] = */ 'V',
|
||||||
};
|
};
|
||||||
|
|
||||||
const char *args_str[] = {
|
const char *args_str[] = {
|
||||||
/* [ARG_CHAR_COLOR] = */ "color",
|
/* [ARG_COLOR] = */ "color",
|
||||||
/* [ARG_CHAR_GPU] = */ "gpu",
|
/* [ARG_GPU] = */ "gpu",
|
||||||
/* [ARG_CHAR_HELP] = */ "help",
|
/* [ARG_LIST] = */ "list-gpus",
|
||||||
/* [ARG_CHAR_VERSION] = */ "version",
|
/* [ARG_HELP] = */ "help",
|
||||||
|
/* [ARG_VERSION] = */ "version",
|
||||||
};
|
};
|
||||||
|
|
||||||
int getarg_int(char* str) {
|
int getarg_int(char* str) {
|
||||||
@@ -100,6 +103,10 @@ bool show_help() {
|
|||||||
return args.help_flag;
|
return args.help_flag;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool list_gpus() {
|
||||||
|
return args.list_gpus;
|
||||||
|
}
|
||||||
|
|
||||||
bool show_version() {
|
bool show_version() {
|
||||||
return args.version_flag;
|
return args.version_flag;
|
||||||
}
|
}
|
||||||
@@ -119,8 +126,9 @@ char* build_short_options() {
|
|||||||
char* str = (char *) emalloc(sizeof(char) * (len*2 + 1));
|
char* str = (char *) emalloc(sizeof(char) * (len*2 + 1));
|
||||||
memset(str, 0, sizeof(char) * (len*2 + 1));
|
memset(str, 0, sizeof(char) * (len*2 + 1));
|
||||||
|
|
||||||
sprintf(str, "%c:%c:%c%c", c[ARG_GPU],
|
sprintf(str, "%c:%c:%c%c%c", c[ARG_GPU],
|
||||||
c[ARG_COLOR], c[ARG_HELP], c[ARG_VERSION]);
|
c[ARG_COLOR], c[ARG_HELP], c[ARG_LIST],
|
||||||
|
c[ARG_VERSION]);
|
||||||
|
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
@@ -185,12 +193,14 @@ bool parse_args(int argc, char* argv[]) {
|
|||||||
|
|
||||||
args.version_flag = false;
|
args.version_flag = false;
|
||||||
args.help_flag = false;
|
args.help_flag = false;
|
||||||
|
args.list_gpus = false;
|
||||||
args.gpu_idx = 0;
|
args.gpu_idx = 0;
|
||||||
args.colors = NULL;
|
args.colors = NULL;
|
||||||
|
|
||||||
const struct option long_options[] = {
|
const struct option long_options[] = {
|
||||||
{args_str[ARG_COLOR], required_argument, 0, args_chr[ARG_COLOR] },
|
{args_str[ARG_COLOR], required_argument, 0, args_chr[ARG_COLOR] },
|
||||||
{args_str[ARG_GPU], required_argument, 0, args_chr[ARG_GPU] },
|
{args_str[ARG_GPU], required_argument, 0, args_chr[ARG_GPU] },
|
||||||
|
{args_str[ARG_LIST], no_argument, 0, args_chr[ARG_LIST] },
|
||||||
{args_str[ARG_HELP], no_argument, 0, args_chr[ARG_HELP] },
|
{args_str[ARG_HELP], no_argument, 0, args_chr[ARG_HELP] },
|
||||||
{args_str[ARG_VERSION], no_argument, 0, args_chr[ARG_VERSION] },
|
{args_str[ARG_VERSION], no_argument, 0, args_chr[ARG_VERSION] },
|
||||||
{0, 0, 0, 0}
|
{0, 0, 0, 0}
|
||||||
@@ -199,7 +209,7 @@ bool parse_args(int argc, char* argv[]) {
|
|||||||
char* short_options = build_short_options();
|
char* short_options = build_short_options();
|
||||||
opt = getopt_long(argc, argv, short_options, long_options, &option_index);
|
opt = getopt_long(argc, argv, short_options, long_options, &option_index);
|
||||||
|
|
||||||
while (!args.help_flag && !args.version_flag && opt != -1) {
|
while (!args.help_flag && !args.version_flag && !args.list_gpus && opt != -1) {
|
||||||
if(opt == args_chr[ARG_COLOR]) {
|
if(opt == args_chr[ARG_COLOR]) {
|
||||||
args.colors = (struct color **) emalloc(sizeof(struct color *) * NUM_COLORS);
|
args.colors = (struct color **) emalloc(sizeof(struct color *) * NUM_COLORS);
|
||||||
if(!parse_color(optarg, &args.colors)) {
|
if(!parse_color(optarg, &args.colors)) {
|
||||||
@@ -215,8 +225,11 @@ bool parse_args(int argc, char* argv[]) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if(opt == args_chr[ARG_LIST]) {
|
||||||
|
args.list_gpus = true;
|
||||||
|
}
|
||||||
else if(opt == args_chr[ARG_HELP]) {
|
else if(opt == args_chr[ARG_HELP]) {
|
||||||
args.help_flag = true;
|
args.help_flag = true;
|
||||||
}
|
}
|
||||||
else if(opt == args_chr[ARG_VERSION]) {
|
else if(opt == args_chr[ARG_VERSION]) {
|
||||||
args.version_flag = true;
|
args.version_flag = true;
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ enum {
|
|||||||
enum {
|
enum {
|
||||||
ARG_COLOR,
|
ARG_COLOR,
|
||||||
ARG_GPU,
|
ARG_GPU,
|
||||||
|
ARG_LIST,
|
||||||
ARG_HELP,
|
ARG_HELP,
|
||||||
ARG_VERSION
|
ARG_VERSION
|
||||||
};
|
};
|
||||||
@@ -33,6 +34,7 @@ extern const char *args_str[];
|
|||||||
int max_arg_str_length();
|
int max_arg_str_length();
|
||||||
bool parse_args(int argc, char* argv[]);
|
bool parse_args(int argc, char* argv[]);
|
||||||
bool show_help();
|
bool show_help();
|
||||||
|
bool list_gpus();
|
||||||
bool show_version();
|
bool show_version();
|
||||||
void free_colors_struct(struct color** cs);
|
void free_colors_struct(struct color** cs);
|
||||||
int get_gpu_idx();
|
int get_gpu_idx();
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
#define __GLOBAL__
|
#define __GLOBAL__
|
||||||
|
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stddef.h>
|
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
|
||||||
#define STRING_UNKNOWN "Unknown"
|
#define STRING_UNKNOWN "Unknown"
|
||||||
|
|||||||
@@ -116,17 +116,17 @@ char* get_str_l2(struct gpu_info* gpu) {
|
|||||||
return string;
|
return string;
|
||||||
}
|
}
|
||||||
|
|
||||||
char* get_str_peak_performance(struct gpu_info* gpu) {
|
char* get_str_peak_performance_generic(int64_t pp) {
|
||||||
char* str;
|
char* str;
|
||||||
|
|
||||||
if(gpu->peak_performance == -1) {
|
if(pp == -1) {
|
||||||
str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
|
str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
|
||||||
strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
|
strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 7 for digits (e.g, XXXX.XX), 7 for XFLOP/s
|
// 7 for digits (e.g, XXXX.XX), 7 for XFLOP/s
|
||||||
double flopsd = (double) gpu->peak_performance;
|
double flopsd = (double) pp;
|
||||||
uint32_t max_size = 7+1+7+1;
|
uint32_t max_size = 7+1+7+1;
|
||||||
str = (char *) ecalloc(max_size, sizeof(char));
|
str = (char *) ecalloc(max_size, sizeof(char));
|
||||||
|
|
||||||
@@ -139,3 +139,12 @@ char* get_str_peak_performance(struct gpu_info* gpu) {
|
|||||||
|
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char* get_str_peak_performance(struct gpu_info* gpu) {
|
||||||
|
return get_str_peak_performance_generic(gpu->peak_performance);
|
||||||
|
}
|
||||||
|
|
||||||
|
char* get_str_peak_performance_tensor(struct gpu_info* gpu) {
|
||||||
|
return get_str_peak_performance_generic(gpu->peak_performance_t);
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,6 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
|
||||||
#include "../cuda/nvmlb.hpp"
|
|
||||||
#include "../cuda/pci.hpp"
|
#include "../cuda/pci.hpp"
|
||||||
|
|
||||||
#define UNKNOWN_FREQ -1
|
#define UNKNOWN_FREQ -1
|
||||||
@@ -41,6 +40,7 @@ struct topology {
|
|||||||
int32_t streaming_mp;
|
int32_t streaming_mp;
|
||||||
int32_t cores_per_mp;
|
int32_t cores_per_mp;
|
||||||
int32_t cuda_cores;
|
int32_t cuda_cores;
|
||||||
|
int32_t tensor_cores;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct memory {
|
struct memory {
|
||||||
@@ -57,11 +57,11 @@ struct gpu_info {
|
|||||||
char* name;
|
char* name;
|
||||||
int64_t freq;
|
int64_t freq;
|
||||||
struct pci* pci;
|
struct pci* pci;
|
||||||
struct nvml_data* nvmld;
|
|
||||||
struct topology* topo;
|
struct topology* topo;
|
||||||
struct memory* mem;
|
struct memory* mem;
|
||||||
struct cache* cach;
|
struct cache* cach;
|
||||||
int64_t peak_performance;
|
int64_t peak_performance;
|
||||||
|
int64_t peak_performance_t;
|
||||||
int32_t idx;
|
int32_t idx;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -74,5 +74,6 @@ char* get_str_bus_width(struct gpu_info* gpu);
|
|||||||
char* get_str_memory_clock(struct gpu_info* gpu);
|
char* get_str_memory_clock(struct gpu_info* gpu);
|
||||||
char* get_str_l2(struct gpu_info* gpu);
|
char* get_str_l2(struct gpu_info* gpu);
|
||||||
char* get_str_peak_performance(struct gpu_info* gpu);
|
char* get_str_peak_performance(struct gpu_info* gpu);
|
||||||
|
char* get_str_peak_performance_tensor(struct gpu_info* gpu);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
#include "../cuda/cuda.hpp"
|
#include "../cuda/cuda.hpp"
|
||||||
#include "../cuda/uarch.hpp"
|
#include "../cuda/uarch.hpp"
|
||||||
|
|
||||||
static const char* VERSION = "0.10";
|
static const char* VERSION = "0.11";
|
||||||
|
|
||||||
void print_help(char *argv[]) {
|
void print_help(char *argv[]) {
|
||||||
const char **t = args_str;
|
const char **t = args_str;
|
||||||
@@ -18,10 +18,11 @@ void print_help(char *argv[]) {
|
|||||||
printf("Simple yet fancy GPU architecture fetching tool\n\n");
|
printf("Simple yet fancy GPU architecture fetching tool\n\n");
|
||||||
|
|
||||||
printf("Options: \n");
|
printf("Options: \n");
|
||||||
printf(" -%c, --%s %*s Sets the color scheme (by default, gpufetch uses the system color scheme) See COLORS section for a more detailed explanation\n", c[ARG_COLOR], t[ARG_COLOR], (int) (max_len-strlen(t[ARG_COLOR])), "");
|
printf(" -%c, --%s %*s Set the color scheme (by default, gpufetch uses the system color scheme) See COLORS section for a more detailed explanation\n", c[ARG_COLOR], t[ARG_COLOR], (int) (max_len-strlen(t[ARG_COLOR])), "");
|
||||||
printf(" -%c, --%s %*s Selects the GPU to use (default: 0)\n", c[ARG_GPU], t[ARG_GPU], (int) (max_len-strlen(t[ARG_GPU])), "");
|
printf(" -%c, --%s %*s List the available GPUs in the system\n", c[ARG_LIST], t[ARG_LIST], (int) (max_len-strlen(t[ARG_LIST])), "");
|
||||||
printf(" -%c, --%s %*s Prints this help and exit\n", c[ARG_HELP], t[ARG_HELP], (int) (max_len-strlen(t[ARG_HELP])), "");
|
printf(" -%c, --%s %*s Select the GPU to use (default: 0)\n", c[ARG_GPU], t[ARG_GPU], (int) (max_len-strlen(t[ARG_GPU])), "");
|
||||||
printf(" -%c, --%s %*s Prints gpufetch version and exit\n", c[ARG_VERSION], t[ARG_VERSION], (int) (max_len-strlen(t[ARG_VERSION])), "");
|
printf(" -%c, --%s %*s Print this help and exit\n", c[ARG_HELP], t[ARG_HELP], (int) (max_len-strlen(t[ARG_HELP])), "");
|
||||||
|
printf(" -%c, --%s %*s Print gpufetch version and exit\n", c[ARG_VERSION], t[ARG_VERSION], (int) (max_len-strlen(t[ARG_VERSION])), "");
|
||||||
|
|
||||||
printf("\nCOLORS: \n");
|
printf("\nCOLORS: \n");
|
||||||
printf(" Color scheme can be set using a predefined color scheme or a custom one:\n");
|
printf(" Color scheme can be set using a predefined color scheme or a custom one:\n");
|
||||||
@@ -64,6 +65,10 @@ int main(int argc, char* argv[]) {
|
|||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(list_gpus()) {
|
||||||
|
return print_gpus_list();
|
||||||
|
}
|
||||||
|
|
||||||
set_log_level(true);
|
set_log_level(true);
|
||||||
|
|
||||||
printWarn("gpufetch is in beta. The provided information may be incomplete or wrong.\n\
|
printWarn("gpufetch is in beta. The provided information may be incomplete or wrong.\n\
|
||||||
|
|||||||
45
src/common/pci.cpp
Normal file
45
src/common/pci.cpp
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
#include "global.hpp"
|
||||||
|
#include "pci.hpp"
|
||||||
|
#include <cstddef>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* doc: https://wiki.osdev.org/PCI#Class_Codes
|
||||||
|
* https://pci-ids.ucw.cz/read/PC
|
||||||
|
*/
|
||||||
|
#define VENDOR_ID_NVIDIA 0x10de
|
||||||
|
#define CLASS_VGA_CONTROLLER 0x0300
|
||||||
|
|
||||||
|
uint16_t pciutils_get_pci_vendor_id(struct pci_dev *devices) {
|
||||||
|
for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
|
||||||
|
if(dev->vendor_id == VENDOR_ID_NVIDIA && dev->device_class == CLASS_VGA_CONTROLLER) {
|
||||||
|
return dev->vendor_id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printErr("Unable to find a CUDA device using pciutils");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint16_t pciutils_get_pci_device_id(struct pci_dev *devices) {
|
||||||
|
for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
|
||||||
|
if(dev->vendor_id == VENDOR_ID_NVIDIA && dev->device_class == CLASS_VGA_CONTROLLER) {
|
||||||
|
return dev->device_id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printErr("Unable to find a CUDA device using pciutils");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct pci_dev *get_pci_devices_from_pciutils() {
|
||||||
|
struct pci_access *pacc;
|
||||||
|
struct pci_dev *dev;
|
||||||
|
|
||||||
|
pacc = pci_alloc();
|
||||||
|
pci_init(pacc);
|
||||||
|
pci_scan_bus(pacc);
|
||||||
|
|
||||||
|
for (dev=pacc->devices; dev; dev=dev->next) {
|
||||||
|
pci_fill_info(dev, PCI_FILL_IDENT | PCI_FILL_BASES | PCI_FILL_CLASS);
|
||||||
|
}
|
||||||
|
|
||||||
|
return pacc->devices;
|
||||||
|
}
|
||||||
13
src/common/pci.hpp
Normal file
13
src/common/pci.hpp
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
#ifndef __GPUFETCH_PCI__
|
||||||
|
#define __GPUFETCH_PCI__
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
extern "C" {
|
||||||
|
#include <pci/pci.h>
|
||||||
|
}
|
||||||
|
|
||||||
|
uint16_t pciutils_get_pci_vendor_id(struct pci_dev *devices);
|
||||||
|
uint16_t pciutils_get_pci_device_id(struct pci_dev *devices);
|
||||||
|
struct pci_dev *get_pci_devices_from_pciutils();
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -38,11 +38,13 @@ enum {
|
|||||||
ATTRIBUTE_STREAMINGMP,
|
ATTRIBUTE_STREAMINGMP,
|
||||||
ATTRIBUTE_CORESPERMP,
|
ATTRIBUTE_CORESPERMP,
|
||||||
ATTRIBUTE_CUDA_CORES,
|
ATTRIBUTE_CUDA_CORES,
|
||||||
|
ATTRIBUTE_TENSOR_CORES,
|
||||||
ATTRIBUTE_L2,
|
ATTRIBUTE_L2,
|
||||||
ATTRIBUTE_MEMORY,
|
ATTRIBUTE_MEMORY,
|
||||||
ATTRIBUTE_MEMORY_FREQ,
|
ATTRIBUTE_MEMORY_FREQ,
|
||||||
ATTRIBUTE_BUS_WIDTH,
|
ATTRIBUTE_BUS_WIDTH,
|
||||||
ATTRIBUTE_PEAK
|
ATTRIBUTE_PEAK,
|
||||||
|
ATTRIBUTE_PEAK_TENSOR,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char* ATTRIBUTE_FIELDS [] = {
|
static const char* ATTRIBUTE_FIELDS [] = {
|
||||||
@@ -53,12 +55,14 @@ static const char* ATTRIBUTE_FIELDS [] = {
|
|||||||
"Max Frequency:",
|
"Max Frequency:",
|
||||||
"SMs:",
|
"SMs:",
|
||||||
"Cores/SM:",
|
"Cores/SM:",
|
||||||
"CUDA cores:",
|
"CUDA Cores:",
|
||||||
|
"Tensor Cores:",
|
||||||
"L2 Size:",
|
"L2 Size:",
|
||||||
"Memory:",
|
"Memory:",
|
||||||
"Memory frequency:",
|
"Memory frequency:",
|
||||||
"Bus width:",
|
"Bus width:",
|
||||||
"Peak Performance:",
|
"Peak Performance:",
|
||||||
|
"Peak Performance (TC):",
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char* ATTRIBUTE_FIELDS_SHORT [] = {
|
static const char* ATTRIBUTE_FIELDS_SHORT [] = {
|
||||||
@@ -69,12 +73,14 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
|
|||||||
"Max Freq.:",
|
"Max Freq.:",
|
||||||
"SMs:",
|
"SMs:",
|
||||||
"Cores/SM:",
|
"Cores/SM:",
|
||||||
"CUDA cores:",
|
"CUDA Cores:",
|
||||||
|
"Tensor Cores:",
|
||||||
"L2 Size:",
|
"L2 Size:",
|
||||||
"Memory:",
|
"Memory:",
|
||||||
"Memory freq.:",
|
"Memory freq.:",
|
||||||
"Bus width:",
|
"Bus width:",
|
||||||
"Peak Perf.:",
|
"Peak Perf.:",
|
||||||
|
"Peak Perf.(TC):",
|
||||||
};
|
};
|
||||||
|
|
||||||
struct terminal {
|
struct terminal {
|
||||||
@@ -350,6 +356,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
|
|||||||
char* sms = get_str_sm(gpu);
|
char* sms = get_str_sm(gpu);
|
||||||
char* corespersm = get_str_cores_sm(gpu);
|
char* corespersm = get_str_cores_sm(gpu);
|
||||||
char* cores = get_str_cuda_cores(gpu);
|
char* cores = get_str_cuda_cores(gpu);
|
||||||
|
char* tensorc = get_str_tensor_cores(gpu);
|
||||||
char* max_frequency = get_str_freq(gpu);
|
char* max_frequency = get_str_freq(gpu);
|
||||||
char* l2 = get_str_l2(gpu);
|
char* l2 = get_str_l2(gpu);
|
||||||
char* mem_size = get_str_memory_size(gpu);
|
char* mem_size = get_str_memory_size(gpu);
|
||||||
@@ -357,6 +364,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
|
|||||||
char* mem_freq = get_str_memory_clock(gpu);
|
char* mem_freq = get_str_memory_clock(gpu);
|
||||||
char* bus_width = get_str_bus_width(gpu);
|
char* bus_width = get_str_bus_width(gpu);
|
||||||
char* pp = get_str_peak_performance(gpu);
|
char* pp = get_str_peak_performance(gpu);
|
||||||
|
char* pp_tensor = get_str_peak_performance_tensor(gpu);
|
||||||
|
|
||||||
char* mem = (char *) emalloc(sizeof(char) * (strlen(mem_size) + strlen(mem_type) + 2));
|
char* mem = (char *) emalloc(sizeof(char) * (strlen(mem_size) + strlen(mem_type) + 2));
|
||||||
sprintf(mem, "%s %s", mem_size, mem_type);
|
sprintf(mem, "%s %s", mem_size, mem_type);
|
||||||
@@ -372,11 +380,17 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
|
|||||||
setAttribute(art, ATTRIBUTE_STREAMINGMP, sms);
|
setAttribute(art, ATTRIBUTE_STREAMINGMP, sms);
|
||||||
setAttribute(art, ATTRIBUTE_CORESPERMP, corespersm);
|
setAttribute(art, ATTRIBUTE_CORESPERMP, corespersm);
|
||||||
setAttribute(art, ATTRIBUTE_CUDA_CORES, cores);
|
setAttribute(art, ATTRIBUTE_CUDA_CORES, cores);
|
||||||
|
if(gpu->topo->tensor_cores >= 0) {
|
||||||
|
setAttribute(art, ATTRIBUTE_TENSOR_CORES, tensorc);
|
||||||
|
}
|
||||||
setAttribute(art, ATTRIBUTE_MEMORY, mem);
|
setAttribute(art, ATTRIBUTE_MEMORY, mem);
|
||||||
setAttribute(art, ATTRIBUTE_MEMORY_FREQ, mem_freq);
|
setAttribute(art, ATTRIBUTE_MEMORY_FREQ, mem_freq);
|
||||||
setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
|
setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
|
||||||
setAttribute(art, ATTRIBUTE_L2, l2);
|
setAttribute(art, ATTRIBUTE_L2, l2);
|
||||||
setAttribute(art, ATTRIBUTE_PEAK, pp);
|
setAttribute(art, ATTRIBUTE_PEAK, pp);
|
||||||
|
if(gpu->topo->tensor_cores >= 0) {
|
||||||
|
setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor);
|
||||||
|
}
|
||||||
|
|
||||||
const char** attribute_fields = ATTRIBUTE_FIELDS;
|
const char** attribute_fields = ATTRIBUTE_FIELDS;
|
||||||
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
|
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
|
||||||
|
|||||||
@@ -2,10 +2,46 @@
|
|||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
|
||||||
#include "cuda.hpp"
|
#include "cuda.hpp"
|
||||||
#include "nvmlb.hpp"
|
|
||||||
#include "uarch.hpp"
|
#include "uarch.hpp"
|
||||||
|
#include "../common/pci.hpp"
|
||||||
#include "../common/global.hpp"
|
#include "../common/global.hpp"
|
||||||
|
|
||||||
|
int print_gpus_list() {
|
||||||
|
cudaError_t err = cudaSuccess;
|
||||||
|
int num_gpus = -1;
|
||||||
|
|
||||||
|
if ((err = cudaGetDeviceCount(&num_gpus)) != cudaSuccess) {
|
||||||
|
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
printf("CUDA GPUs available: %d\n", num_gpus);
|
||||||
|
|
||||||
|
if(num_gpus > 0) {
|
||||||
|
cudaDeviceProp deviceProp;
|
||||||
|
int max_len = 0;
|
||||||
|
|
||||||
|
for(int idx=0; idx < num_gpus; idx++) {
|
||||||
|
if ((err = cudaGetDeviceProperties(&deviceProp, idx)) != cudaSuccess) {
|
||||||
|
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
max_len = max(max_len, (int) strlen(deviceProp.name));
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int i=0; i < max_len + 32; i++) putchar('-');
|
||||||
|
putchar('\n');
|
||||||
|
for(int idx=0; idx < num_gpus; idx++) {
|
||||||
|
if ((err = cudaGetDeviceProperties(&deviceProp, idx)) != cudaSuccess) {
|
||||||
|
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
|
||||||
|
return EXIT_FAILURE;
|
||||||
|
}
|
||||||
|
printf("GPU %d: %s (Compute Capability %d.%d)\n", idx, deviceProp.name, deviceProp.major, deviceProp.minor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
struct cache* get_cache_info(cudaDeviceProp prop) {
|
struct cache* get_cache_info(cudaDeviceProp prop) {
|
||||||
struct cache* cach = (struct cache*) emalloc(sizeof(struct cache));
|
struct cache* cach = (struct cache*) emalloc(sizeof(struct cache));
|
||||||
|
|
||||||
@@ -17,12 +53,19 @@ struct cache* get_cache_info(cudaDeviceProp prop) {
|
|||||||
return cach;
|
return cach;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Number of tensor cores for a device with `sm` streaming multiprocessors.
// Compute capability major 7 counts 8 tensor cores per SM, major 8 counts 4
// per SM, and every other major version yields 0.
int get_tensor_cores(int sm, int major) {
  int per_sm;

  switch (major) {
    case 7:
      per_sm = 8;
      break;
    case 8:
      per_sm = 4;
      break;
    default:
      per_sm = 0;
      break;
  }

  return sm * per_sm;
}
|
||||||
|
|
||||||
struct topology* get_topology_info(cudaDeviceProp prop) {
|
struct topology* get_topology_info(cudaDeviceProp prop) {
|
||||||
struct topology* topo = (struct topology*) emalloc(sizeof(struct topology));
|
struct topology* topo = (struct topology*) emalloc(sizeof(struct topology));
|
||||||
|
|
||||||
topo->streaming_mp = prop.multiProcessorCount;
|
topo->streaming_mp = prop.multiProcessorCount;
|
||||||
topo->cores_per_mp = _ConvertSMVer2Cores(prop.major, prop.minor);
|
topo->cores_per_mp = _ConvertSMVer2Cores(prop.major, prop.minor);
|
||||||
topo->cuda_cores = topo->streaming_mp * topo->cores_per_mp;
|
topo->cuda_cores = topo->streaming_mp * topo->cores_per_mp;
|
||||||
|
topo->tensor_cores = get_tensor_cores(topo->streaming_mp, prop.major);
|
||||||
|
|
||||||
return topo;
|
return topo;
|
||||||
}
|
}
|
||||||
@@ -60,10 +103,16 @@ struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) {
|
|||||||
return mem;
|
return mem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compute peak performance when using CUDA cores
|
||||||
int64_t get_peak_performance(struct gpu_info* gpu) {
|
int64_t get_peak_performance(struct gpu_info* gpu) {
|
||||||
return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2;
|
return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compute peak performance when using tensor cores
|
||||||
|
int64_t get_peak_performance_t(struct gpu_info* gpu) {
|
||||||
|
return gpu->freq * 1000000 * 4 * 4 * 8 * gpu->topo->tensor_cores;
|
||||||
|
}
|
||||||
|
|
||||||
struct gpu_info* get_gpu_info(int gpu_idx) {
|
struct gpu_info* get_gpu_info(int gpu_idx) {
|
||||||
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
|
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
|
||||||
gpu->pci = NULL;
|
gpu->pci = NULL;
|
||||||
@@ -106,38 +155,39 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
|
|||||||
gpu->name = (char *) emalloc(sizeof(char) * (strlen(deviceProp.name) + 1));
|
gpu->name = (char *) emalloc(sizeof(char) * (strlen(deviceProp.name) + 1));
|
||||||
strcpy(gpu->name, deviceProp.name);
|
strcpy(gpu->name, deviceProp.name);
|
||||||
|
|
||||||
gpu->nvmld = nvml_init();
|
struct pci_dev *devices = get_pci_devices_from_pciutils();
|
||||||
if(nvml_get_pci_info(gpu->idx, gpu->nvmld)) {
|
gpu->pci = get_pci_from_pciutils(devices);
|
||||||
gpu->pci = get_pci_from_nvml(gpu->nvmld);
|
|
||||||
}
|
|
||||||
|
|
||||||
gpu->arch = get_uarch_from_cuda(gpu);
|
gpu->arch = get_uarch_from_cuda(gpu);
|
||||||
gpu->cach = get_cache_info(deviceProp);
|
gpu->cach = get_cache_info(deviceProp);
|
||||||
gpu->mem = get_memory_info(gpu, deviceProp);
|
gpu->mem = get_memory_info(gpu, deviceProp);
|
||||||
gpu->topo = get_topology_info(deviceProp);
|
gpu->topo = get_topology_info(deviceProp);
|
||||||
gpu->peak_performance = get_peak_performance(gpu);
|
gpu->peak_performance = get_peak_performance(gpu);
|
||||||
|
gpu->peak_performance_t = get_peak_performance_t(gpu);
|
||||||
|
|
||||||
return gpu;
|
return gpu;
|
||||||
}
|
}
|
||||||
|
|
||||||
char* get_str_sm(struct gpu_info* gpu) {
|
char* get_str_generic(int32_t data) {
|
||||||
uint32_t max_size = 10;
|
// Largest int32_t has 10 digits, +1 for a possible minus sign, +1 for the terminating NUL
|
||||||
|
uint32_t max_size = 12;
|
||||||
char* dummy = (char *) ecalloc(max_size, sizeof(char));
|
char* dummy = (char *) ecalloc(max_size, sizeof(char));
|
||||||
snprintf(dummy, max_size, "%d", gpu->topo->streaming_mp);
|
snprintf(dummy, max_size, "%d", data);
|
||||||
return dummy;
|
return dummy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char* get_str_sm(struct gpu_info* gpu) {
|
||||||
|
return get_str_generic(gpu->topo->streaming_mp);
|
||||||
|
}
|
||||||
|
|
||||||
char* get_str_cores_sm(struct gpu_info* gpu) {
|
char* get_str_cores_sm(struct gpu_info* gpu) {
|
||||||
uint32_t max_size = 10;
|
return get_str_generic(gpu->topo->cores_per_mp);
|
||||||
char* dummy = (char *) ecalloc(max_size, sizeof(char));
|
|
||||||
snprintf(dummy, max_size, "%d", gpu->topo->cores_per_mp);
|
|
||||||
return dummy;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
char* get_str_cuda_cores(struct gpu_info* gpu) {
|
char* get_str_cuda_cores(struct gpu_info* gpu) {
|
||||||
uint32_t max_size = 10;
|
return get_str_generic(gpu->topo->cuda_cores);
|
||||||
char* dummy = (char *) ecalloc(max_size, sizeof(char));
|
}
|
||||||
snprintf(dummy, max_size, "%d", gpu->topo->cuda_cores);
|
|
||||||
return dummy;
|
char* get_str_tensor_cores(struct gpu_info* gpu) {
|
||||||
|
return get_str_generic(gpu->topo->tensor_cores);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,8 +4,10 @@
|
|||||||
#include "../common/gpu.hpp"
|
#include "../common/gpu.hpp"
|
||||||
|
|
||||||
struct gpu_info* get_gpu_info(int gpu_idx);
|
struct gpu_info* get_gpu_info(int gpu_idx);
|
||||||
|
int print_gpus_list();
|
||||||
char* get_str_sm(struct gpu_info* gpu);
|
char* get_str_sm(struct gpu_info* gpu);
|
||||||
char* get_str_cores_sm(struct gpu_info* gpu);
|
char* get_str_cores_sm(struct gpu_info* gpu);
|
||||||
char* get_str_cuda_cores(struct gpu_info* gpu);
|
char* get_str_cuda_cores(struct gpu_info* gpu);
|
||||||
|
char* get_str_tensor_cores(struct gpu_info* gpu);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,70 +0,0 @@
|
|||||||
#include <nvml.h>
|
|
||||||
|
|
||||||
#include "nvmlb.hpp"
|
|
||||||
#include "../common/global.hpp"
|
|
||||||
|
|
||||||
struct nvml_data {
|
|
||||||
bool nvml_started;
|
|
||||||
nvmlPciInfo_t pci;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct nvml_data* nvml_init() {
|
|
||||||
struct nvml_data* data = (struct nvml_data*) emalloc(sizeof(struct nvml_data));
|
|
||||||
data->nvml_started = false;
|
|
||||||
|
|
||||||
nvmlReturn_t result;
|
|
||||||
|
|
||||||
if ((result = nvmlInit()) != NVML_SUCCESS) {
|
|
||||||
printErr("nvmlInit: %s\n", nvmlErrorString(result));
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
data->nvml_started = true;
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool nvml_get_pci_info(int gpu_idx, struct nvml_data* data) {
|
|
||||||
nvmlReturn_t result;
|
|
||||||
nvmlDevice_t device;
|
|
||||||
|
|
||||||
if(!data->nvml_started) {
|
|
||||||
printErr("nvml_get_pci_info: nvml was not started");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((result = nvmlDeviceGetHandleByIndex(gpu_idx, &device)) != NVML_SUCCESS) {
|
|
||||||
printErr("nvmlDeviceGetHandleByIndex: %s\n", nvmlErrorString(result));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((result = nvmlDeviceGetPciInfo(device, &data->pci)) != NVML_SUCCESS) {
|
|
||||||
printErr("nvmlDeviceGetPciInfo: %s\n", nvmlErrorString(result));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint16_t nvml_get_pci_vendor_id(struct nvml_data* data) {
|
|
||||||
return data->pci.pciDeviceId & 0x0000FFFF;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint16_t nvml_get_pci_device_id(struct nvml_data* data) {
|
|
||||||
return (data->pci.pciDeviceId & 0xFFFF0000) >> 16;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool nvml_shutdown(struct nvml_data* data) {
|
|
||||||
nvmlReturn_t result;
|
|
||||||
|
|
||||||
if(!data->nvml_started) {
|
|
||||||
printWarn("nvml_get_pci_info: nvml was not started");
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((result = nvmlShutdown()) != NVML_SUCCESS) {
|
|
||||||
printErr("nvmlShutdown: %s\n", nvmlErrorString(result));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
// NVML Backend
|
|
||||||
#ifndef __NVMLB__
|
|
||||||
#define __NVMLB__
|
|
||||||
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
struct nvml_data;
|
|
||||||
|
|
||||||
struct nvml_data* nvml_init();
|
|
||||||
bool nvml_get_pci_info(int dev, struct nvml_data* data);
|
|
||||||
uint16_t nvml_get_pci_vendor_id(struct nvml_data* data);
|
|
||||||
uint16_t nvml_get_pci_device_id(struct nvml_data* data);
|
|
||||||
bool nvml_shutdown(struct nvml_data* data);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@@ -1,9 +1,9 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "pci.hpp"
|
#include "pci.hpp"
|
||||||
#include "nvmlb.hpp"
|
|
||||||
#include "chips.hpp"
|
#include "chips.hpp"
|
||||||
#include "../common/global.hpp"
|
#include "../common/global.hpp"
|
||||||
|
#include "../common/pci.hpp"
|
||||||
|
|
||||||
#define CHECK_PCI_START if (false) {}
|
#define CHECK_PCI_START if (false) {}
|
||||||
#define CHECK_PCI(pci, id, chip) \
|
#define CHECK_PCI(pci, id, chip) \
|
||||||
@@ -15,11 +15,11 @@ struct pci {
|
|||||||
uint16_t device_id;
|
uint16_t device_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct pci* get_pci_from_nvml(struct nvml_data* data) {
|
struct pci* get_pci_from_pciutils(struct pci_dev *devices) {
|
||||||
struct pci* pci = (struct pci*) emalloc(sizeof(struct pci));
|
struct pci* pci = (struct pci*) emalloc(sizeof(struct pci));
|
||||||
|
|
||||||
pci->vendor_id = nvml_get_pci_vendor_id(data);
|
pci->vendor_id = pciutils_get_pci_vendor_id(devices);
|
||||||
pci->device_id = nvml_get_pci_device_id(data);
|
pci->device_id = pciutils_get_pci_device_id(devices);
|
||||||
|
|
||||||
return pci;
|
return pci;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,13 +1,14 @@
|
|||||||
#ifndef __PCI__
|
#ifndef __PCI_CUDA__
|
||||||
#define __PCI__
|
#define __PCI_CUDA__
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "nvmlb.hpp"
|
|
||||||
|
#include "../common/pci.hpp"
|
||||||
#include "chips.hpp"
|
#include "chips.hpp"
|
||||||
|
|
||||||
struct pci;
|
struct pci;
|
||||||
|
|
||||||
struct pci* get_pci_from_nvml(struct nvml_data* data);
|
struct pci* get_pci_from_pciutils(struct pci_dev *devices);
|
||||||
GPUCHIP get_chip_from_pci(struct pci* pci);
|
GPUCHIP get_chip_from_pci(struct pci* pci);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user