33 Commits
v0.10 ... intel

Author SHA1 Message Date
Dr-Noob
a397eb398e [v0.11] Handle the case where the GPU is not found in the pci LUT 2021-12-18 20:12:41 +01:00
Dr-Noob
bfb9738132 [v0.11] Do not show error message when there is no Intel iGPU 2021-12-18 10:35:51 +01:00
Dr-Noob
6d4d8b621b [v0.11] Fix compilation error and ambiguity with CUDA and Intel backend when enabled at the same time due to functions with the same name 2021-12-18 10:14:14 +01:00
Dr-Noob
93889b2b18 [v0.11] Small adjustments to fix compilation on older compilers 2021-12-10 16:18:39 +01:00
Dr-Noob
b6ce96e746 [v0.11] Add missing Intel iGPU topologies. Add script to check for missing topo/uarchs 2021-12-10 15:55:59 +01:00
Dr-Noob
5f52f73fe0 [v0.11] Completed most of Intel iGPU topologies 2021-12-10 15:32:29 +01:00
Dr-Noob
e5deeb1309 [v0.11] Adding more Intel iGPU topologies 2021-12-10 15:16:29 +01:00
Dr-Noob
44a884fd07 [v0.11] Print peak performance in Intel iGPU 2021-12-09 20:28:07 +01:00
Dr-Noob
1663a36135 [v0.11] Fetch and print max Intel iGPU frequency using sysfs 2021-12-09 20:18:39 +01:00
Dr-Noob
844377f17a [v0.11] Add support for printing EUs (currently only in Gen9/Gen9.5) 2021-12-08 11:15:59 +01:00
Dr-Noob
2034bac006 [v0.11] Displaying Graphics Tier in Intel iGPUs 2021-11-27 14:02:02 +01:00
Dr-Noob
e7c4d5bf91 [v0.11] Adding Gen6, 7, 7.5 and 8 to database 2021-11-27 12:23:41 +01:00
Dr-Noob
b00050e739 [v0.11] Print available more information for iGPU 2021-11-27 11:22:16 +01:00
Dr-Noob
8db60b614d [v0.11] Adding most of Gen9/9.5 iGPUs to database 2021-11-27 11:10:01 +01:00
Dr-Noob
8740337145 [v0.11] Adding uarch backend for intel iGPUs 2021-11-26 12:52:45 +01:00
Dr-Noob
ce004725ad [v0.11] Working in printer backend to show logo and text for intel iGPU 2021-11-26 09:58:45 +01:00
Dr-Noob
310486a6a2 [v0.11] Fixes to recover CUDA functionality, ready for implementing Intel iGPU code 2021-11-26 09:33:57 +01:00
Dr-Noob
e5a4f91b20 [v0.11] Hacky way to solve CMake issues without requiring newer CMake versions 2021-11-26 09:19:24 +01:00
Dr-Noob
461e0d2ede [v0.11] Working in master GPU handler for supporting diverse GPU vendors 2021-11-26 08:22:30 +01:00
Dr-Noob
149e5ad62c [v0.11] Working for future support of Intel iGPUs 2021-11-25 19:03:52 +01:00
Dr-Noob
3502f48f71 [v0.11] Style adjustments in README 2021-11-25 18:06:00 +01:00
Dr-Noob
5acb4ff7dc [v0.11] Small style adjustments in README 2021-11-25 18:01:57 +01:00
Dr-Noob
074c159e5f [v0.11] Update README image 2021-11-25 17:58:57 +01:00
Dr-Noob
cedcfecb80 [v0.11] Dont show tensor cores when there is 0. Use MMA (matrix multiply accumulate) instead of TC (tensor cores) 2021-11-25 17:52:58 +01:00
Dr-Noob
32b2c59b50 [v0.11] Add peak performance with tensor cores to the output 2021-11-23 18:49:34 +01:00
Dr-Noob
8bf0276aae [v0.10] Simple refactoring 2021-11-23 18:17:12 +01:00
Dr-Noob
821b6e760e [v0.10] Add support for displaying the number of tensor cores 2021-11-23 18:09:13 +01:00
Dr-Noob
f212fb88d4 [v0.10] Fix pci initialization 2021-09-08 08:17:06 +02:00
Dr-Noob
81607151dc [v0.10] Update build script and README 2021-09-04 16:02:50 +02:00
Dr-Noob
bdf9eb0079 [v0.10] Use CMake instead of Make, which will take care of pciutils automatically if it is not installed 2021-09-04 14:05:16 +02:00
Dr-Noob
039e7c350d [v0.10] Replace nvml by pciutils to get pci ids. Needs work to integrate it properly. NVML is enough in the case of NVIDIA GPUs, but because more GPUs will be added in the future, a solution like pciutils is needed 2021-09-04 12:19:42 +02:00
Dr-Noob
4b4d1bc030 [v0.10] Add --list-gpus option 2021-08-23 22:39:31 +02:00
Dr-Noob
d00e3f183d [v0.10] Add simple man page 2021-08-23 22:02:45 +02:00
40 changed files with 1371 additions and 344 deletions

119
CMakeLists.txt Normal file
View File

@@ -0,0 +1,119 @@
# Build description for gpufetch.
#
# Configuration switches (pass with -D<name>=<value>):
#   ENABLE_INTEL_BACKEND - build the Intel backend (defaults to true when not
#                          defined by the user)
#   ENABLE_CUDA_BACKEND  - build the CUDA backend; when undefined or set to a
#                          true value it is enabled only if a CUDA compiler
#                          is detected, otherwise it is switched off
cmake_minimum_required(VERSION 3.10)

include(CheckLanguage)
include(ExternalProject)

project(gpufetch CXX)

# ANSI escape sequences used to colorize configure-time messages. They have to
# be defined before the first message() that expands them: an undefined
# variable silently expands to an empty string.
if(NOT WIN32)
  string(ASCII 27 Esc)
  set(ColorReset "${Esc}[m")
  set(ColorBold "${Esc}[1m")
  set(Red "${Esc}[31m")
  set(Green "${Esc}[32m")
  set(BoldRed "${Esc}[1;31m")
  set(BoldGreen "${Esc}[1;32m")
  set(BoldYellow "${Esc}[1;33m")
endif()

set(SRC_DIR "src")
set(COMMON_DIR "${SRC_DIR}/common")
set(CUDA_DIR "${SRC_DIR}/cuda")
set(INTEL_DIR "${SRC_DIR}/intel")

# Request plain C++11 (-std=c++11, no GNU extensions) for every target created
# below; these variables only affect targets created after this point.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Append the project warnings to whatever the user already supplied (CXXFLAGS
# or -DCMAKE_CXX_FLAGS=...) instead of overwriting it.
set(SANITY_FLAGS "-Wfloat-equal -Wshadow -Wpointer-arith")
string(APPEND CMAKE_CXX_FLAGS " ${SANITY_FLAGS} -Wall -Wextra -pedantic -fstack-protector-all")

if(NOT DEFINED ENABLE_INTEL_BACKEND)
  set(ENABLE_INTEL_BACKEND true)
endif()

if(NOT DEFINED ENABLE_CUDA_BACKEND OR ENABLE_CUDA_BACKEND)
  check_language(CUDA)
  if(CMAKE_CUDA_COMPILER)
    enable_language(CUDA)
    set(ENABLE_CUDA_BACKEND true)
    # Must link_directories early so add_executable(gpufetch ...) gets the right directories.
    # link_directories() only accepts directories, so no target name is passed here.
    link_directories("${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/lib")
  else()
    set(ENABLE_CUDA_BACKEND false)
  endif()
endif()

# Locate pciutils through cmake/FindPCIUTILS.cmake; fall back to building a
# private static copy when the system does not provide it.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")
find_package(PCIUTILS)

if(NOT PCIUTILS_FOUND)
  message(STATUS "${BoldYellow}pciutils not found, downloading and building a local copy...${ColorReset}")

  # Download and build pciutils
  set(PCIUTILS_INSTALL_LOCATION ${CMAKE_BINARY_DIR}/pciutils-install)
  ExternalProject_Add(pciutils
    GIT_REPOSITORY https://github.com/pciutils/pciutils
    CONFIGURE_COMMAND ""
    BUILD_COMMAND make SHARED=no
    BUILD_IN_SOURCE true
    INSTALL_COMMAND make PREFIX=${PCIUTILS_INSTALL_LOCATION} install-lib
  )

  include_directories(${PCIUTILS_INSTALL_LOCATION}/include)
  link_directories(${PCIUTILS_INSTALL_LOCATION}/lib)
else()
  include_directories(${PCIUTILS_INCLUDE_DIR})
  link_libraries(${PCIUTILS_LIBRARIES})
endif()

add_executable(gpufetch
  ${COMMON_DIR}/main.cpp
  ${COMMON_DIR}/args.cpp
  ${COMMON_DIR}/gpu.cpp
  ${COMMON_DIR}/pci.cpp
  ${COMMON_DIR}/global.cpp
  ${COMMON_DIR}/printer.cpp
  ${COMMON_DIR}/master.cpp
  ${COMMON_DIR}/uarch.cpp
)

if(NOT PCIUTILS_FOUND)
  # The executable is linked against the local pciutils copy as well, so it
  # must wait for it even when no backend library carries the dependency.
  add_dependencies(gpufetch pciutils)
endif()

if(ENABLE_INTEL_BACKEND)
  target_compile_definitions(gpufetch PUBLIC BACKEND_INTEL)

  add_library(intel_backend STATIC
    ${INTEL_DIR}/intel.cpp
    ${INTEL_DIR}/pci.cpp
    ${INTEL_DIR}/uarch.cpp
    ${INTEL_DIR}/udev.cpp
  )

  if(NOT PCIUTILS_FOUND)
    add_dependencies(intel_backend pciutils)
  endif()

  target_link_libraries(gpufetch PRIVATE intel_backend)
endif()

if(ENABLE_CUDA_BACKEND)
  target_compile_definitions(gpufetch PUBLIC BACKEND_CUDA)

  # https://en.wikipedia.org/w/index.php?title=CUDA&section=5#GPUs_supported
  # https://raw.githubusercontent.com/PointCloudLibrary/pcl/master/cmake/pcl_find_cuda.cmake
  if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.0")
    set(CMAKE_CUDA_ARCHITECTURES 35 37 50 52 53 60 61 62 70 72 75 80 86)
  elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "10.0")
    set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72 75)
  elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "9.0")
    set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72)
  elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_EQUAL "8.0")
    set(CMAKE_CUDA_ARCHITECTURES 20 21 30 32 35 37 50 52 53 60 61 62)
  endif()

  add_library(cuda_backend STATIC
    ${CUDA_DIR}/cuda.cpp
    ${CUDA_DIR}/uarch.cpp
    ${CUDA_DIR}/pci.cpp
  )

  if(NOT PCIUTILS_FOUND)
    add_dependencies(cuda_backend pciutils)
  endif()

  target_include_directories(cuda_backend PUBLIC
    ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/samples/common/inc
    ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/include
  )
  target_link_libraries(cuda_backend PRIVATE cudart)
  target_link_libraries(gpufetch PRIVATE cuda_backend)
endif()

# Listed after the backend libraries so that their references to libpci are
# resolved by static linkers that process libraries left to right.
target_link_libraries(gpufetch PRIVATE pci z)

install(TARGETS gpufetch DESTINATION bin)

message(STATUS "----------------------")
message(STATUS "gpufetch build report:")
if(ENABLE_INTEL_BACKEND)
  message(STATUS "Intel backend: ${BoldGreen}ON${ColorReset}")
else()
  message(STATUS "Intel backend: ${BoldRed}OFF${ColorReset}")
endif()
if(ENABLE_CUDA_BACKEND)
  message(STATUS "CUDA backend: ${BoldGreen}ON${ColorReset}")
else()
  message(STATUS "CUDA backend: ${BoldRed}OFF${ColorReset}")
endif()
message(STATUS "----------------------")

View File

@@ -1,53 +0,0 @@
CXX ?= g++
CUDA_PATH ?= /usr/local/cuda/
PREFIX ?= /usr
CXXFLAGS+=-Wall -Wextra -pedantic -fstack-protector-all -pedantic
SANITY_FLAGS=-Wfloat-equal -Wshadow -Wpointer-arith
SRC_COMMON=src/common/
SRC_CUDA=src/cuda/
COMMON_SRC = $(SRC_COMMON)main.cpp $(SRC_COMMON)gpu.cpp $(SRC_COMMON)args.cpp $(SRC_COMMON)global.cpp $(SRC_COMMON)printer.cpp
COMMON_HDR = $(SRC_COMMON)ascii.hpp $(SRC_COMMON)gpu.hpp $(SRC_COMMON)args.hpp $(SRC_COMMON)global.hpp $(SRC_COMMON)printer.hpp
CUDA_SRC = $(SRC_CUDA)cuda.cpp $(SRC_CUDA)uarch.cpp $(SRC_CUDA)pci.cpp $(SRC_CUDA)nvmlb.cpp
CUDA_HDR = $(SRC_CUDA)cuda.hpp $(SRC_CUDA)uarch.hpp $(SRC_CUDA)pci.hpp $(SRC_CUDA)nvmlb.hpp $(SRC_CUDA)chips.hpp
SOURCE += $(COMMON_SRC) $(CUDA_SRC)
HEADERS += $(COMMON_HDR) $(CUDA_HDR)
OUTPUT=gpufetch
CXXFLAGS+= -I $(CUDA_PATH)/samples/common/inc -I $(CUDA_PATH)/targets/x86_64-linux/include -L $(CUDA_PATH)/targets/x86_64-linux/lib -lcudart -lnvidia-ml
all: CXXFLAGS += -O3
all: $(OUTPUT)
debug: CXXFLAGS += -g -O0
debug: $(OUTPUT)
static: CXXFLAGS += -static -O3
static: $(OUTPUT)
strict: CXXFLAGS += -O3 -Werror -fsanitize=undefined -D_FORTIFY_SOURCE=2
strict: $(OUTPUT)
$(OUTPUT): Makefile $(SOURCE) $(HEADERS)
$(CXX) $(CXXFLAGS) $(SANITY_FLAGS) $(SOURCE) -o $(OUTPUT)
run: $(OUTPUT)
./$(OUTPUT)
clean:
@rm -f $(OUTPUT)
install: $(OUTPUT)
install -Dm755 "gpufetch" "$(DESTDIR)$(PREFIX)/bin/gpufetch"
install -Dm644 "LICENSE" "$(DESTDIR)$(PREFIX)/share/licenses/gpufetch-git/LICENSE"
install -Dm644 "gpufetch.1" "$(DESTDIR)$(PREFIX)/share/man/man1/gpufetch.1.gz"
uninstall:
rm -f "$(DESTDIR)$(PREFIX)/bin/gpufetch"
rm -f "$(DESTDIR)$(PREFIX)/share/licenses/gpufetch-git/LICENSE"
rm -f "$(DESTDIR)$(PREFIX)/share/man/man1/gpufetch.1.gz"

View File

@@ -1,19 +1,30 @@
<p align="center"><img width=50% src="./pictures/gpufetch.png"></p> <p align="center"><img width=50% src="./pictures/gpufetch.png"></p>
<div align="center">
![GitHub tag (latest by date)](https://img.shields.io/github/v/tag/Dr-Noob/gpufetch?label=gpufetch)
[![GitHub Repo stars](https://img.shields.io/github/stars/Dr-Noob/gpufetch?color=4CC61F)](https://github.com/Dr-Noob/gpufetch/stargazers)
[![GitHub issues](https://img.shields.io/github/issues/Dr-Noob/gpufetch)](https://github.com/Dr-Noob/gpufetch/issues)
[![License](https://img.shields.io/github/license/Dr-Noob/gpufetch?color=orange)](https://github.com/Dr-Noob/gpufetch/blob/master/LICENSE)
<h4 align="center">Simple yet fancy GPU architecture fetching tool</h4> <h4 align="center">Simple yet fancy GPU architecture fetching tool</h4>
&nbsp;
![gpu_img](pictures/2080ti.png) <p align="center"> </p>
<div align="center">
<img height="22px" src="https://img.shields.io/github/v/tag/Dr-Noob/gpufetch?label=gpufetch&style=flat-square">
<a href="https://github.com/Dr-Noob/gpufetch/stargazers">
<img height="22px" src="https://img.shields.io/github/stars/Dr-Noob/gpufetch?color=4CC61F&style=flat-square">
</a>
<a href="https://github.com/Dr-Noob/gpufetch/issues">
<img height="22px" src="https://img.shields.io/github/issues/Dr-Noob/gpufetch?style=flat-square">
</a>
<a href="https://github.com/Dr-Noob/gpufetch/blob/master/LICENSE">
<img height="22px" src="https://img.shields.io/github/license/Dr-Noob/gpufetch?color=orange&style=flat-square">
</a>
</div> </div>
<p align="center"> </p>
<p align="center">
gpufetch is a command-line tool written in C++ that displays the GPU information in a clean and beautiful way
</p>
<p align="center"><img width=80% src="./pictures/2080ti.png"></p>
# Table of contents # Table of contents
<!-- UPDATE with: doctoc --notitle README.md --> <!-- UPDATE with: doctoc --notitle README.md -->
<!-- START doctoc generated TOC please keep comment here to allow auto update --> <!-- START doctoc generated TOC please keep comment here to allow auto update -->
@@ -31,18 +42,28 @@
gpufetch supports NVIDIA GPUs under Linux only. gpufetch supports NVIDIA GPUs under Linux only.
# 2. Installation (building from source) # 2. Installation (building from source)
You will need a C++ compiler (e.g, `g++`), `make` and CUDA to compile `gpufetch`. To do so, just clone the repo and run `make`: You will need:
- C++ compiler (e.g, `g++`)
- `cmake`
- `make`
- CUDA (NVIDIA backend)
- pciutils (optional)
To build gpufetch, just clone the repo and run `./build.sh`:
``` ```
git clone https://github.com/Dr-Noob/gpufetch git clone https://github.com/Dr-Noob/gpufetch
cd gpufetch cd gpufetch
make ./build.sh
./gpufetch ./gpufetch
``` ```
When building gpufetch, you may encounter an error telling you that it cannot find some CUDA header files. In this case, is very likely that the Makefile is unable to find your CUDA installation. This can be solved by setting `CUDA_PATH` to the correct CUDA installation path. For example:
- NOTE 1: It is recommended to install the `pciutils` development package, which is needed by gpufetch. If it is not installed, it will be downloaded and built automatically just to compile gpufetch.
- NOTE 2: When building gpufetch, CMake may fail if it is unable to find the CUDA installation. If CUDA is installed but CMake does not find it, you need to pass the CUDA path to CMake. You can do this easily by directly editing the `build.sh` script. For example:
``` ```
CUDA_PATH=/opt/cuda make cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=/usr/local/cuda/ ..
``` ```
# 3. Colors and style # 3. Colors and style

19
build.sh Executable file
View File

@@ -0,0 +1,19 @@
#!/bin/bash

# gpufetch build script
#
# Configures and builds gpufetch out of tree in ./build and leaves a
# ./gpufetch symlink pointing at the freshly built binary.

set -e

# Always work from the directory that contains this script. Every path below
# is relative, so without this the 'rm -rf' would act on whatever directory
# the script happened to be invoked from.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")"

rm -rf build/ gpufetch
mkdir build/
cd build/

# In case you have CUDA installed but it is not detected,
# - set CMAKE_CUDA_COMPILER to your nvcc binary:
# - set CMAKE_CUDA_COMPILER_TOOLKIT_ROOT to the CUDA root dir
# for example:
# cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=/usr/local/cuda/ ..
cmake ..
make -j"$(nproc)"

# Back to the project root to publish the binary next to the script
cd ..
ln -s build/gpufetch .

29
cmake/FindPCIUTILS.cmake Normal file
View File

@@ -0,0 +1,29 @@
# - Try to find the pciutils library (libpci)
# Once done this will define
#
#  PCIUTILS_FOUND       - system has PCIUtils
#  PCIUTILS_INCLUDE_DIR - the PCIUTILS include directory
#  PCIUTILS_LIBRARIES   - The libraries needed to use PCIUtils
#
# Cache entries created by this module:
#  PCIUTILS_INCLUDE_DIR - directory that contains pci/pci.h
#  PCIUTILS_LIBRARY     - location of the pci library
#  RESOLV_LIBRARY       - location of the resolv library, searched for only
#                         when the pci library was found

# Both lookups are already cached from a previous run: stay quiet.
# PCIUTILS_LIBRARY is tested (not PCIUTILS_LIBRARIES) because only the former
# is a cache entry and therefore still set when this module runs again.
if(PCIUTILS_INCLUDE_DIR AND PCIUTILS_LIBRARY)
  set(PCIUTILS_FIND_QUIETLY TRUE)
endif()

find_path(PCIUTILS_INCLUDE_DIR pci/pci.h)
find_library(PCIUTILS_LIBRARY NAMES pci)

if(PCIUTILS_LIBRARY)
  # The resolv library is added to the link list whenever it is available
  find_library(RESOLV_LIBRARY NAMES resolv)
  if(RESOLV_LIBRARY)
    set(PCIUTILS_LIBRARIES ${PCIUTILS_LIBRARY} ${RESOLV_LIBRARY})
  else()
    set(PCIUTILS_LIBRARIES ${PCIUTILS_LIBRARY})
  endif()
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(PCIUTILS DEFAULT_MSG PCIUTILS_LIBRARIES PCIUTILS_INCLUDE_DIR)

# Hide the actual cache entries from the default cmake-gui/ccmake view.
# PCIUTILS_LIBRARIES is a normal variable, so it is not listed here.
mark_as_advanced(PCIUTILS_INCLUDE_DIR PCIUTILS_LIBRARY RESOLV_LIBRARY)

47
gpufetch.1 Normal file
View File

@@ -0,0 +1,47 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.48.3.
.TH GPUFETCH "1" "August 2021" "gpufetch v0.10" "User Commands"
.SH NAME
gpufetch
.SH SYNOPSIS
.B gpufetch
[\fI\,OPTION\/\fR]...
.SH DESCRIPTION
Simple yet fancy GPU architecture fetching tool
.SH OPTIONS
.TP
\fB\-c\fR, \fB\-\-color\fR
Sets the color scheme (by default, gpufetch uses the system color scheme) See COLORS section for a more detailed explanation
.TP
\fB\-g\fR, \fB\-\-gpu\fR
Selects the GPU to use (default: 0)
.TP
\fB\-h\fR, \fB\-\-help\fR
Prints this help and exit
.TP
\fB\-V\fR, \fB\-\-version\fR
Prints gpufetch version and exit
.SS "COLORS:"
.IP
Color scheme can be set using a predefined color scheme or a custom one:
1. To use a predefined color scheme, the name of the scheme must be provided. Possible values are:
* "nvidia": Use NVIDIA default color scheme
2. To use a custom color scheme, 4 colors must be given in RGB with the format: R,G,B:R,G,B:...
The first 2 colors are the GPU art color and the following 2 colors are the text colors
.SS "EXAMPLES:"
.IP
Run gpufetch with NVIDIA color scheme:
.IP
\&./gpufetch \fB\-\-color\fR nvidia
.IP
Run gpufetch with a custom color scheme:
.IP
\&./gpufetch \fB\-\-color\fR 239,90,45:210,200,200:100,200,45:0,200,200
.SS "BUGS:"
.IP
Report bugs to https://github.com/Dr\-Noob/gpufetch/issues
.SS "NOTE:"
.IP
Peak performance information is NOT accurate. gpufetch computes peak performance using the max
frequency. However, to properly compute peak performance, you need to know the frequency of the
GPU running real code.
For peak performance measurement see: https://github.com/Dr\-Noob/peakperf

Binary file not shown.

Before

Width:  |  Height:  |  Size: 39 KiB

After

Width:  |  Height:  |  Size: 882 KiB

View File

@@ -13,12 +13,18 @@
#define NUM_COLORS 4 #define NUM_COLORS 4
#define COLOR_STR_NVIDIA "nvidia" #define COLOR_STR_NVIDIA "nvidia"
#define COLOR_STR_INTEL "intel"
#define COLOR_DEFAULT_NVIDIA "118,185,0:255,255,255:255,255,255:118,185,0" // +-----------------------+-----------------------+
// | Color logo | Color text |
// | Color 1 | Color 2 | Color 1 | Color 2 |
#define COLOR_DEFAULT_NVIDIA "118,185,000:255,255,255:255,255,255:118,185,000"
#define COLOR_DEFAULT_INTEL "015,125,194:230,230,230:040,150,220:230,230,230"
struct args_struct { struct args_struct {
bool help_flag; bool help_flag;
bool version_flag; bool version_flag;
bool list_gpus;
int gpu_idx; int gpu_idx;
STYLE style; STYLE style;
struct color** colors; struct color** colors;
@@ -28,17 +34,19 @@ int errn = 0;
static struct args_struct args; static struct args_struct args;
const char args_chr[] = { const char args_chr[] = {
/* [ARG_CHAR_COLOR] = */ 'c', /* [ARG_COLOR] = */ 'c',
/* [ARG_CHAR_GPU] = */ 'g', /* [ARG_GPU] = */ 'g',
/* [ARG_CHAR_HELP] = */ 'h', /* [ARG_LIST] = */ 'l',
/* [ARG_CHAR_VERSION] = */ 'V', /* [ARG_HELP] = */ 'h',
/* [ARG_VERSION] = */ 'V',
}; };
const char *args_str[] = { const char *args_str[] = {
/* [ARG_CHAR_COLOR] = */ "color", /* [ARG_COLOR] = */ "color",
/* [ARG_CHAR_GPU] = */ "gpu", /* [ARG_GPU] = */ "gpu",
/* [ARG_CHAR_HELP] = */ "help", /* [ARG_LIST] = */ "list-gpus",
/* [ARG_CHAR_VERSION] = */ "version", /* [ARG_HELP] = */ "help",
/* [ARG_VERSION] = */ "version",
}; };
int getarg_int(char* str) { int getarg_int(char* str) {
@@ -100,6 +108,10 @@ bool show_help() {
return args.help_flag; return args.help_flag;
} }
// Returns the list_gpus flag stored in the file-level args structure.
bool list_gpus() {
return args.list_gpus;
}
bool show_version() { bool show_version() {
return args.version_flag; return args.version_flag;
} }
@@ -119,8 +131,9 @@ char* build_short_options() {
char* str = (char *) emalloc(sizeof(char) * (len*2 + 1)); char* str = (char *) emalloc(sizeof(char) * (len*2 + 1));
memset(str, 0, sizeof(char) * (len*2 + 1)); memset(str, 0, sizeof(char) * (len*2 + 1));
sprintf(str, "%c:%c:%c%c", c[ARG_GPU], sprintf(str, "%c:%c:%c%c%c", c[ARG_GPU],
c[ARG_COLOR], c[ARG_HELP], c[ARG_VERSION]); c[ARG_COLOR], c[ARG_HELP], c[ARG_LIST],
c[ARG_VERSION]);
return str; return str;
} }
@@ -137,6 +150,7 @@ bool parse_color(char* optarg_str, struct color*** cs) {
bool free_ptr = true; bool free_ptr = true;
if(strcmp(optarg_str, COLOR_STR_NVIDIA) == 0) color_to_copy = COLOR_DEFAULT_NVIDIA; if(strcmp(optarg_str, COLOR_STR_NVIDIA) == 0) color_to_copy = COLOR_DEFAULT_NVIDIA;
else if(strcmp(optarg_str, COLOR_STR_INTEL) == 0) color_to_copy = COLOR_DEFAULT_INTEL;
else { else {
str_to_parse = optarg_str; str_to_parse = optarg_str;
free_ptr = false; free_ptr = false;
@@ -185,12 +199,14 @@ bool parse_args(int argc, char* argv[]) {
args.version_flag = false; args.version_flag = false;
args.help_flag = false; args.help_flag = false;
args.list_gpus = false;
args.gpu_idx = 0; args.gpu_idx = 0;
args.colors = NULL; args.colors = NULL;
const struct option long_options[] = { const struct option long_options[] = {
{args_str[ARG_COLOR], required_argument, 0, args_chr[ARG_COLOR] }, {args_str[ARG_COLOR], required_argument, 0, args_chr[ARG_COLOR] },
{args_str[ARG_GPU], required_argument, 0, args_chr[ARG_GPU] }, {args_str[ARG_GPU], required_argument, 0, args_chr[ARG_GPU] },
{args_str[ARG_LIST], no_argument, 0, args_chr[ARG_LIST] },
{args_str[ARG_HELP], no_argument, 0, args_chr[ARG_HELP] }, {args_str[ARG_HELP], no_argument, 0, args_chr[ARG_HELP] },
{args_str[ARG_VERSION], no_argument, 0, args_chr[ARG_VERSION] }, {args_str[ARG_VERSION], no_argument, 0, args_chr[ARG_VERSION] },
{0, 0, 0, 0} {0, 0, 0, 0}
@@ -199,7 +215,7 @@ bool parse_args(int argc, char* argv[]) {
char* short_options = build_short_options(); char* short_options = build_short_options();
opt = getopt_long(argc, argv, short_options, long_options, &option_index); opt = getopt_long(argc, argv, short_options, long_options, &option_index);
while (!args.help_flag && !args.version_flag && opt != -1) { while (!args.help_flag && !args.version_flag && !args.list_gpus && opt != -1) {
if(opt == args_chr[ARG_COLOR]) { if(opt == args_chr[ARG_COLOR]) {
args.colors = (struct color **) emalloc(sizeof(struct color *) * NUM_COLORS); args.colors = (struct color **) emalloc(sizeof(struct color *) * NUM_COLORS);
if(!parse_color(optarg, &args.colors)) { if(!parse_color(optarg, &args.colors)) {
@@ -215,6 +231,9 @@ bool parse_args(int argc, char* argv[]) {
return false; return false;
} }
} }
else if(opt == args_chr[ARG_LIST]) {
args.list_gpus = true;
}
else if(opt == args_chr[ARG_HELP]) { else if(opt == args_chr[ARG_HELP]) {
args.help_flag = true; args.help_flag = true;
} }

View File

@@ -21,6 +21,7 @@ enum {
enum { enum {
ARG_COLOR, ARG_COLOR,
ARG_GPU, ARG_GPU,
ARG_LIST,
ARG_HELP, ARG_HELP,
ARG_VERSION ARG_VERSION
}; };
@@ -33,6 +34,7 @@ extern const char *args_str[];
int max_arg_str_length(); int max_arg_str_length();
bool parse_args(int argc, char* argv[]); bool parse_args(int argc, char* argv[]);
bool show_help(); bool show_help();
bool list_gpus();
bool show_version(); bool show_version();
void free_colors_struct(struct color** cs); void free_colors_struct(struct color** cs);
int get_gpu_idx(); int get_gpu_idx();

View File

@@ -1,32 +1,32 @@
#ifndef __ASCII__ #ifndef __ASCII__
#define __ASCII__ #define __ASCII__
#define COLOR_NONE "" #define C_NONE ""
#define COLOR_FG_BLACK "\x1b[30;1m" #define C_FG_BLACK "\x1b[30;1m"
#define COLOR_FG_RED "\x1b[31;1m" #define C_FG_RED "\x1b[31;1m"
#define COLOR_FG_GREEN "\x1b[32;1m" #define C_FG_GREEN "\x1b[32;1m"
#define COLOR_FG_YELLOW "\x1b[33;1m" #define C_FG_YELLOW "\x1b[33;1m"
#define COLOR_FG_BLUE "\x1b[34;1m" #define C_FG_BLUE "\x1b[34;1m"
#define COLOR_FG_MAGENTA "\x1b[35;1m" #define C_FG_MAGENTA "\x1b[35;1m"
#define COLOR_FG_CYAN "\x1b[36;1m" #define C_FG_CYAN "\x1b[36;1m"
#define COLOR_FG_WHITE "\x1b[37;1m" #define C_FG_WHITE "\x1b[37;1m"
#define COLOR_BG_BLACK "\x1b[40;1m" #define C_BG_BLACK "\x1b[40;1m"
#define COLOR_BG_RED "\x1b[41;1m" #define C_BG_RED "\x1b[41;1m"
#define COLOR_BG_GREEN "\x1b[42;1m" #define C_BG_GREEN "\x1b[42;1m"
#define COLOR_BG_YELLOW "\x1b[43;1m" #define C_BG_YELLOW "\x1b[43;1m"
#define COLOR_BG_BLUE "\x1b[44;1m" #define C_BG_BLUE "\x1b[44;1m"
#define COLOR_BG_MAGENTA "\x1b[45;1m" #define C_BG_MAGENTA "\x1b[45;1m"
#define COLOR_BG_CYAN "\x1b[46;1m" #define C_BG_CYAN "\x1b[46;1m"
#define COLOR_BG_WHITE "\x1b[47;1m" #define C_BG_WHITE "\x1b[47;1m"
#define COLOR_FG_B_BLACK "\x1b[90;1m" #define C_FG_B_BLACK "\x1b[90;1m"
#define COLOR_FG_B_RED "\x1b[91;1m" #define C_FG_B_RED "\x1b[91;1m"
#define COLOR_FG_B_GREEN "\x1b[92;1m" #define C_FG_B_GREEN "\x1b[92;1m"
#define COLOR_FG_B_YELLOW "\x1b[93;1m" #define C_FG_B_YELLOW "\x1b[93;1m"
#define COLOR_FG_B_BLUE "\x1b[94;1m" #define C_FG_B_BLUE "\x1b[94;1m"
#define COLOR_FG_B_MAGENTA "\x1b[95;1m" #define C_FG_B_MAGENTA "\x1b[95;1m"
#define COLOR_FG_B_CYAN "\x1b[96;1m" #define C_FG_B_CYAN "\x1b[96;1m"
#define COLOR_FG_B_WHITE "\x1b[97;1m" #define C_FG_B_WHITE "\x1b[97;1m"
#define COLOR_RESET "\x1b[m" #define C_RESET "\x1b[m"
struct ascii_logo { struct ascii_logo {
const char* art; const char* art;
@@ -59,6 +59,23 @@ $C2## ## ## ## ## ## ## ## #: :# \
$C2## ## ## ## ## ## ## ## ####### \ $C2## ## ## ## ## ## ## ## ####### \
$C2## ## ### ## ###### ## ## ## " $C2## ## ### ## ###### ## ## ## "
#define ASCII_INTEL \
"$C1 .#################. \
$C1 .#### ####. \
$C1 .## ### \
$C1 ## :## ### \
$C1 # ## :## ## \
$C1 ## ## ######. #### ###### :## ## \
$C1 ## ## ##: ##: ## ## ### :## ### \
$C1## ## ##: ##: ## :######## :## ## \
$C1## ## ##: ##: ## ##. . :## #### \
$C1## # ##: ##: #### #####: ## \
$C1 ## \
$C1 ###. ..o####. \
$C1 ######oo... ..oo####### \
$C1 o###############o "
// LONG LOGOS
#define ASCII_NVIDIA_L \ #define ASCII_NVIDIA_L \
"$C1 MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM \ "$C1 MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM \
$C1 MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM \ $C1 MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM \
@@ -76,14 +93,37 @@ $C1 olcc::; ,:ccloMMMMMMMMM \
$C1 :......oMMMMMMMMMMMMMMMMMMMMMM \ $C1 :......oMMMMMMMMMMMMMMMMMMMMMM \
$C1 :lllMMMMMMMMMMMMMMMMMMMMMMMMMM " $C1 :lllMMMMMMMMMMMMMMMMMMMMMMMMMM "
#define ASCII_INTEL_L \
"$C1 ###############@ \
$C1 ######@ ######@ \
$C1 ###@ ###@ \
$C1 ##@ ###@ \
$C1 ##@ ##@ \
$C1 ##@ ##@ \
$C1 @ ##@ ##@ ##@ \
$C1 #@ ##@ ########@ #####@ #####@ ##@ ##@ \
$C1 #@ ##@ ##@ ##@ ##@ ###@ ###@ ##@ ##@ \
$C1 #@ ##@ ##@ ##@ ##@ ##@ ##@ ##@ ##@ \
$C1 #@ ##@ ##@ ##@ ##@ #########@ ##@ ###@ \
$C1 #@ ##@ ##@ ##@ ##@ ##@ ##@ ####@ \
$C1 #@ #@ ##@ ##@ ####@ ########@ #@ ##@ \
$C1 ##@ \
$C1 ##@ \
$C1 ###@ ###@ \
$C1 ####@ #########@ \
$C1 #########@ ###############@ \
$C1 ##############################@ "
typedef struct ascii_logo asciiL; typedef struct ascii_logo asciiL;
// ------------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------
// | LOGO | W | H | REPLACE | COLORS LOGO (>0 && <10) | COLORS TEXT (=2) | // | LOGO | W | H | REPLACE | COLORS LOGO | COLORS TEXT |
// ------------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------
asciiL logo_nvidia = { ASCII_NVIDIA, 45, 19, false, {COLOR_FG_GREEN, COLOR_FG_WHITE}, {COLOR_FG_WHITE, COLOR_FG_GREEN} }; asciiL logo_nvidia = { ASCII_NVIDIA, 45, 19, false, {C_FG_GREEN, C_FG_WHITE}, {C_FG_WHITE, C_FG_GREEN} };
// Long variants | ---------------------------------------------------------------------------------------------------| asciiL logo_intel = { ASCII_INTEL, 48, 14, false, {C_FG_CYAN}, {C_FG_CYAN, C_FG_WHITE} };
asciiL logo_nvidia_l = { ASCII_NVIDIA_L, 50, 15, false, {COLOR_FG_GREEN, COLOR_FG_WHITE}, {COLOR_FG_WHITE, COLOR_FG_GREEN} }; // Long variants | ---------------------------------------------------------------------------------------|
asciiL logo_unknown = { NULL, 0, 0, false, {COLOR_NONE}, {COLOR_NONE, COLOR_NONE} }; asciiL logo_nvidia_l = { ASCII_NVIDIA_L, 50, 15, false, {C_FG_GREEN, C_FG_WHITE}, {C_FG_WHITE, C_FG_GREEN} };
asciiL logo_intel_l = { ASCII_INTEL_L, 62, 19, true, {C_BG_CYAN, C_BG_WHITE}, {C_FG_CYAN, C_FG_WHITE} };
asciiL logo_unknown = { NULL, 0, 0, false, {C_NONE}, {C_NONE, C_NONE} };
#endif #endif

View File

@@ -2,7 +2,6 @@
#define __GLOBAL__ #define __GLOBAL__
#include <stdbool.h> #include <stdbool.h>
#include <stddef.h>
#include <cstddef> #include <cstddef>
#define STRING_UNKNOWN "Unknown" #define STRING_UNKNOWN "Unknown"

View File

@@ -32,8 +32,6 @@ VENDOR get_gpu_vendor(struct gpu_info* gpu) {
return gpu->vendor; return gpu->vendor;
} }
double trunc(double val) { return ((int)(100 * val)) / 100.0; }
int32_t get_value_as_smallest_unit(char ** str, uint64_t value) { int32_t get_value_as_smallest_unit(char ** str, uint64_t value) {
int32_t ret; int32_t ret;
int max_len = 10; // Max is 8 for digits, 2 for units int max_len = 10; // Max is 8 for digits, 2 for units
@@ -116,17 +114,17 @@ char* get_str_l2(struct gpu_info* gpu) {
return string; return string;
} }
char* get_str_peak_performance(struct gpu_info* gpu) { char* get_str_peak_performance_generic(int64_t pp) {
char* str; char* str;
if(gpu->peak_performance == -1) { if(pp == -1) {
str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1)); str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1); strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
return str; return str;
} }
// 7 for digits (e.g, XXXX.XX), 7 for XFLOP/s // 7 for digits (e.g, XXXX.XX), 7 for XFLOP/s
double flopsd = (double) gpu->peak_performance; double flopsd = (double) pp;
uint32_t max_size = 7+1+7+1; uint32_t max_size = 7+1+7+1;
str = (char *) ecalloc(max_size, sizeof(char)); str = (char *) ecalloc(max_size, sizeof(char));
@@ -139,3 +137,19 @@ char* get_str_peak_performance(struct gpu_info* gpu) {
return str; return str;
} }
// Returns the string that get_str_peak_performance_generic builds for
// gpu->peak_performance.
char* get_str_peak_performance(struct gpu_info* gpu) {
return get_str_peak_performance_generic(gpu->peak_performance);
}
// Returns the string that get_str_peak_performance_generic builds for
// gpu->peak_performance_tcu.
char* get_str_peak_performance_tensor(struct gpu_info* gpu) {
return get_str_peak_performance_generic(gpu->peak_performance_tcu);
}
// Formats a 32-bit signed integer as decimal text in a buffer obtained from
// ecalloc. The buffer is returned without being freed here.
char* get_str_generic(int32_t data) {
  // Up to 10 digits, +1 for a possible minus sign, +1 for the terminating NUL
  const uint32_t buf_len = 10 + 1 + 1;
  char* text = (char *) ecalloc(buf_len, sizeof(char));

  snprintf(text, buf_len, "%d", data);

  return text;
}

View File

@@ -4,13 +4,13 @@
#include <stdint.h> #include <stdint.h>
#include <stdbool.h> #include <stdbool.h>
#include "../cuda/nvmlb.hpp"
#include "../cuda/pci.hpp" #include "../cuda/pci.hpp"
#define UNKNOWN_FREQ -1 #define UNKNOWN_FREQ -1
enum { enum {
GPU_VENDOR_NVIDIA GPU_VENDOR_NVIDIA,
GPU_VENDOR_INTEL
}; };
enum { enum {
@@ -41,6 +41,13 @@ struct topology {
int32_t streaming_mp; int32_t streaming_mp;
int32_t cores_per_mp; int32_t cores_per_mp;
int32_t cuda_cores; int32_t cuda_cores;
int32_t tensor_cores;
};
// Second topology record, referenced from gpu_info through the topo_i pointer.
// NOTE(review): presumably describes the Intel iGPU layout — confirm against the Intel backend.
struct topology_i {
int32_t slices; // NOTE(review): presumably the number of slices — confirm
int32_t subslices; // NOTE(review): total or per-slice count is not visible here — confirm
int32_t eu_subslice; // NOTE(review): presumably execution units per subslice — confirm
};
struct memory { struct memory {
@@ -57,11 +64,12 @@ struct gpu_info {
char* name; char* name;
int64_t freq; int64_t freq;
struct pci* pci; struct pci* pci;
struct nvml_data* nvmld;
struct topology* topo; struct topology* topo;
struct topology_i* topo_i;
struct memory* mem; struct memory* mem;
struct cache* cach; struct cache* cach;
int64_t peak_performance; int64_t peak_performance;
int64_t peak_performance_tcu;
int32_t idx; int32_t idx;
}; };
@@ -74,5 +82,7 @@ char* get_str_bus_width(struct gpu_info* gpu);
char* get_str_memory_clock(struct gpu_info* gpu); char* get_str_memory_clock(struct gpu_info* gpu);
char* get_str_l2(struct gpu_info* gpu); char* get_str_l2(struct gpu_info* gpu);
char* get_str_peak_performance(struct gpu_info* gpu); char* get_str_peak_performance(struct gpu_info* gpu);
char* get_str_peak_performance_tensor(struct gpu_info* gpu);
char* get_str_generic(int32_t data);
#endif #endif

View File

@@ -4,10 +4,11 @@
#include "args.hpp" #include "args.hpp"
#include "global.hpp" #include "global.hpp"
#include "master.hpp"
#include "../cuda/cuda.hpp" #include "../cuda/cuda.hpp"
#include "../cuda/uarch.hpp" #include "../cuda/uarch.hpp"
static const char* VERSION = "0.10"; static const char* VERSION = "0.11";
void print_help(char *argv[]) { void print_help(char *argv[]) {
const char **t = args_str; const char **t = args_str;
@@ -18,10 +19,11 @@ void print_help(char *argv[]) {
printf("Simple yet fancy GPU architecture fetching tool\n\n"); printf("Simple yet fancy GPU architecture fetching tool\n\n");
printf("Options: \n"); printf("Options: \n");
printf(" -%c, --%s %*s Sets the color scheme (by default, gpufetch uses the system color scheme) See COLORS section for a more detailed explanation\n", c[ARG_COLOR], t[ARG_COLOR], (int) (max_len-strlen(t[ARG_COLOR])), ""); printf(" -%c, --%s %*s Set the color scheme (by default, gpufetch uses the system color scheme) See COLORS section for a more detailed explanation\n", c[ARG_COLOR], t[ARG_COLOR], (int) (max_len-strlen(t[ARG_COLOR])), "");
printf(" -%c, --%s %*s Selects the GPU to use (default: 0)\n", c[ARG_GPU], t[ARG_GPU], (int) (max_len-strlen(t[ARG_GPU])), ""); printf(" -%c, --%s %*s List the available GPUs in the system\n", c[ARG_LIST], t[ARG_LIST], (int) (max_len-strlen(t[ARG_LIST])), "");
printf(" -%c, --%s %*s Prints this help and exit\n", c[ARG_HELP], t[ARG_HELP], (int) (max_len-strlen(t[ARG_HELP])), ""); printf(" -%c, --%s %*s Select the GPU to use (default: 0)\n", c[ARG_GPU], t[ARG_GPU], (int) (max_len-strlen(t[ARG_GPU])), "");
printf(" -%c, --%s %*s Prints gpufetch version and exit\n", c[ARG_VERSION], t[ARG_VERSION], (int) (max_len-strlen(t[ARG_VERSION])), ""); printf(" -%c, --%s %*s Print this help and exit\n", c[ARG_HELP], t[ARG_HELP], (int) (max_len-strlen(t[ARG_HELP])), "");
printf(" -%c, --%s %*s Print gpufetch version and exit\n", c[ARG_VERSION], t[ARG_VERSION], (int) (max_len-strlen(t[ARG_VERSION])), "");
printf("\nCOLORS: \n"); printf("\nCOLORS: \n");
printf(" Color scheme can be set using a predefined color scheme or a custom one:\n"); printf(" Color scheme can be set using a predefined color scheme or a custom one:\n");
@@ -64,14 +66,19 @@ int main(int argc, char* argv[]) {
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
struct gpu_list* list = get_gpu_list();
if(list_gpus()) {
return print_gpus_list(list);
}
set_log_level(true); set_log_level(true);
printWarn("gpufetch is in beta. The provided information may be incomplete or wrong.\n\ printf("[WARNING]: gpufetch is in beta. The provided information may be incomplete or wrong.\n\
If you want to help to improve gpufetch, please compare the output of the program\n\ If you want to help to improve gpufetch, please compare the output of the program\n\
with a reliable source which you know is right (e.g, techpowerup.com) and report\n\ with a reliable source which you know is right (e.g, techpowerup.com) and report\n\
any inconsistencies to https://github.com/Dr-Noob/gpufetch/issues"); any inconsistencies to https://github.com/Dr-Noob/gpufetch/issues\n");
struct gpu_info* gpu = get_gpu_info(get_gpu_idx()); struct gpu_info* gpu = get_gpu_info(list, get_gpu_idx());
if(gpu == NULL) if(gpu == NULL)
return EXIT_FAILURE; return EXIT_FAILURE;

62
src/common/master.cpp Normal file
View File

@@ -0,0 +1,62 @@
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include "master.hpp"
#include "../cuda/cuda.hpp"
#include "../intel/intel.hpp"
#define MAX_GPUS 1000
// Collection of the GPUs detected by the backends compiled into the binary.
struct gpu_list {
// Array of pointers to the detected GPUs (get_gpu_list() allocates room for MAX_GPUS entries)
struct gpu_info ** gpus;
// Number of valid entries stored at the front of gpus
int num_gpus;
};
/*
 * Builds the list of GPUs reported by every backend compiled in.
 *
 * CUDA GPUs (if BACKEND_CUDA is defined) are queried by increasing index
 * until the backend returns NULL; the Intel iGPU (if BACKEND_INTEL is
 * defined) is appended afterwards. At most MAX_GPUS entries are stored.
 *
 * Returns a heap allocated list (never NULL). Slots beyond num_gpus are
 * always NULL. The process is terminated if memory cannot be allocated.
 */
struct gpu_list* get_gpu_list() {
  struct gpu_list* list = (struct gpu_list*) malloc(sizeof(struct gpu_list));
  if(list == NULL) {
    fprintf(stderr, "[ERROR]: get_gpu_list: Unable to allocate the GPU list\n");
    exit(EXIT_FAILURE);
  }

  list->num_gpus = 0;
  // calloc guarantees that every unused slot is NULL instead of garbage
  list->gpus = (struct gpu_info**) calloc(MAX_GPUS, sizeof(struct gpu_info*));
  if(list->gpus == NULL) {
    fprintf(stderr, "[ERROR]: get_gpu_list: Unable to allocate the GPU list\n");
    exit(EXIT_FAILURE);
  }

#ifdef BACKEND_CUDA
  // The CUDA backend returns NULL for the first index that is not a valid GPU
  while(list->num_gpus < MAX_GPUS) {
    struct gpu_info* gpu = get_gpu_info_cuda(list->num_gpus);
    if(gpu == NULL) break;
    list->gpus[list->num_gpus++] = gpu;
  }
#endif

#ifdef BACKEND_INTEL
  // Only store the iGPU if there is still room in the array
  if(list->num_gpus < MAX_GPUS) {
    struct gpu_info* gpu = get_gpu_info_intel();
    if(gpu != NULL) list->gpus[list->num_gpus++] = gpu;
  }
#endif

  return list;
}
bool print_gpus_list(struct gpu_list* list) {
for(int i=0; i < list->num_gpus; i++) {
printf("GPU %d: ", i);
if(list->gpus[i]->vendor == GPU_VENDOR_NVIDIA) {
#ifdef BACKEND_CUDA
print_gpu_cuda(list->gpus[i]);
#endif
}
else if(list->gpus[i]->vendor == GPU_VENDOR_INTEL) {
#ifdef BACKEND_INTEL
print_gpu_intel(list->gpus[i]);
#endif
}
}
return true;
}
struct gpu_info* get_gpu_info(struct gpu_list* list, int idx) {
return list->gpus[idx];
}

12
src/common/master.hpp Normal file
View File

@@ -0,0 +1,12 @@
// Public interface of the GPU list built from the enabled backends
#ifndef __GPU_LIST__
#define __GPU_LIST__
#include "gpu.hpp"
// Opaque type: the definition lives in master.cpp
struct gpu_list;
// Queries the enabled backends and returns the list of detected GPUs
struct gpu_list* get_gpu_list();
// Prints one line per GPU in the list
bool print_gpus_list(struct gpu_list* list);
// Returns the GPU stored at position idx of the list
struct gpu_info* get_gpu_info(struct gpu_list* list, int idx);
#endif

73
src/common/pci.cpp Normal file
View File

@@ -0,0 +1,73 @@
#include "global.hpp"
#include "pci.hpp"
#include <cstddef>
#define CLASS_VGA_CONTROLLER 0x0300
/*
 * Tells whether the device list contains a VGA controller
 * (class CLASS_VGA_CONTROLLER) whose vendor id equals 'id'.
 * Prints a warning and returns false when no such device exists.
 */
bool pciutils_is_vendor_id_present(struct pci_dev *devices, int id) {
  struct pci_dev *cur = devices;

  while(cur != NULL) {
    bool is_vga = cur->device_class == CLASS_VGA_CONTROLLER;
    if(is_vga && cur->vendor_id == id) {
      return true;
    }
    cur = cur->next;
  }

  printWarn("Unable to find a valid device for id %d using pciutils", id);
  return false;
}
/*
 * Returns the device id of the first VGA controller in the list whose
 * vendor id equals 'id'. Prints an error and returns 0 if there is none.
 */
uint16_t pciutils_get_pci_device_id(struct pci_dev *devices, int id) {
  for(struct pci_dev *cur = devices; cur != NULL; cur = cur->next) {
    // Skip everything that is not a VGA controller of the requested vendor
    if(cur->device_class != CLASS_VGA_CONTROLLER) continue;
    if(cur->vendor_id != id) continue;

    return cur->device_id;
  }

  printErr("Unable to find a valid device for id %d using pciutils", id);
  return 0;
}
/*
 * Copies the location (domain, bus, dev, func) of the first VGA controller
 * whose vendor id equals 'id' into 'pci'. Prints an error and leaves 'pci'
 * untouched if no such device exists.
 *
 * The search stops at the first match so that the location refers to the
 * same device that pciutils_get_pci_device_id() reports; without stopping,
 * a system with several matching devices would mix the device id of the
 * first one with the location of the last one.
 */
void pciutils_set_pci_bus(struct pci* pci, struct pci_dev *devices, int id) {
  for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
    if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
      pci->domain = dev->domain;
      pci->bus = dev->bus;
      pci->dev = dev->dev;
      pci->func = dev->func;
      return;
    }
  }

  printErr("Unable to find a valid device for id %d using pciutils", id);
}
/*
 * Builds a struct pci describing the VGA controller whose vendor id is 'id'.
 * Returns NULL if the device list contains no such device.
 *
 * The structure is only allocated once the device is known to exist, so
 * nothing is leaked on the NULL return path.
 */
struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id) {
  if(!pciutils_is_vendor_id_present(devices, id)) {
    return NULL;
  }

  struct pci* pci = (struct pci*) emalloc(sizeof(struct pci));

  // TODO: Refactor this; instead of 2xGet + 1xSet, do it better
  pci->vendor_id = id;
  pci->device_id = pciutils_get_pci_device_id(devices, id);
  pciutils_set_pci_bus(pci, devices, id);

  return pci;
}
/*
 * Scans the PCI bus with pciutils and returns the head of the resulting
 * device list, with identification, base address and class information
 * requested for every device.
 * NOTE(review): the pci_access handle is never released here; presumably
 * the returned list belongs to it - confirm before adding a cleanup call.
 */
struct pci_dev *get_pci_devices_from_pciutils() {
  struct pci_access *access = pci_alloc();
  pci_init(access);
  pci_scan_bus(access);

  struct pci_dev *cur = access->devices;
  while(cur) {
    pci_fill_info(cur, PCI_FILL_IDENT | PCI_FILL_BASES | PCI_FILL_CLASS);
    cur = cur->next;
  }

  return access->devices;
}

21
src/common/pci.hpp Normal file
View File

@@ -0,0 +1,21 @@
// Backend independent PCI helpers built on top of pciutils (libpci)
#ifndef __GPUFETCH_PCI__
#define __GPUFETCH_PCI__
#include <cstdint>
// pci/pci.h is included with C linkage
extern "C" {
#include <pci/pci.h>
}
// Identification and location of a PCI device
struct pci {
// PCI vendor id of the device
uint16_t vendor_id;
// PCI device id of the device
uint16_t device_id;
// Location of the device, copied from the matching pciutils entry
uint16_t domain;
uint16_t bus;
uint16_t dev;
uint16_t func;
};
// Returns the pci information of the VGA controller with vendor id 'id', or NULL if there is none
struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id);
// Scans the PCI bus and returns the list of devices found
struct pci_dev *get_pci_devices_from_pciutils();
#endif

View File

@@ -9,6 +9,8 @@
#include "../common/global.hpp" #include "../common/global.hpp"
#include "../common/gpu.hpp" #include "../common/gpu.hpp"
#include "../intel/uarch.hpp"
#include "../intel/intel.hpp"
#include "../cuda/cuda.hpp" #include "../cuda/cuda.hpp"
#include "../cuda/uarch.hpp" #include "../cuda/uarch.hpp"
@@ -34,15 +36,19 @@ enum {
ATTRIBUTE_CHIP, ATTRIBUTE_CHIP,
ATTRIBUTE_UARCH, ATTRIBUTE_UARCH,
ATTRIBUTE_TECHNOLOGY, ATTRIBUTE_TECHNOLOGY,
ATTRIBUTE_GT,
ATTRIBUTE_FREQUENCY, ATTRIBUTE_FREQUENCY,
ATTRIBUTE_STREAMINGMP, ATTRIBUTE_STREAMINGMP,
ATTRIBUTE_CORESPERMP, ATTRIBUTE_CORESPERMP,
ATTRIBUTE_CUDA_CORES, ATTRIBUTE_CUDA_CORES,
ATTRIBUTE_TENSOR_CORES,
ATTRIBUTE_EUS,
ATTRIBUTE_L2, ATTRIBUTE_L2,
ATTRIBUTE_MEMORY, ATTRIBUTE_MEMORY,
ATTRIBUTE_MEMORY_FREQ, ATTRIBUTE_MEMORY_FREQ,
ATTRIBUTE_BUS_WIDTH, ATTRIBUTE_BUS_WIDTH,
ATTRIBUTE_PEAK ATTRIBUTE_PEAK,
ATTRIBUTE_PEAK_TENSOR,
}; };
static const char* ATTRIBUTE_FIELDS [] = { static const char* ATTRIBUTE_FIELDS [] = {
@@ -50,15 +56,19 @@ static const char* ATTRIBUTE_FIELDS [] = {
"GPU processor:", "GPU processor:",
"Microarchitecture:", "Microarchitecture:",
"Technology:", "Technology:",
"Graphics Tier:",
"Max Frequency:", "Max Frequency:",
"SMs:", "SMs:",
"Cores/SM:", "Cores/SM:",
"CUDA cores:", "CUDA Cores:",
"Tensor Cores:",
"Execution Units:",
"L2 Size:", "L2 Size:",
"Memory:", "Memory:",
"Memory frequency:", "Memory frequency:",
"Bus width:", "Bus width:",
"Peak Performance:", "Peak Performance:",
"Peak Performance (MMA):",
}; };
static const char* ATTRIBUTE_FIELDS_SHORT [] = { static const char* ATTRIBUTE_FIELDS_SHORT [] = {
@@ -66,15 +76,19 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
"Processor:", "Processor:",
"uArch:", "uArch:",
"Technology:", "Technology:",
"GT:",
"Max Freq.:", "Max Freq.:",
"SMs:", "SMs:",
"Cores/SM:", "Cores/SM:",
"CUDA cores:", "CUDA Cores:",
"Tensor Cores:",
"EUs:",
"L2 Size:", "L2 Size:",
"Memory:", "Memory:",
"Memory freq.:", "Memory freq.:",
"Bus width:", "Bus width:",
"Peak Perf.:", "Peak Perf.:",
"Peak Perf.(MMA):",
}; };
struct terminal { struct terminal {
@@ -194,23 +208,32 @@ void replace_bgbyfg_color(struct ascii_logo* logo) {
for(int i=0; i < 2; i++) { for(int i=0; i < 2; i++) {
if(logo->color_ascii[i] == NULL) break; if(logo->color_ascii[i] == NULL) break;
if(strcmp(logo->color_ascii[i], COLOR_BG_BLACK) == 0) strcpy(logo->color_ascii[i], COLOR_FG_BLACK); if(strcmp(logo->color_ascii[i], C_BG_BLACK) == 0) strcpy(logo->color_ascii[i], C_FG_BLACK);
else if(strcmp(logo->color_ascii[i], COLOR_BG_RED) == 0) strcpy(logo->color_ascii[i], COLOR_FG_RED); else if(strcmp(logo->color_ascii[i], C_BG_RED) == 0) strcpy(logo->color_ascii[i], C_FG_RED);
else if(strcmp(logo->color_ascii[i], COLOR_BG_GREEN) == 0) strcpy(logo->color_ascii[i], COLOR_FG_GREEN); else if(strcmp(logo->color_ascii[i], C_BG_GREEN) == 0) strcpy(logo->color_ascii[i], C_FG_GREEN);
else if(strcmp(logo->color_ascii[i], COLOR_BG_YELLOW) == 0) strcpy(logo->color_ascii[i], COLOR_FG_YELLOW); else if(strcmp(logo->color_ascii[i], C_BG_YELLOW) == 0) strcpy(logo->color_ascii[i], C_FG_YELLOW);
else if(strcmp(logo->color_ascii[i], COLOR_BG_BLUE) == 0) strcpy(logo->color_ascii[i], COLOR_FG_BLUE); else if(strcmp(logo->color_ascii[i], C_BG_BLUE) == 0) strcpy(logo->color_ascii[i], C_FG_BLUE);
else if(strcmp(logo->color_ascii[i], COLOR_BG_MAGENTA) == 0) strcpy(logo->color_ascii[i], COLOR_FG_MAGENTA); else if(strcmp(logo->color_ascii[i], C_BG_MAGENTA) == 0) strcpy(logo->color_ascii[i], C_FG_MAGENTA);
else if(strcmp(logo->color_ascii[i], COLOR_BG_CYAN) == 0) strcpy(logo->color_ascii[i], COLOR_FG_CYAN); else if(strcmp(logo->color_ascii[i], C_BG_CYAN) == 0) strcpy(logo->color_ascii[i], C_FG_CYAN);
else if(strcmp(logo->color_ascii[i], COLOR_BG_WHITE) == 0) strcpy(logo->color_ascii[i], COLOR_FG_WHITE); else if(strcmp(logo->color_ascii[i], C_BG_WHITE) == 0) strcpy(logo->color_ascii[i], C_FG_WHITE);
}
}
struct ascii_logo* choose_ascii_art_aux(struct ascii_logo* logo_long, struct ascii_logo* logo_short, struct terminal* term, int lf) {
if(ascii_fits_screen(term->w, *logo_long, lf)) {
return logo_long;
}
else {
return logo_short;
} }
} }
void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* term, int lf) { void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* term, int lf) {
if(art->vendor == GPU_VENDOR_NVIDIA) { if(art->vendor == GPU_VENDOR_NVIDIA) {
if(term != NULL && ascii_fits_screen(term->w, logo_nvidia_l, lf)) art->art = choose_ascii_art_aux(&logo_nvidia_l, &logo_nvidia, term, lf);
art->art = &logo_nvidia_l; }
else else if(art->vendor == GPU_VENDOR_INTEL) {
art->art = &logo_nvidia; art->art = choose_ascii_art_aux(&logo_intel_l, &logo_intel, term, lf);
} }
else { else {
art->art = &logo_unknown; art->art = &logo_unknown;
@@ -222,10 +245,10 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
switch(art->style) { switch(art->style) {
case STYLE_LEGACY: case STYLE_LEGACY:
logo->replace_blocks = false; logo->replace_blocks = false;
strcpy(logo->color_text[0], COLOR_NONE); strcpy(logo->color_text[0], C_NONE);
strcpy(logo->color_text[1], COLOR_NONE); strcpy(logo->color_text[1], C_NONE);
strcpy(logo->color_ascii[0], COLOR_NONE); strcpy(logo->color_ascii[0], C_NONE);
strcpy(logo->color_ascii[1], COLOR_NONE); strcpy(logo->color_ascii[1], C_NONE);
art->reset[0] = '\0'; art->reset[0] = '\0';
break; break;
case STYLE_RETRO: case STYLE_RETRO:
@@ -239,7 +262,7 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
strcpy(logo->color_ascii[0], rgb_to_ansi(cs[0], logo->replace_blocks, true)); strcpy(logo->color_ascii[0], rgb_to_ansi(cs[0], logo->replace_blocks, true));
strcpy(logo->color_ascii[1], rgb_to_ansi(cs[1], logo->replace_blocks, true)); strcpy(logo->color_ascii[1], rgb_to_ansi(cs[1], logo->replace_blocks, true));
} }
strcpy(art->reset, COLOR_RESET); strcpy(art->reset, C_RESET);
break; break;
case STYLE_INVALID: case STYLE_INVALID:
default: default:
@@ -336,6 +359,48 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t text_space, con
printf("\n"); printf("\n");
} }
#ifdef BACKEND_INTEL
// Prints the logo and the attributes of an Intel iGPU.
// Returns false if the ascii art cannot be created (set_ascii returns NULL), true otherwise.
// NOTE(review): term is dereferenced unconditionally, so it must not be NULL - confirm against get_terminal_size
bool print_gpufetch_intel(struct gpu_info* gpu, STYLE s, struct color** cs, struct terminal* term) {
struct ascii* art = set_ascii(get_gpu_vendor(gpu), s);
if(art == NULL)
return false;
// Build the string shown for every attribute
char* gpu_name = get_str_gpu_name(gpu);
char* uarch = get_str_uarch_intel(gpu->arch);
char* gt = get_str_gt(gpu->arch);
char* manufacturing_process = get_str_process(gpu->arch);
char* eus = get_str_eu(gpu);
char* max_frequency = get_str_freq(gpu);
char* pp = get_str_peak_performance(gpu);
// Register the attributes (presumably printed in this order - TODO confirm in setAttribute)
setAttribute(art, ATTRIBUTE_NAME, gpu_name);
setAttribute(art, ATTRIBUTE_UARCH, uarch);
setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
setAttribute(art, ATTRIBUTE_GT, gt);
setAttribute(art, ATTRIBUTE_EUS, eus);
setAttribute(art, ATTRIBUTE_PEAK, pp);
// Start with the long field names
const char** attribute_fields = ATTRIBUTE_FIELDS;
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
uint32_t longest_field = longest_field_length(art, longest_attribute);
choose_ascii_art(art, cs, term, longest_field);
if(!ascii_fits_screen(term->w, *art->art, longest_field)) {
// Despite of choosing the smallest logo, the output does not fit
// Choose the shorter field names and recalculate the longest attr
attribute_fields = ATTRIBUTE_FIELDS_SHORT;
longest_attribute = longest_attribute_length(art, attribute_fields);
}
// The text gets the terminal width that is left after the logo
print_ascii_generic(art, longest_attribute, term->w - art->art->width, attribute_fields);
return true;
}
#endif
#ifdef BACKEND_CUDA
bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struct terminal* term) { bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struct terminal* term) {
struct ascii* art = set_ascii(get_gpu_vendor(gpu), s); struct ascii* art = set_ascii(get_gpu_vendor(gpu), s);
@@ -344,12 +409,13 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
char* gpu_name = get_str_gpu_name(gpu); char* gpu_name = get_str_gpu_name(gpu);
char* gpu_chip = get_str_chip(gpu->arch); char* gpu_chip = get_str_chip(gpu->arch);
char* uarch = get_str_uarch(gpu->arch); char* uarch = get_str_uarch_cuda(gpu->arch);
char* comp_cap = get_str_cc(gpu->arch); char* comp_cap = get_str_cc(gpu->arch);
char* manufacturing_process = get_str_process(gpu->arch); char* manufacturing_process = get_str_process(gpu->arch);
char* sms = get_str_sm(gpu); char* sms = get_str_sm(gpu);
char* corespersm = get_str_cores_sm(gpu); char* corespersm = get_str_cores_sm(gpu);
char* cores = get_str_cuda_cores(gpu); char* cores = get_str_cuda_cores(gpu);
char* tensorc = get_str_tensor_cores(gpu);
char* max_frequency = get_str_freq(gpu); char* max_frequency = get_str_freq(gpu);
char* l2 = get_str_l2(gpu); char* l2 = get_str_l2(gpu);
char* mem_size = get_str_memory_size(gpu); char* mem_size = get_str_memory_size(gpu);
@@ -357,6 +423,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
char* mem_freq = get_str_memory_clock(gpu); char* mem_freq = get_str_memory_clock(gpu);
char* bus_width = get_str_bus_width(gpu); char* bus_width = get_str_bus_width(gpu);
char* pp = get_str_peak_performance(gpu); char* pp = get_str_peak_performance(gpu);
char* pp_tensor = get_str_peak_performance_tensor(gpu);
char* mem = (char *) emalloc(sizeof(char) * (strlen(mem_size) + strlen(mem_type) + 2)); char* mem = (char *) emalloc(sizeof(char) * (strlen(mem_size) + strlen(mem_type) + 2));
sprintf(mem, "%s %s", mem_size, mem_type); sprintf(mem, "%s %s", mem_size, mem_type);
@@ -372,11 +439,17 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
setAttribute(art, ATTRIBUTE_STREAMINGMP, sms); setAttribute(art, ATTRIBUTE_STREAMINGMP, sms);
setAttribute(art, ATTRIBUTE_CORESPERMP, corespersm); setAttribute(art, ATTRIBUTE_CORESPERMP, corespersm);
setAttribute(art, ATTRIBUTE_CUDA_CORES, cores); setAttribute(art, ATTRIBUTE_CUDA_CORES, cores);
if(gpu->topo->tensor_cores > 0) {
setAttribute(art, ATTRIBUTE_TENSOR_CORES, tensorc);
}
setAttribute(art, ATTRIBUTE_MEMORY, mem); setAttribute(art, ATTRIBUTE_MEMORY, mem);
setAttribute(art, ATTRIBUTE_MEMORY_FREQ, mem_freq); setAttribute(art, ATTRIBUTE_MEMORY_FREQ, mem_freq);
setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width); setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
setAttribute(art, ATTRIBUTE_L2, l2); setAttribute(art, ATTRIBUTE_L2, l2);
setAttribute(art, ATTRIBUTE_PEAK, pp); setAttribute(art, ATTRIBUTE_PEAK, pp);
if(gpu->topo->tensor_cores > 0) {
setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor);
}
const char** attribute_fields = ATTRIBUTE_FIELDS; const char** attribute_fields = ATTRIBUTE_FIELDS;
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields); uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
@@ -402,6 +475,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
return true; return true;
} }
#endif
struct terminal* get_terminal_size() { struct terminal* get_terminal_size() {
struct terminal* term = (struct terminal*) emalloc(sizeof(struct terminal)); struct terminal* term = (struct terminal*) emalloc(sizeof(struct terminal));
@@ -434,5 +508,17 @@ struct terminal* get_terminal_size() {
bool print_gpufetch(struct gpu_info* gpu, STYLE s, struct color** cs) { bool print_gpufetch(struct gpu_info* gpu, STYLE s, struct color** cs) {
struct terminal* term = get_terminal_size(); struct terminal* term = get_terminal_size();
if(gpu->vendor == GPU_VENDOR_NVIDIA)
#ifdef BACKEND_CUDA
return print_gpufetch_cuda(gpu, s, cs, term); return print_gpufetch_cuda(gpu, s, cs, term);
#else
return false;
#endif
else {
#ifdef BACKEND_INTEL
return print_gpufetch_intel(gpu, s, cs, term);
#else
return false;
#endif
}
} }

28
src/common/uarch.cpp Normal file
View File

@@ -0,0 +1,28 @@
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "global.hpp"
#include "uarch.hpp"
/*
 * Returns a newly allocated string describing the manufacturing process
 * stored in arch->process (in nanometers):
 *  - UNK:       STRING_UNKNOWN
 *  - > 100 nm:  the value in micrometers (e.g. 150 -> "0.15um")
 *  - 1..100 nm: the value in nanometers (e.g. 14 -> "14nm")
 *  - otherwise: STRING_UNKNOWN, and a bug is reported
 * The caller owns the returned string.
 */
char* get_str_process(struct uarch* arch) {
  // The buffer must hold STRING_UNKNOWN as well as any formatted int32_t,
  // so do not size it from STRING_UNKNOWN alone
  size_t unknown_size = strlen(STRING_UNKNOWN) + 1;
  size_t size = unknown_size > 32 ? unknown_size : 32;
  char* str = (char *) emalloc(sizeof(char) * size);
  int32_t process = arch->process;

  if(process == UNK) {
    snprintf(str, size, "%s", STRING_UNKNOWN);
  }
  else if(process > 100) {
    // 1 um = 1000 nm
    snprintf(str, size, "%.2fum", (double)process/1000);
  }
  else if(process > 0) {
    snprintf(str, size, "%dnm", process);
  }
  else {
    snprintf(str, size, "%s", STRING_UNKNOWN);
    printBug("Found invalid process: '%d'", process);
  }

  return str;
}

31
src/common/uarch.hpp Normal file
View File

@@ -0,0 +1,31 @@
// Microarchitecture description shared by every backend
#ifndef __COMMON_UARCH__
#define __COMMON_UARCH__

// Needed for the fixed width integer types used below, so that this
// header does not depend on the includes of the file that includes it
#include <stdint.h>

// Data not available
#define NA -1
// Unknown manufacturing process
#define UNK -1

// Backend specific identifiers of chips and microarchitectures
typedef uint32_t GPUCHIP;
typedef uint32_t MICROARCH;

struct uarch {
  // NVIDIA specific
  int32_t cc_major;
  int32_t cc_minor;
  int32_t compute_capability;

  // Intel specific
  int32_t gt;
  int32_t eu;

  // Common to every vendor
  MICROARCH uarch;
  GPUCHIP chip;
  // Manufacturing process (UNK if unknown)
  int32_t process;
  char* uarch_str;
  char* chip_str;
};

#endif

View File

@@ -1,10 +1,10 @@
#ifndef __GPUCHIPS__ #ifndef __CUDA_GPUCHIPS__
#define __GPUCHIPS__ #define __CUDA_GPUCHIPS__
typedef uint32_t GPUCHIP; typedef uint32_t GPUCHIP;
enum { enum {
CHIP_UNKNOWN, CHIP_UNKNOWN_CUDA,
CHIP_G80, CHIP_G80,
CHIP_G80GL, CHIP_G80GL,
CHIP_G84, CHIP_G84,

View File

@@ -2,10 +2,18 @@
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include "cuda.hpp" #include "cuda.hpp"
#include "nvmlb.hpp"
#include "uarch.hpp" #include "uarch.hpp"
#include "../common/pci.hpp"
#include "../common/global.hpp" #include "../common/global.hpp"
/*
 * Prints a one line summary of a CUDA GPU: its name followed by its
 * compute capability. Always returns true.
 */
bool print_gpu_cuda(struct gpu_info* gpu) {
  // get_str_cc hands over a heap allocated string, released below
  char* compute_cap = get_str_cc(gpu->arch);

  printf("%s (Compute Capability %s)\n", gpu->name, compute_cap);

  free(compute_cap);
  return true;
}
struct cache* get_cache_info(cudaDeviceProp prop) { struct cache* get_cache_info(cudaDeviceProp prop) {
struct cache* cach = (struct cache*) emalloc(sizeof(struct cache)); struct cache* cach = (struct cache*) emalloc(sizeof(struct cache));
@@ -17,12 +25,19 @@ struct cache* get_cache_info(cudaDeviceProp prop) {
return cach; return cach;
} }
/*
 * Returns the number of tensor cores of a GPU with 'sm' streaming
 * multiprocessors and compute capability major version 'major':
 * 8 per SM for major 7, 4 per SM for major 8 and none otherwise.
 */
int get_tensor_cores(int sm, int major) {
  int per_sm;

  switch(major) {
    case 7:
      per_sm = 8;
      break;
    case 8:
      per_sm = 4;
      break;
    default:
      return 0;
  }

  return sm * per_sm;
}
struct topology* get_topology_info(cudaDeviceProp prop) { struct topology* get_topology_info(cudaDeviceProp prop) {
struct topology* topo = (struct topology*) emalloc(sizeof(struct topology)); struct topology* topo = (struct topology*) emalloc(sizeof(struct topology));
topo->streaming_mp = prop.multiProcessorCount; topo->streaming_mp = prop.multiProcessorCount;
topo->cores_per_mp = _ConvertSMVer2Cores(prop.major, prop.minor); topo->cores_per_mp = _ConvertSMVer2Cores(prop.major, prop.minor);
topo->cuda_cores = topo->streaming_mp * topo->cores_per_mp; topo->cuda_cores = topo->streaming_mp * topo->cores_per_mp;
topo->tensor_cores = get_tensor_cores(topo->streaming_mp, prop.major);
return topo; return topo;
} }
@@ -60,11 +75,17 @@ struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) {
return mem; return mem;
} }
int64_t get_peak_performance(struct gpu_info* gpu) { // Compute peak performance when using CUDA cores
int64_t get_peak_performance_cuda(struct gpu_info* gpu) {
return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2; return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2;
} }
struct gpu_info* get_gpu_info(int gpu_idx) { // Compute peak performance when using tensor cores
// Peak performance reachable with the tensor cores: every tensor core
// contributes 4 * 4 * 8 operations per cycle
// (freq is presumably expressed in MHz, hence the 1000000 factor - TODO confirm)
int64_t get_peak_performance_tcu(struct gpu_info* gpu) {
  int64_t ops_per_core = gpu->freq * 1000000 * 4 * 4 * 8;
  return ops_per_core * gpu->topo->tensor_cores;
}
struct gpu_info* get_gpu_info_cuda(int gpu_idx) {
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info)); struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
gpu->pci = NULL; gpu->pci = NULL;
gpu->idx = gpu_idx; gpu->idx = gpu_idx;
@@ -74,8 +95,10 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
return NULL; return NULL;
} }
if(gpu_idx == 0) {
printf("Waiting for CUDA driver to start..."); printf("Waiting for CUDA driver to start...");
fflush(stdout); fflush(stdout);
}
int num_gpus = -1; int num_gpus = -1;
cudaError_t err = cudaSuccess; cudaError_t err = cudaSuccess;
@@ -83,7 +106,10 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err)); printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
return NULL; return NULL;
} }
printf("\r ");
if(gpu_idx == 0) {
printf("\r");
}
if(num_gpus <= 0) { if(num_gpus <= 0) {
printErr("No CUDA capable devices found!"); printErr("No CUDA capable devices found!");
@@ -91,7 +117,7 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
} }
if(gpu->idx+1 > num_gpus) { if(gpu->idx+1 > num_gpus) {
printErr("Requested GPU index %d in a system with %d GPUs", gpu->idx, num_gpus); // Master is trying to query an invalid GPU
return NULL; return NULL;
} }
@@ -106,38 +132,31 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
gpu->name = (char *) emalloc(sizeof(char) * (strlen(deviceProp.name) + 1)); gpu->name = (char *) emalloc(sizeof(char) * (strlen(deviceProp.name) + 1));
strcpy(gpu->name, deviceProp.name); strcpy(gpu->name, deviceProp.name);
gpu->nvmld = nvml_init(); struct pci_dev *devices = get_pci_devices_from_pciutils();
if(nvml_get_pci_info(gpu->idx, gpu->nvmld)) { gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_NVIDIA);
gpu->pci = get_pci_from_nvml(gpu->nvmld);
}
gpu->arch = get_uarch_from_cuda(gpu); gpu->arch = get_uarch_from_cuda(gpu);
gpu->cach = get_cache_info(deviceProp); gpu->cach = get_cache_info(deviceProp);
gpu->mem = get_memory_info(gpu, deviceProp); gpu->mem = get_memory_info(gpu, deviceProp);
gpu->topo = get_topology_info(deviceProp); gpu->topo = get_topology_info(deviceProp);
gpu->peak_performance = get_peak_performance(gpu); gpu->peak_performance = get_peak_performance_cuda(gpu);
gpu->peak_performance_tcu = get_peak_performance_tcu(gpu);
return gpu; return gpu;
} }
char* get_str_sm(struct gpu_info* gpu) { char* get_str_sm(struct gpu_info* gpu) {
uint32_t max_size = 10; return get_str_generic(gpu->topo->streaming_mp);
char* dummy = (char *) ecalloc(max_size, sizeof(char));
snprintf(dummy, max_size, "%d", gpu->topo->streaming_mp);
return dummy;
} }
char* get_str_cores_sm(struct gpu_info* gpu) { char* get_str_cores_sm(struct gpu_info* gpu) {
uint32_t max_size = 10; return get_str_generic(gpu->topo->cores_per_mp);
char* dummy = (char *) ecalloc(max_size, sizeof(char));
snprintf(dummy, max_size, "%d", gpu->topo->cores_per_mp);
return dummy;
} }
char* get_str_cuda_cores(struct gpu_info* gpu) { char* get_str_cuda_cores(struct gpu_info* gpu) {
uint32_t max_size = 10; return get_str_generic(gpu->topo->cuda_cores);
char* dummy = (char *) ecalloc(max_size, sizeof(char)); }
snprintf(dummy, max_size, "%d", gpu->topo->cuda_cores);
return dummy; char* get_str_tensor_cores(struct gpu_info* gpu) {
return get_str_generic(gpu->topo->tensor_cores);
} }

View File

@@ -1,11 +1,13 @@
#ifndef __CUDA__ #ifndef __CUDA_GPU__
#define __CUDA__ #define __CUDA_GPU__
#include "../common/gpu.hpp" #include "../common/gpu.hpp"
struct gpu_info* get_gpu_info(int gpu_idx); struct gpu_info* get_gpu_info_cuda(int gpu_idx);
bool print_gpu_cuda(struct gpu_info* gpu);
char* get_str_sm(struct gpu_info* gpu); char* get_str_sm(struct gpu_info* gpu);
char* get_str_cores_sm(struct gpu_info* gpu); char* get_str_cores_sm(struct gpu_info* gpu);
char* get_str_cuda_cores(struct gpu_info* gpu); char* get_str_cuda_cores(struct gpu_info* gpu);
char* get_str_tensor_cores(struct gpu_info* gpu);
#endif #endif

View File

@@ -1,70 +0,0 @@
#include <nvml.h>
#include "nvmlb.hpp"
#include "../common/global.hpp"
// State of the NVML backend
struct nvml_data {
// True once nvmlInit has succeeded
bool nvml_started;
// PCI information filled by nvml_get_pci_info
nvmlPciInfo_t pci;
};
/*
 * Starts NVML and returns the backend state, or NULL (after printing an
 * error) if NVML cannot be initialized.
 *
 * NVML is started before allocating the state so that nothing is leaked
 * when the initialization fails.
 */
struct nvml_data* nvml_init() {
  nvmlReturn_t result;

  if ((result = nvmlInit()) != NVML_SUCCESS) {
    printErr("nvmlInit: %s\n", nvmlErrorString(result));
    return NULL;
  }

  struct nvml_data* data = (struct nvml_data*) emalloc(sizeof(struct nvml_data));
  data->nvml_started = true;

  return data;
}
/*
 * Stores in 'data' the PCI information of the GPU with index 'gpu_idx'.
 * Returns false (after printing an error) if NVML was not started or if
 * any NVML call fails.
 *
 * 'data' may be NULL, which is what nvml_init returns when NVML could not
 * be started; that case is reported like a backend that was not started.
 */
bool nvml_get_pci_info(int gpu_idx, struct nvml_data* data) {
  nvmlReturn_t result;
  nvmlDevice_t device;

  if(data == NULL || !data->nvml_started) {
    printErr("nvml_get_pci_info: nvml was not started");
    return false;
  }

  if ((result = nvmlDeviceGetHandleByIndex(gpu_idx, &device)) != NVML_SUCCESS) {
    printErr("nvmlDeviceGetHandleByIndex: %s\n", nvmlErrorString(result));
    return false;
  }

  if ((result = nvmlDeviceGetPciInfo(device, &data->pci)) != NVML_SUCCESS) {
    printErr("nvmlDeviceGetPciInfo: %s\n", nvmlErrorString(result));
    return false;
  }

  return true;
}
// Returns the lower 16 bits of the combined pciDeviceId field
uint16_t nvml_get_pci_vendor_id(struct nvml_data* data) {
  const uint32_t low_half_mask = 0x0000FFFF;
  return data->pci.pciDeviceId & low_half_mask;
}
// Returns the upper 16 bits of the combined pciDeviceId field
uint16_t nvml_get_pci_device_id(struct nvml_data* data) {
  const uint32_t high_half_mask = 0xFFFF0000;
  return (data->pci.pciDeviceId & high_half_mask) >> 16;
}
/*
 * Shuts NVML down.
 * Returns true if NVML was shut down or was never started (a warning is
 * printed in the latter case), and false if nvmlShutdown fails.
 *
 * 'data' may be NULL, which is what nvml_init returns when NVML could not
 * be started.
 */
bool nvml_shutdown(struct nvml_data* data) {
  nvmlReturn_t result;

  if(data == NULL || !data->nvml_started) {
    printWarn("nvml_shutdown: nvml was not started");
    return true;
  }

  if ((result = nvmlShutdown()) != NVML_SUCCESS) {
    printErr("nvmlShutdown: %s\n", nvmlErrorString(result));
    return false;
  }

  return true;
}

View File

@@ -1,16 +0,0 @@
// NVML Backend
#ifndef __NVMLB__
#define __NVMLB__
#include <stdbool.h>
#include <stdint.h>
// Opaque type: the definition lives in the implementation file
struct nvml_data;
// Starts NVML; returns NULL if the initialization fails
struct nvml_data* nvml_init();
// Fetches the PCI information of GPU 'dev' into data; returns false on failure
bool nvml_get_pci_info(int dev, struct nvml_data* data);
// Accessors for the ids fetched by nvml_get_pci_info
uint16_t nvml_get_pci_vendor_id(struct nvml_data* data);
uint16_t nvml_get_pci_device_id(struct nvml_data* data);
// Shuts NVML down; returns false if the shutdown fails
bool nvml_shutdown(struct nvml_data* data);
#endif

View File

@@ -1,28 +1,14 @@
#include <stdio.h> #include <stdio.h>
#include "pci.hpp" #include "pci.hpp"
#include "nvmlb.hpp"
#include "chips.hpp" #include "chips.hpp"
#include "../common/global.hpp" #include "../common/global.hpp"
#include "../common/pci.hpp"
#define CHECK_PCI_START if (false) {} #define CHECK_PCI_START if (false) {}
#define CHECK_PCI(pci, id, chip) \ #define CHECK_PCI(pci, id, chip) \
else if (pci->device_id == id) return chip; else if (pci->device_id == id) return chip;
#define CHECK_PCI_END else { printBug("TODOO"); return CHIP_UNKNOWN; } #define CHECK_PCI_END else { printBug("Unkown CUDA device id: 0x%.4X", pci->device_id); return CHIP_UNKNOWN_CUDA; }
struct pci {
uint16_t vendor_id;
uint16_t device_id;
};
struct pci* get_pci_from_nvml(struct nvml_data* data) {
struct pci* pci = (struct pci*) emalloc(sizeof(struct pci));
pci->vendor_id = nvml_get_pci_vendor_id(data);
pci->device_id = nvml_get_pci_device_id(data);
return pci;
}
/* /*
* pci ids were retrieved using https://github.com/pciutils/pciids * pci ids were retrieved using https://github.com/pciutils/pciids
@@ -33,7 +19,7 @@ struct pci* get_pci_from_nvml(struct nvml_data* data) {
* or in pci.ids itself) * or in pci.ids itself)
*/ */
GPUCHIP get_chip_from_pci(struct pci* pci) { GPUCHIP get_chip_from_pci_cuda(struct pci* pci) {
CHECK_PCI_START CHECK_PCI_START
CHECK_PCI(pci, 0x25e5, CHIP_GA107BM) CHECK_PCI(pci, 0x25e5, CHIP_GA107BM)
CHECK_PCI(pci, 0x25e2, CHIP_GA107BM) CHECK_PCI(pci, 0x25e2, CHIP_GA107BM)

View File

@@ -1,13 +1,19 @@
#ifndef __PCI__ #ifndef __PCI_CUDA__
#define __PCI__ #define __PCI_CUDA__
#include <stdint.h> #include <stdint.h>
#include "nvmlb.hpp"
#include "../common/pci.hpp"
#include "chips.hpp" #include "chips.hpp"
/*
* doc: https://wiki.osdev.org/PCI#Class_Codes
* https://pci-ids.ucw.cz/read/PC
*/
#define PCI_VENDOR_ID_NVIDIA 0x10de
struct pci; struct pci;
struct pci* get_pci_from_nvml(struct nvml_data* data); GPUCHIP get_chip_from_pci_cuda(struct pci* pci);
GPUCHIP get_chip_from_pci(struct pci* pci);
#endif #endif

View File

@@ -3,21 +3,14 @@
#include <stdint.h> #include <stdint.h>
#include <cstddef> #include <cstddef>
#include "../common/uarch.hpp"
#include "../common/global.hpp" #include "../common/global.hpp"
#include "../common/gpu.hpp" #include "../common/gpu.hpp"
#include "chips.hpp" #include "chips.hpp"
typedef uint32_t MICROARCH;
// Any clock multiplier // Any clock multiplier
#define CM_ANY -1 #define CM_ANY -1
// Data not available
#define NA -1
// Unknown manufacturing process
#define UNK -1
// MICROARCH values // MICROARCH values
enum { enum {
UARCH_UNKNOWN, UARCH_UNKNOWN,
@@ -43,23 +36,10 @@ static const char *uarch_str[] = {
/*[ARCH_AMPERE] = */ "Ampere", /*[ARCH_AMPERE] = */ "Ampere",
}; };
struct uarch {
int32_t cc_major;
int32_t cc_minor;
int32_t compute_capability;
MICROARCH uarch;
GPUCHIP chip;
int32_t process;
char* uarch_str;
char* chip_str;
};
#define CHECK_UARCH_START if (false) {} #define CHECK_UARCH_START if (false) {}
#define CHECK_UARCH(arch, chip_, str, uarch, process) \ #define CHECK_UARCH(arch, chip_, str, uarch, process) \
else if (arch->chip == chip_) fill_uarch(arch, str, uarch, process); else if (arch->chip == chip_) fill_uarch(arch, str, uarch, process);
#define CHECK_UARCH_END else { printBug("map_chip_to_uarch: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, 0); } #define CHECK_UARCH_END else { if(arch->chip != CHIP_UNKNOWN_CUDA) printBug("map_chip_to_uarch_cuda: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, 0); }
void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t process) { void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t process) {
arch->chip_str = (char *) emalloc(sizeof(char) * (strlen(str)+1)); arch->chip_str = (char *) emalloc(sizeof(char) * (strlen(str)+1));
@@ -74,7 +54,7 @@ void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t proce
* o CHIP_XXXGL: indicates a professional-class (Quadro/Tesla) chip * o CHIP_XXXGL: indicates a professional-class (Quadro/Tesla) chip
* o CHIP_XXXM: indicates a mobile chip * o CHIP_XXXM: indicates a mobile chip
*/ */
void map_chip_to_uarch(struct uarch* arch) { void map_chip_to_uarch_cuda(struct uarch* arch) {
CHECK_UARCH_START CHECK_UARCH_START
// TESLA (1.0, 1.1, 1.2, 1.3) // // TESLA (1.0, 1.1, 1.2, 1.3) //
CHECK_UARCH(arch, CHIP_G80, "G80", UARCH_TESLA, 90) CHECK_UARCH(arch, CHIP_G80, "G80", UARCH_TESLA, 90)
@@ -263,9 +243,8 @@ struct uarch* get_uarch_from_cuda(struct gpu_info* gpu) {
arch->cc_major = deviceProp.major; arch->cc_major = deviceProp.major;
arch->cc_minor = deviceProp.minor; arch->cc_minor = deviceProp.minor;
arch->compute_capability = deviceProp.major * 10 + deviceProp.minor; arch->compute_capability = deviceProp.major * 10 + deviceProp.minor;
arch->chip = get_chip_from_pci(gpu->pci); arch->chip = get_chip_from_pci_cuda(gpu->pci);
map_chip_to_uarch_cuda(arch);
map_chip_to_uarch(arch);
return arch; return arch;
} }
@@ -335,10 +314,6 @@ MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch) {
CHECK_MEMTYPE_END CHECK_MEMTYPE_END
} }
const char* get_str_uarch(struct uarch* arch) {
return uarch_str[arch->uarch];
}
char* get_str_cc(struct uarch* arch) { char* get_str_cc(struct uarch* arch) {
uint32_t max_size = 4; uint32_t max_size = 4;
char* cc = (char *) ecalloc(max_size, sizeof(char)); char* cc = (char *) ecalloc(max_size, sizeof(char));
@@ -346,31 +321,14 @@ char* get_str_cc(struct uarch* arch) {
return cc; return cc;
} }
char* get_str_process(struct uarch* arch) {
char* str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN)+1));
int32_t process = arch->process;
if(process == UNK) {
snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
}
else if(process > 100) {
sprintf(str, "%.2fum", (double)process/100);
}
else if(process > 0){
sprintf(str, "%dnm", process);
}
else {
snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
printBug("Found invalid process: '%d'", process);
}
return str;
}
char* get_str_chip(struct uarch* arch) { char* get_str_chip(struct uarch* arch) {
return arch->chip_str; return arch->chip_str;
} }
const char* get_str_uarch_cuda(struct uarch* arch) {
return uarch_str[arch->uarch];
}
void free_uarch_struct(struct uarch* arch) { void free_uarch_struct(struct uarch* arch) {
free(arch->uarch_str); free(arch->uarch_str);
free(arch->chip_str); free(arch->chip_str);

View File

@@ -1,5 +1,5 @@
#ifndef __UARCH__ #ifndef __CUDA_UARCH__
#define __UARCH__ #define __CUDA_UARCH__
#include "../common/gpu.hpp" #include "../common/gpu.hpp"
@@ -8,7 +8,7 @@ struct uarch;
struct uarch* get_uarch_from_cuda(struct gpu_info* gpu); struct uarch* get_uarch_from_cuda(struct gpu_info* gpu);
bool clkm_possible_for_uarch(int clkm, struct uarch* arch); bool clkm_possible_for_uarch(int clkm, struct uarch* arch);
MEMTYPE guess_memtype_from_cmul_and_uarch(int ddr, struct uarch* arch); MEMTYPE guess_memtype_from_cmul_and_uarch(int ddr, struct uarch* arch);
char* get_str_uarch(struct uarch* arch); char* get_str_uarch_cuda(struct uarch* arch);
char* get_str_cc(struct uarch* arch); char* get_str_cc(struct uarch* arch);
char* get_str_chip(struct uarch* arch); char* get_str_chip(struct uarch* arch);
char* get_str_process(struct uarch* arch); char* get_str_process(struct uarch* arch);

12
src/intel/check.sh Executable file
View File

@@ -0,0 +1,12 @@
#!/bin/bash -u
# Checks the difference between supported uarchs
# and uarchs that have their topology available
# in file uarch.cpp
#
# Must be run from the directory that contains uarch.cpp.
# Requires 'meld' to display the comparison.

# Work in a private temporary directory instead of fixed file names in
# /tmp, so concurrent runs (or other users) cannot clobber the files.
# The directory is removed on exit, whatever the exit path is.
tmpdir="$(mktemp -d)" || exit 1
trap 'rm -rf "$tmpdir"' EXIT

# (uarch, GT) pairs used by the CHECK_UARCH entries: fields 4 and 5
uarchs="$(grep 'CHECK_UARCH' uarch.cpp | cut -d',' -f4-5 | grep 'UARCH_GEN' | tr -d ' ' | sort | uniq)"
# (uarch, GT) pairs covered by the CHECK_TOPO entries: fields 3 and 4
topos="$(grep 'CHECK_TOPO' uarch.cpp | cut -d',' -f3,4 | grep 'UARCH_' | tr -d ' ' | sort | uniq)"

echo "$uarchs" > "$tmpdir/uarchs.txt"
echo "$topos" > "$tmpdir/topos.txt"

meld "$tmpdir/uarchs.txt" "$tmpdir/topos.txt"

59
src/intel/chips.hpp Normal file
View File

@@ -0,0 +1,59 @@
#ifndef __INTEL_GPUCHIPS__
#define __INTEL_GPUCHIPS__
#include <stdint.h>
typedef uint32_t GPUCHIP;
// Identifiers for the Intel iGPU models known to the Intel backend,
// grouped by graphics generation. Values are stored in a GPUCHIP.
// CHIP_UNKNOWN_INTEL is the first enumerator (value 0) and is what
// get_chip_from_pci_intel() returns for an unrecognized PCI device id.
enum {
CHIP_UNKNOWN_INTEL,
// Gen6
CHIP_HD_2000,
CHIP_HD_3000,
// Gen7
CHIP_HD_2500,
CHIP_HD_4000,
CHIP_HD_P4000,
// Gen7.5
CHIP_HD_4200,
CHIP_HD_4400,
CHIP_HD_4600,
CHIP_HD_P4600,
CHIP_IRIS_5100,
CHIP_IRISP_5200,
CHIP_IRISP_P5200,
// Gen8
CHIP_HD_5300,
CHIP_HD_5500,
CHIP_HD_5600,
CHIP_HD_P5700,
CHIP_HD_6000,
CHIP_IRIS_6100,
CHIP_IRISP_6200,
CHIP_IRISP_P6300,
// Gen9
CHIP_HD_510,
CHIP_HD_515,
CHIP_HD_520,
CHIP_HD_530,
CHIP_HD_P530,
CHIP_HD_540,
CHIP_HD_550,
CHIP_IRIS_P555,
CHIP_IRIS_580,
CHIP_IRIS_P580,
// Gen9.5
CHIP_UHD_600,
CHIP_UHD_605,
CHIP_UHD_620,
CHIP_UHD_630,
CHIP_HD_610,
CHIP_HD_615,
CHIP_HD_620,
CHIP_HD_630,
CHIP_HD_P630,
CHIP_IRISP_640,
CHIP_IRISP_650,
};
#endif

46
src/intel/intel.cpp Normal file
View File

@@ -0,0 +1,46 @@
#include <stdio.h>
#include <string.h>
#include "intel.hpp"
#include "uarch.hpp"
#include "chips.hpp"
#include "udev.hpp"
#include "../common/pci.hpp"
#include "../common/global.hpp"
/*
 * Computes the theoretical peak performance of the iGPU as
 *   frequency (Hz) * total EUs * 8 * 2
 * where gpu->freq is the value read from gt_max_freq_mhz (MHz) and the
 * total number of EUs is eu_subslice * subslices.
 * NOTE(review): the factors 8 and 2 presumably stand for the FP32 lanes
 * per EU and the two operations of a fused multiply-add - confirm.
 *
 * The first operand is widened to int64_t so the whole product is
 * evaluated in 64 bits; evaluated in a 32-bit type it would overflow
 * for any realistic frequency/EU count.
 * NOTE(review): if the frequency or the topology is unknown (-1) the
 * result is meaningless; callers are assumed to handle that case.
 */
int64_t get_peak_performance_intel(struct gpu_info* gpu) {
  int64_t freq_hz = (int64_t) gpu->freq * 1000000;
  int64_t total_eus = (int64_t) gpu->topo_i->eu_subslice * gpu->topo_i->subslices;
  return freq_hz * total_eus * 8 * 2;
}
/*
 * Collects all the information about the Intel iGPU.
 *
 * Returns a newly allocated gpu_info, or NULL when:
 *  - no Intel device is found in the PCI bus (no iGPU present), or
 *  - the device is found but its chip is not in the lookup table
 *    (get_uarch_from_pci returns NULL in that case).
 *
 * The gpu_info structure is only allocated once both lookups have
 * succeeded, so nothing has to be released on the early returns.
 * NOTE(review): when the chip is not supported, the pci structure
 * obtained from pciutils is not released here - confirm who owns it.
 */
struct gpu_info* get_gpu_info_intel() {
  struct pci_dev *devices = get_pci_devices_from_pciutils();
  struct pci* pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_INTEL);

  if(pci == NULL) {
    // No Intel iGPU found in PCI, which means it is not present
    return NULL;
  }

  struct uarch* arch = get_uarch_from_pci(pci);

  if(arch == NULL) {
    // Intel device found, but the chip is not in the lookup table,
    // which means it is not supported
    return NULL;
  }

  struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
  gpu->vendor = GPU_VENDOR_INTEL;
  gpu->pci = pci;
  gpu->arch = arch;
  gpu->name = get_name_from_uarch(gpu->arch);
  gpu->topo_i = get_topology_info(gpu->arch);
  gpu->freq = get_max_freq_from_file(gpu->pci);
  gpu->peak_performance = get_peak_performance_intel(gpu);

  return gpu;
}
/*
 * Prints the name of the GPU followed by a newline.
 * Returns false (printing nothing) if the GPU is not an Intel GPU,
 * true otherwise.
 *
 * gpu->name is built by get_name_from_uarch(), which already prefixes
 * the chip name with "Intel ", so the vendor must not be printed again
 * here (it used to print "Intel Intel ...").
 */
bool print_gpu_intel(struct gpu_info* gpu) {
  if(gpu->vendor != GPU_VENDOR_INTEL) return false;

  printf("%s\n", gpu->name);

  return true;
}
char* get_str_eu(struct gpu_info* gpu) {
return get_str_generic(gpu->topo_i->subslices * gpu->topo_i->eu_subslice);
}

10
src/intel/intel.hpp Normal file
View File

@@ -0,0 +1,10 @@
#ifndef __INTEL_GPU__
#define __INTEL_GPU__
#include "../common/gpu.hpp"
// Gathers the information of the Intel iGPU (PCI device, uarch, name,
// topology, frequency and peak performance). Returns NULL when no
// Intel iGPU is available.
struct gpu_info* get_gpu_info_intel();
// Prints the GPU name; returns false if gpu is not an Intel GPU.
bool print_gpu_intel(struct gpu_info* gpu);
// Returns the total number of execution units as a string.
char* get_str_eu(struct gpu_info* gpu);
#endif

88
src/intel/pci.cpp Normal file
View File

@@ -0,0 +1,88 @@
#include <stdio.h>
#include "pci.hpp"
#include "chips.hpp"
#include "../common/global.hpp"
#include "../common/pci.hpp"
/*
 * Helper macros to build the PCI device id -> GPUCHIP lookup as a
 * single if / else-if chain:
 *  - CHECK_PCI_START opens the chain with a branch that is never taken
 *  - CHECK_PCI       returns 'chip' if the device id of 'pci' is 'id'
 *  - CHECK_PCI_END   closes the chain: reports the unknown device id
 *                    and returns CHIP_UNKNOWN_INTEL. It uses the
 *                    variable named 'pci' of the enclosing function.
 * Macro arguments are parenthesized so that any expression can be
 * passed safely.
 */
#define CHECK_PCI_START if (false) {}
#define CHECK_PCI(pci, id, chip) \
  else if ((pci)->device_id == (id)) return (chip);
#define CHECK_PCI_END else { printBug("Unknown Intel device id: 0x%.4X", pci->device_id); return CHIP_UNKNOWN_INTEL; }
/*
 * Maps the PCI device id of an Intel iGPU to its GPUCHIP identifier.
 * Returns CHIP_UNKNOWN_INTEL (after reporting it via CHECK_PCI_END)
 * if the device id is not in the table.
 *
 * Source of the device ids:
 * https://github.com/mesa3d/mesa/blob/main/include/pci_ids/i965_pci_ids.h
 */
GPUCHIP get_chip_from_pci_intel(struct pci* pci) {
CHECK_PCI_START
// Gen6
CHECK_PCI(pci, 0x0102, CHIP_HD_2000)
CHECK_PCI(pci, 0x0106, CHIP_HD_2000)
CHECK_PCI(pci, 0x010A, CHIP_HD_2000)
CHECK_PCI(pci, 0x0112, CHIP_HD_3000)
CHECK_PCI(pci, 0x0122, CHIP_HD_3000)
CHECK_PCI(pci, 0x0116, CHIP_HD_3000)
CHECK_PCI(pci, 0x0126, CHIP_HD_3000)
// Gen7
CHECK_PCI(pci, 0x0152, CHIP_HD_2500)
CHECK_PCI(pci, 0x0156, CHIP_HD_2500)
CHECK_PCI(pci, 0x0162, CHIP_HD_4000)
CHECK_PCI(pci, 0x0166, CHIP_HD_4000)
CHECK_PCI(pci, 0x016a, CHIP_HD_P4000)
// Gen7.5
CHECK_PCI(pci, 0x0A1E, CHIP_HD_4200)
CHECK_PCI(pci, 0x041E, CHIP_HD_4400)
CHECK_PCI(pci, 0x0A16, CHIP_HD_4400)
CHECK_PCI(pci, 0x0412, CHIP_HD_4600)
CHECK_PCI(pci, 0x0416, CHIP_HD_4600)
CHECK_PCI(pci, 0x0D12, CHIP_HD_4600)
CHECK_PCI(pci, 0x041A, CHIP_HD_P4600)
CHECK_PCI(pci, 0x0A2E, CHIP_IRIS_5100)
CHECK_PCI(pci, 0x0D22, CHIP_IRISP_5200)
CHECK_PCI(pci, 0x0D26, CHIP_IRISP_P5200)
// Gen8
CHECK_PCI(pci, 0x161E, CHIP_HD_5300)
CHECK_PCI(pci, 0x1616, CHIP_HD_5500)
CHECK_PCI(pci, 0x1612, CHIP_HD_5600)
CHECK_PCI(pci, 0x161A, CHIP_HD_P5700)
CHECK_PCI(pci, 0x1626, CHIP_HD_6000)
CHECK_PCI(pci, 0x162B, CHIP_IRIS_6100)
CHECK_PCI(pci, 0x1622, CHIP_IRISP_6200)
CHECK_PCI(pci, 0x162A, CHIP_IRISP_P6300)
// Gen9
CHECK_PCI(pci, 0x1902, CHIP_HD_510)
CHECK_PCI(pci, 0x1906, CHIP_HD_510)
CHECK_PCI(pci, 0x190B, CHIP_HD_510)
CHECK_PCI(pci, 0x191E, CHIP_HD_515)
CHECK_PCI(pci, 0x1916, CHIP_HD_520)
CHECK_PCI(pci, 0x1921, CHIP_HD_520)
CHECK_PCI(pci, 0x1912, CHIP_HD_530)
CHECK_PCI(pci, 0x191B, CHIP_HD_530)
CHECK_PCI(pci, 0x191D, CHIP_HD_P530)
// TODO: the remaining Gen9 chips declared in chips.hpp (CHIP_HD_540,
// CHIP_HD_550, CHIP_IRIS_P555, CHIP_IRIS_580 and CHIP_IRIS_P580) have
// no device id mapped yet. Their real device ids must be looked up
// before adding them: 0x5917 is already taken by CHIP_UHD_620 below.
// Gen9.5
CHECK_PCI(pci, 0x3185, CHIP_UHD_600)
CHECK_PCI(pci, 0x3184, CHIP_UHD_605)
CHECK_PCI(pci, 0x5917, CHIP_UHD_620)
CHECK_PCI(pci, 0x3E91, CHIP_UHD_630)
CHECK_PCI(pci, 0x3E92, CHIP_UHD_630)
CHECK_PCI(pci, 0x3E98, CHIP_UHD_630)
CHECK_PCI(pci, 0x3E9B, CHIP_UHD_630)
CHECK_PCI(pci, 0x9BC5, CHIP_UHD_630)
CHECK_PCI(pci, 0x9BC8, CHIP_UHD_630)
CHECK_PCI(pci, 0x5902, CHIP_HD_610)
CHECK_PCI(pci, 0x5906, CHIP_HD_610)
CHECK_PCI(pci, 0x590B, CHIP_HD_610)
CHECK_PCI(pci, 0x591E, CHIP_HD_615)
CHECK_PCI(pci, 0x5912, CHIP_HD_630)
CHECK_PCI(pci, 0x591B, CHIP_HD_630)
CHECK_PCI(pci, 0x591A, CHIP_HD_P630)
CHECK_PCI(pci, 0x591D, CHIP_HD_P630)
CHECK_PCI(pci, 0x5926, CHIP_IRISP_640)
CHECK_PCI(pci, 0x5927, CHIP_IRISP_650)
CHECK_PCI_END
}

19
src/intel/pci.hpp Normal file
View File

@@ -0,0 +1,19 @@
#ifndef __PCI_INTEL__
#define __PCI_INTEL__
#include <stdint.h>
#include "../common/pci.hpp"
#include "chips.hpp"
/*
* doc: https://wiki.osdev.org/PCI#Class_Codes
* https://pci-ids.ucw.cz/read/PC
*/
// PCI vendor id used to look up Intel devices
#define PCI_VENDOR_ID_INTEL 0x8086
// Forward declaration; only pointers to struct pci are used in this header
struct pci;
// Maps the PCI device id of 'pci' to a GPUCHIP value from chips.hpp;
// returns CHIP_UNKNOWN_INTEL if the device id is not known.
GPUCHIP get_chip_from_pci_intel(struct pci* pci);
#endif

212
src/intel/uarch.cpp Normal file
View File

@@ -0,0 +1,212 @@
#include <stdint.h>
#include <cstddef>
#include <string.h>
#include <stdio.h>
#include "../common/uarch.hpp"
#include "../common/global.hpp"
#include "../common/gpu.hpp"
#include "chips.hpp"
#include "pci.hpp"
// Data not available
#define NA -1
// Unknown manufacturing process
#define UNK -1
/*
 * Mapping between iGPU and CPU uarchs
 * -----------------------------------
 * Gen6: Sandy Bridge (2nd Gen)
 * Gen7: Ivy Bridge (3rd Gen)
 * Gen7.5: Haswell (4th Gen)
 * Gen8: Broadwell (5th Gen)
 * Gen9: Skylake (6th Gen)
 * Gen9.5: Kaby Lake
 */
// MICROARCH values for the Intel backend.
// The order must match the entries of uarch_str below.
enum {
UARCH_UNKNOWN,
UARCH_GEN6,
UARCH_GEN7,
UARCH_GEN7_5,
UARCH_GEN8,
UARCH_GEN9,
UARCH_GEN9_5,
};
// Printable name of each microarchitecture, indexed by the UARCH_* value
static const char *uarch_str[] = {
/*[UARCH_UNKNOWN] = */ STRING_UNKNOWN,
/*[UARCH_GEN6] = */ "Gen6",
/*[UARCH_GEN7] = */ "Gen7",
/*[UARCH_GEN7_5] = */ "Gen7.5",
/*[UARCH_GEN8] = */ "Gen8",
/*[UARCH_GEN9] = */ "Gen9",
/*[UARCH_GEN9_5] = */ "Gen9.5",
};
// Graphic Tiers (GT)
// The order must match the entries of gt_str below.
enum {
GT_UNKNOWN,
GT1,
GT1_5,
GT2,
GT3,
GT3e,
GT4e
};
// Printable name of each graphics tier, indexed by the GT value
static const char *gt_str[] = {
/*[GT_UNKNOWN] = */ STRING_UNKNOWN,
/*[GT1] = */ "GT1",
/*[GT1_5] = */ "GT1.5",
/*[GT2] = */ "GT2",
/*[GT3] = */ "GT3",
/*[GT3e] = */ "GT3e",
/*[GT4e] = */ "GT4e",
};
/*
 * Helper macros to build lookup tables as if / else-if chains.
 *
 * CHECK_UARCH_*: chip -> (chip name, uarch, graphics tier, process).
 *   The entry whose chip_ equals arch->chip fills 'arch' by calling
 *   fill_uarch. CHECK_UARCH_END reports the unknown chip and fills
 *   'arch' with unknown values.
 *
 * CHECK_TOPO_*: (uarch, graphics tier) -> (EUs per subslice,
 *   subslices, slices). The entry matching arch->uarch and arch->gt
 *   fills 'topo' by calling fill_topo. CHECK_TOPO_END reports the
 *   combination that has no topology and fills 'topo' with -1.
 *
 * The *_END macros use the variables named 'arch' / 'topo' of the
 * enclosing function. Comparison arguments are parenthesized so that
 * any expression can be passed safely.
 */
#define CHECK_UARCH_START if (false) {}
#define CHECK_UARCH(arch, chip_, str, uarch, gt, process) \
  else if ((arch)->chip == (chip_)) fill_uarch(arch, str, uarch, gt, process);
#define CHECK_UARCH_END else { printBug("map_chip_to_uarch_intel: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, GT_UNKNOWN, 0); }
#define CHECK_TOPO_START if (false) {}
#define CHECK_TOPO(topo, arch, uarch_, gt_, eu_sub, sub, sli) \
  else if((arch)->uarch == (uarch_) && (arch)->gt == (gt_)) fill_topo(topo, eu_sub, sub, sli);
#define CHECK_TOPO_END else { printBug("get_topology_info: Unknown topology for uarch %d and GT %d", arch->uarch, arch->gt); fill_topo(topo, -1, -1, -1); }
/*
 * Stores the given topology in topo_i:
 *  - eu_sub: execution units per subslice
 *  - sub:    number of subslices
 *  - sli:    number of slices
 */
void fill_topo(struct topology_i* topo_i, int32_t eu_sub, int32_t sub, int32_t sli) {
  topo_i->eu_subslice = eu_sub;
  topo_i->subslices   = sub;
  topo_i->slices      = sli;
}
/*
 * Fills 'arch' with the data of a chip:
 *  - str:     chip name; a private copy is stored in arch->chip_str
 *  - u:       microarchitecture (UARCH_* value)
 *  - gt:      graphics tier (GT* value)
 *  - process: manufacturing process
 */
void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, int32_t gt, uint32_t process) {
  // Copy the name including its terminating NUL byte
  const size_t name_bytes = strlen(str) + 1;
  char* name_copy = (char *) emalloc(sizeof(char) * name_bytes);
  memcpy(name_copy, str, name_bytes);

  arch->chip_str = name_copy;
  arch->uarch = u;
  arch->process = process;
  arch->gt = gt;
}
/*
 * Fills 'arch' (chip name, uarch, graphics tier and manufacturing
 * process) according to arch->chip, which must be set by the caller.
 * If the chip is not in the table, CHECK_UARCH_END reports it and
 * fills 'arch' with unknown values.
 *
 * Syntax: (arch, chip, chip name, uarch, graphics tier, process)
 */
void map_chip_to_uarch_intel(struct uarch* arch) {
  CHECK_UARCH_START
  // Gen6
  CHECK_UARCH(arch, CHIP_HD_2000, "HD Graphics 2000", UARCH_GEN6, GT1, 32)
  CHECK_UARCH(arch, CHIP_HD_3000, "HD Graphics 3000", UARCH_GEN6, GT2, 32)
  // Gen7
  CHECK_UARCH(arch, CHIP_HD_2500, "HD Graphics 2500", UARCH_GEN7, GT1, 22)
  CHECK_UARCH(arch, CHIP_HD_4000, "HD Graphics 4000", UARCH_GEN7, GT2, 22)
  CHECK_UARCH(arch, CHIP_HD_P4000, "HD Graphics P4000", UARCH_GEN7, GT2, 22)
  // Gen7.5
  CHECK_UARCH(arch, CHIP_HD_4200, "HD Graphics 4200", UARCH_GEN7_5, GT2, 22)
  CHECK_UARCH(arch, CHIP_HD_4400, "HD Graphics 4400", UARCH_GEN7_5, GT2, 22)
  CHECK_UARCH(arch, CHIP_HD_4600, "HD Graphics 4600", UARCH_GEN7_5, GT2, 22)
  CHECK_UARCH(arch, CHIP_HD_P4600, "HD Graphics P4600", UARCH_GEN7_5, GT2, 22)
  CHECK_UARCH(arch, CHIP_IRIS_5100, "HD Iris 5100", UARCH_GEN7_5, GT3, 22)
  CHECK_UARCH(arch, CHIP_IRISP_5200, "HD Iris Pro 5200", UARCH_GEN7_5, GT3, 22)
  CHECK_UARCH(arch, CHIP_IRISP_P5200, "HD Iris Pro P5200", UARCH_GEN7_5, GT3, 22)
  // Gen8
  CHECK_UARCH(arch, CHIP_HD_5300, "HD Graphics 5300", UARCH_GEN8, GT2, 14)
  CHECK_UARCH(arch, CHIP_HD_5500, "HD Graphics 5500", UARCH_GEN8, GT2, 14)
  CHECK_UARCH(arch, CHIP_HD_5600, "HD Graphics 5600", UARCH_GEN8, GT2, 14)
  CHECK_UARCH(arch, CHIP_HD_P5700, "HD Graphics P5700", UARCH_GEN8, GT2, 14)
  CHECK_UARCH(arch, CHIP_HD_6000, "HD Graphics 6000", UARCH_GEN8, GT3, 14)
  CHECK_UARCH(arch, CHIP_IRIS_6100, "Iris Graphics 6100", UARCH_GEN8, GT3, 14)
  CHECK_UARCH(arch, CHIP_IRISP_6200, "Iris Pro Graphics 6200", UARCH_GEN8, GT3, 14)
  CHECK_UARCH(arch, CHIP_IRISP_P6300, "Iris Pro Graphics P6300", UARCH_GEN8, GT3, 14)
  // Gen9
  CHECK_UARCH(arch, CHIP_HD_510, "HD Graphics 510", UARCH_GEN9, GT1, 14)
  CHECK_UARCH(arch, CHIP_HD_515, "HD Graphics 515", UARCH_GEN9, GT2, 14)
  CHECK_UARCH(arch, CHIP_HD_520, "HD Graphics 520", UARCH_GEN9, GT2, 14)
  CHECK_UARCH(arch, CHIP_HD_530, "HD Graphics 530", UARCH_GEN9, GT2, 14)
  CHECK_UARCH(arch, CHIP_HD_P530, "HD Graphics P530", UARCH_GEN9, GT2, 14)
  // Gen9.5
  CHECK_UARCH(arch, CHIP_UHD_600, "UHD Graphics 600", UARCH_GEN9_5, GT1, 14)
  CHECK_UARCH(arch, CHIP_UHD_605, "UHD Graphics 605", UARCH_GEN9_5, GT1_5, 14)
  CHECK_UARCH(arch, CHIP_UHD_620, "UHD Graphics 620", UARCH_GEN9_5, GT2, 14)
  CHECK_UARCH(arch, CHIP_UHD_630, "UHD Graphics 630", UARCH_GEN9_5, GT2, 14)
  CHECK_UARCH(arch, CHIP_HD_610, "HD Graphics 610", UARCH_GEN9_5, GT1, 14)
  CHECK_UARCH(arch, CHIP_HD_615, "HD Graphics 615", UARCH_GEN9_5, GT2, 14)
  CHECK_UARCH(arch, CHIP_HD_630, "HD Graphics 630", UARCH_GEN9_5, GT2, 14)
  CHECK_UARCH(arch, CHIP_HD_P630, "HD Graphics P630", UARCH_GEN9_5, GT2, 14)
  CHECK_UARCH(arch, CHIP_IRISP_640, "Iris Plus Graphics 640", UARCH_GEN9_5, GT3e, 14)
  // This entry used to repeat CHIP_IRISP_640, so it could never match
  // and the Iris Plus Graphics 650 was reported as an unknown chip
  CHECK_UARCH(arch, CHIP_IRISP_650, "Iris Plus Graphics 650", UARCH_GEN9_5, GT3e, 14)
  CHECK_UARCH_END
}
/*
 * Returns the printable name of the microarchitecture of 'arch',
 * taken from the uarch_str table (indexed by arch->uarch).
 */
const char* get_str_uarch_intel(struct uarch* arch) {
  const char* uarch_name = uarch_str[arch->uarch];
  return uarch_name;
}
/*
 * Returns the printable name of the graphics tier of 'arch',
 * taken from the gt_str table (indexed by arch->gt).
 */
const char* get_str_gt(struct uarch* arch) {
  const char* tier_name = gt_str[arch->gt];
  return tier_name;
}
struct uarch* get_uarch_from_pci(struct pci* pci) {
struct uarch* arch = (struct uarch*) emalloc(sizeof(struct uarch));
arch->chip_str = NULL;
arch->chip = get_chip_from_pci_intel(pci);
if(arch->chip == CHIP_UNKNOWN_INTEL) {
return NULL;
}
else {
map_chip_to_uarch_intel(arch);
return arch;
}
}
/*
 * Returns a newly allocated string with the full name of the GPU:
 * the chip name of 'arch' prefixed with "Intel ".
 */
char* get_name_from_uarch(struct uarch* arch) {
  // 6 characters for the "Intel " prefix plus the terminating NUL byte
  const size_t chip_len = strlen(arch->chip_str);
  const size_t total_bytes = chip_len + 6 + 1;

  char* full_name = (char *) emalloc(sizeof(char) * total_bytes);
  sprintf(full_name, "Intel %s", arch->chip_str);

  return full_name;
}
/*
* Refs:
* Gen6: https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen6
* Gen7/7.5: https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen7
"The Compute Architecture of Intel Processor Graphics Gen7.5, v1.0"
* Gen8: https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen8
"The Compute Architecture of Intel Processor Graphics Gen8, v1.1"
* Gen9: https://en.wikichip.org/wiki/intel/microarchitectures/gen9#Configuration
"The Compute Architecture of Intel Processor Graphics Gen9, v1.0"
* Gen9.5: https://en.wikichip.org/wiki/intel/microarchitectures/gen9.5#Configuration
*/
struct topology_i* get_topology_info(struct uarch* arch) {
struct topology_i* topo = (struct topology_i*) emalloc(sizeof(struct topology_i));
// Syntax: (EU per subslice, Subslices, Slices)
CHECK_TOPO_START
// Gen6
CHECK_TOPO(topo, arch, UARCH_GEN6, GT1, 6, 1, 1)
CHECK_TOPO(topo, arch, UARCH_GEN6, GT2, 6, 2, 1)
// Gen7
CHECK_TOPO(topo, arch, UARCH_GEN7, GT1, 6, 1, 1)
CHECK_TOPO(topo, arch, UARCH_GEN7, GT2, 8, 2, 1)
CHECK_TOPO(topo, arch, UARCH_GEN7, GT3, 6, 1, 1)
// Gen7.5
CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT1, 10, 1, 1)
CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT2, 10, 2, 1)
CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT3, 10, 4, 1)
// Gen8
CHECK_TOPO(topo, arch, UARCH_GEN8, GT1, 6, 2, 1)
CHECK_TOPO(topo, arch, UARCH_GEN8, GT2, 8, 3, 1)
CHECK_TOPO(topo, arch, UARCH_GEN8, GT3, 8, 6, 2)
// Gen9
CHECK_TOPO(topo, arch, UARCH_GEN9, GT1, 6, 2, 1)
CHECK_TOPO(topo, arch, UARCH_GEN9, GT2, 8, 3, 1)
CHECK_TOPO(topo, arch, UARCH_GEN9, GT3, 8, 6, 2)
CHECK_TOPO(topo, arch, UARCH_GEN9, GT4e, 8, 9, 3)
// Gen9.5
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT1, 6, 2, 1)
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT1_5, 6, 3, 1)
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT2, 8, 3, 1)
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT3, 8, 6, 2)
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT3e, 8, 6, 2) // Same as GT3, but has eDRAM cache
CHECK_TOPO_END
return topo;
}

14
src/intel/uarch.hpp Normal file
View File

@@ -0,0 +1,14 @@
#ifndef __INTEL_UARCH__
#define __INTEL_UARCH__
#include "../common/gpu.hpp"
// Forward declaration; only pointers to struct uarch are used in this header
struct uarch;
// Builds the uarch of the Intel iGPU found at 'pci';
// returns NULL if the chip is not known.
struct uarch* get_uarch_from_pci(struct pci* pci);
// Returns a newly allocated string: the chip name prefixed with "Intel "
char* get_name_from_uarch(struct uarch* arch);
// NOTE(review): uarch.cpp defines the two functions below as returning
// 'const char*' (they return entries of static string tables), which
// does not match these declarations - confirm which one is intended.
// Returns the printable name of the graphics tier (e.g. "GT2")
char* get_str_gt(struct uarch* arch);
// Returns the printable name of the microarchitecture (e.g. "Gen9")
char* get_str_uarch_intel(struct uarch* arch);
// Returns a newly allocated topology (EUs per subslice, subslices, slices)
struct topology_i* get_topology_info(struct uarch* arch);
#endif

89
src/intel/udev.cpp Normal file
View File

@@ -0,0 +1,89 @@
#include <cstddef>
#include <cstring>
#include <cstdlib>
#include <cstdint>
#include <cerrno>
#include <cstdio>
#include <fcntl.h>
#include <unistd.h>
#include "../common/global.hpp"
#include "../common/pci.hpp"
// Pieces of the sysfs path of the frequency files. The full path is:
// _PATH_SYS_SYSTEM/<domain>:<bus>:<dev>.<func>/drm/card0/<frequency file>
// NOTE(review): both the PCI root "pci0000:00" and "card0" are
// hard-coded - confirm this holds on systems with more than one GPU.
#define _PATH_SYS_SYSTEM "/sys/devices/pci0000:00"
#define _PATH_SYS_DRM "/drm"
#define _PATH_CARD "/card0"
// Files holding the maximum / minimum GPU frequency, in MHz
#define _PATH_FREQUENCY_MAX "/gt_max_freq_mhz"
#define _PATH_FREQUENCY_MIN "/gt_min_freq_mhz"
// Size of the buffer used to build the path of a frequency file
#define _PATH_FREQUENCY_MAX_LEN 100
// Size of the buffer used by read_file
#define DEFAULT_FILE_SIZE 4096
// Value returned when the frequency can not be obtained
#define UNKNOWN_DATA -1
/*
 * Reads the contents of the file 'path' into a newly allocated,
 * zero-initialized buffer of DEFAULT_FILE_SIZE bytes.
 *
 * At most DEFAULT_FILE_SIZE-1 bytes are read, so the buffer is never
 * overflowed and is always NUL-terminated; longer files are truncated.
 *
 * On success, returns the buffer (to be released with free) and stores
 * the number of bytes read in *len.
 * Returns NULL if the file can not be opened, read or closed; in that
 * case *len is not modified and no memory is leaked.
 */
char* read_file(char* path, int* len) {
  int fd = open(path, O_RDONLY);

  if(fd == -1) {
    return NULL;
  }

  //File exists, read it
  const int block = 128;
  // Keep the last byte of the buffer as the NUL terminator
  const int capacity = DEFAULT_FILE_SIZE - 1;
  ssize_t bytes_read = 0;
  int offset = 0;
  char* buf = (char *) emalloc(sizeof(char)*DEFAULT_FILE_SIZE);
  memset(buf, 0, sizeof(char)*DEFAULT_FILE_SIZE);

  while (offset < capacity) {
    // Never ask for more bytes than the space left in the buffer
    int want = capacity - offset;
    if (want > block) want = block;

    bytes_read = read(fd, buf+offset, want);
    if (bytes_read <= 0) break;

    offset += (int) bytes_read;
  }

  if (bytes_read < 0) {
    // read failed: do not return partial data
    close(fd);
    free(buf);
    return NULL;
  }

  if (close(fd) == -1) {
    free(buf);
    return NULL;
  }

  *len = offset;
  return buf;
}
/*
 * Reads a frequency (in MHz) from the file 'path'.
 *
 * Returns the frequency, or UNKNOWN_DATA if the file can not be read,
 * its contents can not be parsed, or the value is out of the accepted
 * range (100 MHz - 10 GHz).
 *
 * The buffer returned by read_file is released right after parsing,
 * so it is not leaked on any return path.
 */
long get_freq_from_file(char* path) {
  int filelen;
  char* buf;

  if((buf = read_file(path, &filelen)) == NULL) {
    printWarn("Could not open '%s'", path);
    return UNKNOWN_DATA;
  }

  errno = 0;
  long ret = strtol(buf, NULL, 10);
  // Save errno before free, which is allowed to modify it
  int parse_errno = errno;
  free(buf);

  if(parse_errno != 0) {
    printBug("strtol: %s", strerror(parse_errno));
    return UNKNOWN_DATA;
  }

  // We will be getting the frequency in MHz
  // We consider it is an error if frequency is
  // greater than 10 GHz or less than 100 MHz
  // (this also rejects files without a number, since strtol returns 0)
  if(ret > 10000 || ret < 100) {
    printBug("Invalid data was read from file '%s': %ld\n", path, ret);
    return UNKNOWN_DATA;
  }

  return ret;
}
long get_max_freq_from_file(struct pci* pci) {
char path[_PATH_FREQUENCY_MAX_LEN];
sprintf(path, "%s/%04x:%02x:%02x.%d%s%s%s", _PATH_SYS_SYSTEM, pci->domain, pci->bus, pci->dev, pci->func, _PATH_SYS_DRM, _PATH_CARD, _PATH_FREQUENCY_MAX);
return get_freq_from_file(path);
}
long get_min_freq_from_file(struct pci* pci) {
char path[_PATH_FREQUENCY_MAX_LEN];
sprintf(path, "%s/%04x:%02x:%02x.%d%s%s%s", _PATH_SYS_SYSTEM, pci->domain, pci->bus, pci->dev, pci->func, _PATH_SYS_DRM, _PATH_CARD, _PATH_FREQUENCY_MIN);
return get_freq_from_file(path);
}

7
src/intel/udev.hpp Normal file
View File

@@ -0,0 +1,7 @@
#ifndef __UDEV__
#define __UDEV__
// Both functions read the frequency (in MHz) of the GPU located at
// 'pci' from sysfs and return -1 (UNKNOWN_DATA in udev.cpp) on failure.
// Maximum frequency (gt_max_freq_mhz)
long get_max_freq_from_file(struct pci* pci);
// Minimum frequency (gt_min_freq_mhz)
long get_min_freq_from_file(struct pci* pci);
#endif