24 Commits
v0.11 ... intel

Author SHA1 Message Date
Dr-Noob
a397eb398e [v0.11] Handle the case where the GPU is not found in the pci LUT 2021-12-18 20:12:41 +01:00
Dr-Noob
bfb9738132 [v0.11] Do not show error message when there is no Intel iGPU 2021-12-18 10:35:51 +01:00
Dr-Noob
6d4d8b621b [v0.11] Fix compilation error and ambiguity with CUDA and Intel backend when enabled at the same time due to functions with the same name 2021-12-18 10:14:14 +01:00
Dr-Noob
93889b2b18 [v0.11] Small adjustments to fix compilation on older compilers 2021-12-10 16:18:39 +01:00
Dr-Noob
b6ce96e746 [v0.11] Add missing Intel iGPU topologies. Add script to check for missing topo/uarchs 2021-12-10 15:55:59 +01:00
Dr-Noob
5f52f73fe0 [v0.11] Completed most of Intel iGPU topologies 2021-12-10 15:32:29 +01:00
Dr-Noob
e5deeb1309 [v0.11] Adding more Intel iGPU topologies 2021-12-10 15:16:29 +01:00
Dr-Noob
44a884fd07 [v0.11] Print peak performance in Intel iGPU 2021-12-09 20:28:07 +01:00
Dr-Noob
1663a36135 [v0.11] Fetch and print max Intel iGPU frequency using sysfs 2021-12-09 20:18:39 +01:00
Dr-Noob
844377f17a [v0.11] Add support for printing EUs (currently only in Gen9/Gen9.5) 2021-12-08 11:15:59 +01:00
Dr-Noob
2034bac006 [v0.11] Displaying Graphics Tier in Intel iGPUs 2021-11-27 14:02:02 +01:00
Dr-Noob
e7c4d5bf91 [v0.11] Adding Gen6, 7, 7.5 and 8 to database 2021-11-27 12:23:41 +01:00
Dr-Noob
b00050e739 [v0.11] Print available more information for iGPU 2021-11-27 11:22:16 +01:00
Dr-Noob
8db60b614d [v0.11] Adding most of Gen9/9.5 iGPUs to database 2021-11-27 11:10:01 +01:00
Dr-Noob
8740337145 [v0.11] Adding uarch backend for intel iGPUs 2021-11-26 12:52:45 +01:00
Dr-Noob
ce004725ad [v0.11] Working in printer backend to show logo and text for intel iGPU 2021-11-26 09:58:45 +01:00
Dr-Noob
310486a6a2 [v0.11] Fixes to recover CUDA functionality, ready for implementing Intel iGPU code 2021-11-26 09:33:57 +01:00
Dr-Noob
e5a4f91b20 [v0.11] Hacky way to solve CMake issues without requiring newer CMake versions 2021-11-26 09:19:24 +01:00
Dr-Noob
461e0d2ede [v0.11] Working in master GPU handler for supporting diverse GPU vendors 2021-11-26 08:22:30 +01:00
Dr-Noob
149e5ad62c [v0.11] Working for future support of Intel iGPUs 2021-11-25 19:03:52 +01:00
Dr-Noob
3502f48f71 [v0.11] Style adjustments in README 2021-11-25 18:06:00 +01:00
Dr-Noob
5acb4ff7dc [v0.11] Small style adjustments in README 2021-11-25 18:01:57 +01:00
Dr-Noob
074c159e5f [v0.11] Update README image 2021-11-25 17:58:57 +01:00
Dr-Noob
cedcfecb80 [v0.11] Dont show tensor cores when there is 0. Use MMA (matrix multiply accumulate) instead of TC (tensor cores) 2021-11-25 17:52:58 +01:00
32 changed files with 1082 additions and 256 deletions

View File

@@ -7,23 +7,22 @@ project(gpufetch CXX)
set(SRC_DIR "src") set(SRC_DIR "src")
set(COMMON_DIR "${SRC_DIR}/common") set(COMMON_DIR "${SRC_DIR}/common")
set(CUDA_DIR "${SRC_DIR}/cuda") set(CUDA_DIR "${SRC_DIR}/cuda")
set(INTEL_DIR "${SRC_DIR}/intel")
if(NOT WIN32) if(NOT DEFINED ENABLE_INTEL_BACKEND)
string(ASCII 27 Esc) set(ENABLE_INTEL_BACKEND true)
set(ColorReset "${Esc}[m")
set(ColorBold "${Esc}[1m")
set(Red "${Esc}[31m")
set(Green "${Esc}[32m")
set(BoldRed "${Esc}[1;31m")
set(BoldGreen "${Esc}[1;32m")
set(BoldYellow "${Esc}[1;33m")
endif() endif()
if(NOT DEFINED ENABLE_CUDA_BACKEND OR ENABLE_CUDA_BACKEND)
check_language(CUDA) check_language(CUDA)
if(CMAKE_CUDA_COMPILER) if(CMAKE_CUDA_COMPILER)
enable_language(CUDA) enable_language(CUDA)
set(ENABLE_CUDA_BACKEND true)
# Must link_directories early so add_executable(gpufetch ...) gets the right directories
link_directories(cuda_backend ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/lib)
else() else()
message(FATAL_ERROR "${BoldRed}[ERROR]${ColorReset} Unable to find CUDA compiler. You may use -DCMAKE_CUDA_COMPILER and -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT if CUDA is installed but not detected by CMake") set(ENABLE_CUDA_BACKEND false)
endif()
endif() endif()
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake") list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")
@@ -48,8 +47,24 @@ else()
link_libraries(${PCIUTILS_LIBRARIES}) link_libraries(${PCIUTILS_LIBRARIES})
endif() endif()
add_executable(gpufetch ${COMMON_DIR}/main.cpp ${COMMON_DIR}/args.cpp ${COMMON_DIR}/gpu.cpp ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/global.cpp ${COMMON_DIR}/printer.cpp ${COMMON_DIR}/master.cpp ${COMMON_DIR}/uarch.cpp)
set(SANITY_FLAGS "-Wfloat-equal -Wshadow -Wpointer-arith") set(SANITY_FLAGS "-Wfloat-equal -Wshadow -Wpointer-arith")
set(CMAKE_CXX_FLAGS "${SANITY_FLAGS} -Wall -Wextra -pedantic -fstack-protector-all -pedantic") set(CMAKE_CXX_FLAGS "${SANITY_FLAGS} -Wall -Wextra -pedantic -fstack-protector-all -pedantic -std=c++11")
# Intel iGPU backend: built as a static library and linked into gpufetch.
# BACKEND_INTEL is what the C++ sources test (#ifdef) to enable the Intel code paths.
if(ENABLE_INTEL_BACKEND)
  target_compile_definitions(gpufetch PUBLIC BACKEND_INTEL)

  add_library(intel_backend STATIC
    ${INTEL_DIR}/intel.cpp
    ${INTEL_DIR}/pci.cpp
    ${INTEL_DIR}/uarch.cpp
    ${INTEL_DIR}/udev.cpp
  )

  # Test the variable by name rather than expanding it: with ${PCIUTILS_FOUND}
  # an empty/undefined value turns the condition into a malformed "if(NOT )",
  # and any value that is not a boolean constant would be looked up as a
  # variable name. if(NOT <variable>) handles all of these cases.
  if(NOT PCIUTILS_FOUND)
    # pciutils is built by this project in that case; make sure it is built first.
    add_dependencies(intel_backend pciutils)
  endif()

  # Plain (keyword-less) signature on purpose: the other target_link_libraries()
  # calls on gpufetch in this file use it, and CMake does not allow mixing the
  # plain and keyword signatures on the same target.
  target_link_libraries(gpufetch intel_backend)
endif()
if(ENABLE_CUDA_BACKEND)
target_compile_definitions(gpufetch PUBLIC BACKEND_CUDA)
# https://en.wikipedia.org/w/index.php?title=CUDA&section=5#GPUs_supported # https://en.wikipedia.org/w/index.php?title=CUDA&section=5#GPUs_supported
# https://raw.githubusercontent.com/PointCloudLibrary/pcl/master/cmake/pcl_find_cuda.cmake # https://raw.githubusercontent.com/PointCloudLibrary/pcl/master/cmake/pcl_find_cuda.cmake
@@ -63,19 +78,42 @@ elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "8.0")
set(CMAKE_CUDA_ARCHITECTURES 20 21 30 32 35 37 50 52 53 60 61 62) set(CMAKE_CUDA_ARCHITECTURES 20 21 30 32 35 37 50 52 53 60 61 62)
endif() endif()
link_directories(${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/lib)
add_library(cuda_backend STATIC ${CUDA_DIR}/cuda.cpp ${CUDA_DIR}/uarch.cpp ${CUDA_DIR}/pci.cpp) add_library(cuda_backend STATIC ${CUDA_DIR}/cuda.cpp ${CUDA_DIR}/uarch.cpp ${CUDA_DIR}/pci.cpp)
add_executable(gpufetch ${COMMON_DIR}/main.cpp ${COMMON_DIR}/args.cpp ${COMMON_DIR}/gpu.cpp ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/global.cpp ${COMMON_DIR}/printer.cpp)
if(NOT ${PCIUTILS_FOUND}) if(NOT ${PCIUTILS_FOUND})
add_dependencies(cuda_backend pciutils) add_dependencies(cuda_backend pciutils)
add_dependencies(gpufetch pciutils)
endif() endif()
target_include_directories(cuda_backend PUBLIC ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/samples/common/inc ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/include) target_include_directories(cuda_backend PUBLIC ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/samples/common/inc ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/include)
target_link_libraries(cuda_backend cudart) target_link_libraries(cuda_backend PRIVATE cudart)
target_link_libraries(gpufetch cuda_backend pci z) target_link_libraries(gpufetch cuda_backend)
endif()
target_link_libraries(gpufetch pci z)
install(TARGETS gpufetch DESTINATION bin) install(TARGETS gpufetch DESTINATION bin)
# ANSI escape sequences used to colorize the configure-time report.
# They stay undefined on Windows, where they expand to empty strings.
if(NOT WIN32)
  string(ASCII 27 Esc)
  set(ColorReset "${Esc}[m")
  set(ColorBold "${Esc}[1m")
  set(Red "${Esc}[31m")
  set(Green "${Esc}[32m")
  set(BoldRed "${Esc}[1;31m")
  set(BoldGreen "${Esc}[1;32m")
  set(BoldYellow "${Esc}[1;33m")
endif()

# Prints one report line: "<label> ON" in green when <enabled> is true,
# "<label> OFF" in red otherwise.
#   label   - text shown before the state, e.g. "CUDA backend:"
#   enabled - value of the backend switch (may be empty, which counts as OFF)
function(gpufetch_report_backend label enabled)
  if(enabled)
    message(STATUS "${label} ${BoldGreen}ON${ColorReset}")
  else()
    message(STATUS "${label} ${BoldRed}OFF${ColorReset}")
  endif()
endfunction()

message(STATUS "----------------------")
message(STATUS "gpufetch build report:")
gpufetch_report_backend("Intel backend:" "${ENABLE_INTEL_BACKEND}")
gpufetch_report_backend("CUDA backend:" "${ENABLE_CUDA_BACKEND}")
message(STATUS "----------------------")

View File

@@ -1,19 +1,30 @@
<p align="center"><img width=50% src="./pictures/gpufetch.png"></p> <p align="center"><img width=50% src="./pictures/gpufetch.png"></p>
<div align="center">
![GitHub tag (latest by date)](https://img.shields.io/github/v/tag/Dr-Noob/gpufetch?label=gpufetch)
[![GitHub Repo stars](https://img.shields.io/github/stars/Dr-Noob/gpufetch?color=4CC61F)](https://github.com/Dr-Noob/gpufetch/stargazers)
[![GitHub issues](https://img.shields.io/github/issues/Dr-Noob/gpufetch)](https://github.com/Dr-Noob/gpufetch/issues)
[![License](https://img.shields.io/github/license/Dr-Noob/gpufetch?color=orange)](https://github.com/Dr-Noob/gpufetch/blob/master/LICENSE)
<h4 align="center">Simple yet fancy GPU architecture fetching tool</h4> <h4 align="center">Simple yet fancy GPU architecture fetching tool</h4>
&nbsp;
![gpu_img](pictures/2080ti.png) <p align="center"> </p>
<div align="center">
<img height="22px" src="https://img.shields.io/github/v/tag/Dr-Noob/gpufetch?label=gpufetch&style=flat-square">
<a href="https://github.com/Dr-Noob/gpufetch/stargazers">
<img height="22px" src="https://img.shields.io/github/stars/Dr-Noob/gpufetch?color=4CC61F&style=flat-square">
</a>
<a href="https://github.com/Dr-Noob/gpufetch/issues">
<img height="22px" src="https://img.shields.io/github/issues/Dr-Noob/gpufetch?style=flat-square">
</a>
<a href="https://github.com/Dr-Noob/gpufetch/blob/master/LICENSE">
<img height="22px" src="https://img.shields.io/github/license/Dr-Noob/gpufetch?color=orange&style=flat-square">
</a>
</div> </div>
<p align="center"> </p>
<p align="center">
gpufetch is a command-line tool written in C++ that displays the GPU information in a clean and beautiful way
</p>
<p align="center"><img width=80% src="./pictures/2080ti.png"></p>
# Table of contents # Table of contents
<!-- UPDATE with: doctoc --notitle README.md --> <!-- UPDATE with: doctoc --notitle README.md -->
<!-- START doctoc generated TOC please keep comment here to allow auto update --> <!-- START doctoc generated TOC please keep comment here to allow auto update -->

Binary file not shown.

Before

Width:  |  Height:  |  Size: 39 KiB

After

Width:  |  Height:  |  Size: 882 KiB

View File

@@ -13,8 +13,13 @@
#define NUM_COLORS 4 #define NUM_COLORS 4
#define COLOR_STR_NVIDIA "nvidia" #define COLOR_STR_NVIDIA "nvidia"
#define COLOR_STR_INTEL "intel"
#define COLOR_DEFAULT_NVIDIA "118,185,0:255,255,255:255,255,255:118,185,0" // +-----------------------+-----------------------+
// | Color logo | Color text |
// | Color 1 | Color 2 | Color 1 | Color 2 |
#define COLOR_DEFAULT_NVIDIA "118,185,000:255,255,255:255,255,255:118,185,000"
#define COLOR_DEFAULT_INTEL "015,125,194:230,230,230:040,150,220:230,230,230"
struct args_struct { struct args_struct {
bool help_flag; bool help_flag;
@@ -145,6 +150,7 @@ bool parse_color(char* optarg_str, struct color*** cs) {
bool free_ptr = true; bool free_ptr = true;
if(strcmp(optarg_str, COLOR_STR_NVIDIA) == 0) color_to_copy = COLOR_DEFAULT_NVIDIA; if(strcmp(optarg_str, COLOR_STR_NVIDIA) == 0) color_to_copy = COLOR_DEFAULT_NVIDIA;
else if(strcmp(optarg_str, COLOR_STR_INTEL) == 0) color_to_copy = COLOR_DEFAULT_INTEL;
else { else {
str_to_parse = optarg_str; str_to_parse = optarg_str;
free_ptr = false; free_ptr = false;

View File

@@ -1,32 +1,32 @@
#ifndef __ASCII__ #ifndef __ASCII__
#define __ASCII__ #define __ASCII__
#define COLOR_NONE "" #define C_NONE ""
#define COLOR_FG_BLACK "\x1b[30;1m" #define C_FG_BLACK "\x1b[30;1m"
#define COLOR_FG_RED "\x1b[31;1m" #define C_FG_RED "\x1b[31;1m"
#define COLOR_FG_GREEN "\x1b[32;1m" #define C_FG_GREEN "\x1b[32;1m"
#define COLOR_FG_YELLOW "\x1b[33;1m" #define C_FG_YELLOW "\x1b[33;1m"
#define COLOR_FG_BLUE "\x1b[34;1m" #define C_FG_BLUE "\x1b[34;1m"
#define COLOR_FG_MAGENTA "\x1b[35;1m" #define C_FG_MAGENTA "\x1b[35;1m"
#define COLOR_FG_CYAN "\x1b[36;1m" #define C_FG_CYAN "\x1b[36;1m"
#define COLOR_FG_WHITE "\x1b[37;1m" #define C_FG_WHITE "\x1b[37;1m"
#define COLOR_BG_BLACK "\x1b[40;1m" #define C_BG_BLACK "\x1b[40;1m"
#define COLOR_BG_RED "\x1b[41;1m" #define C_BG_RED "\x1b[41;1m"
#define COLOR_BG_GREEN "\x1b[42;1m" #define C_BG_GREEN "\x1b[42;1m"
#define COLOR_BG_YELLOW "\x1b[43;1m" #define C_BG_YELLOW "\x1b[43;1m"
#define COLOR_BG_BLUE "\x1b[44;1m" #define C_BG_BLUE "\x1b[44;1m"
#define COLOR_BG_MAGENTA "\x1b[45;1m" #define C_BG_MAGENTA "\x1b[45;1m"
#define COLOR_BG_CYAN "\x1b[46;1m" #define C_BG_CYAN "\x1b[46;1m"
#define COLOR_BG_WHITE "\x1b[47;1m" #define C_BG_WHITE "\x1b[47;1m"
#define COLOR_FG_B_BLACK "\x1b[90;1m" #define C_FG_B_BLACK "\x1b[90;1m"
#define COLOR_FG_B_RED "\x1b[91;1m" #define C_FG_B_RED "\x1b[91;1m"
#define COLOR_FG_B_GREEN "\x1b[92;1m" #define C_FG_B_GREEN "\x1b[92;1m"
#define COLOR_FG_B_YELLOW "\x1b[93;1m" #define C_FG_B_YELLOW "\x1b[93;1m"
#define COLOR_FG_B_BLUE "\x1b[94;1m" #define C_FG_B_BLUE "\x1b[94;1m"
#define COLOR_FG_B_MAGENTA "\x1b[95;1m" #define C_FG_B_MAGENTA "\x1b[95;1m"
#define COLOR_FG_B_CYAN "\x1b[96;1m" #define C_FG_B_CYAN "\x1b[96;1m"
#define COLOR_FG_B_WHITE "\x1b[97;1m" #define C_FG_B_WHITE "\x1b[97;1m"
#define COLOR_RESET "\x1b[m" #define C_RESET "\x1b[m"
struct ascii_logo { struct ascii_logo {
const char* art; const char* art;
@@ -59,6 +59,23 @@ $C2## ## ## ## ## ## ## ## #: :# \
$C2## ## ## ## ## ## ## ## ####### \ $C2## ## ## ## ## ## ## ## ####### \
$C2## ## ### ## ###### ## ## ## " $C2## ## ### ## ###### ## ## ## "
#define ASCII_INTEL \
"$C1 .#################. \
$C1 .#### ####. \
$C1 .## ### \
$C1 ## :## ### \
$C1 # ## :## ## \
$C1 ## ## ######. #### ###### :## ## \
$C1 ## ## ##: ##: ## ## ### :## ### \
$C1## ## ##: ##: ## :######## :## ## \
$C1## ## ##: ##: ## ##. . :## #### \
$C1## # ##: ##: #### #####: ## \
$C1 ## \
$C1 ###. ..o####. \
$C1 ######oo... ..oo####### \
$C1 o###############o "
// LONG LOGOS
#define ASCII_NVIDIA_L \ #define ASCII_NVIDIA_L \
"$C1 MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM \ "$C1 MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM \
$C1 MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM \ $C1 MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM \
@@ -76,14 +93,37 @@ $C1 olcc::; ,:ccloMMMMMMMMM \
$C1 :......oMMMMMMMMMMMMMMMMMMMMMM \ $C1 :......oMMMMMMMMMMMMMMMMMMMMMM \
$C1 :lllMMMMMMMMMMMMMMMMMMMMMMMMMM " $C1 :lllMMMMMMMMMMMMMMMMMMMMMMMMMM "
#define ASCII_INTEL_L \
"$C1 ###############@ \
$C1 ######@ ######@ \
$C1 ###@ ###@ \
$C1 ##@ ###@ \
$C1 ##@ ##@ \
$C1 ##@ ##@ \
$C1 @ ##@ ##@ ##@ \
$C1 #@ ##@ ########@ #####@ #####@ ##@ ##@ \
$C1 #@ ##@ ##@ ##@ ##@ ###@ ###@ ##@ ##@ \
$C1 #@ ##@ ##@ ##@ ##@ ##@ ##@ ##@ ##@ \
$C1 #@ ##@ ##@ ##@ ##@ #########@ ##@ ###@ \
$C1 #@ ##@ ##@ ##@ ##@ ##@ ##@ ####@ \
$C1 #@ #@ ##@ ##@ ####@ ########@ #@ ##@ \
$C1 ##@ \
$C1 ##@ \
$C1 ###@ ###@ \
$C1 ####@ #########@ \
$C1 #########@ ###############@ \
$C1 ##############################@ "
typedef struct ascii_logo asciiL; typedef struct ascii_logo asciiL;
// ------------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------
// | LOGO | W | H | REPLACE | COLORS LOGO (>0 && <10) | COLORS TEXT (=2) | // | LOGO | W | H | REPLACE | COLORS LOGO | COLORS TEXT |
// ------------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------
asciiL logo_nvidia = { ASCII_NVIDIA, 45, 19, false, {COLOR_FG_GREEN, COLOR_FG_WHITE}, {COLOR_FG_WHITE, COLOR_FG_GREEN} }; asciiL logo_nvidia = { ASCII_NVIDIA, 45, 19, false, {C_FG_GREEN, C_FG_WHITE}, {C_FG_WHITE, C_FG_GREEN} };
// Long variants | ---------------------------------------------------------------------------------------------------| asciiL logo_intel = { ASCII_INTEL, 48, 14, false, {C_FG_CYAN}, {C_FG_CYAN, C_FG_WHITE} };
asciiL logo_nvidia_l = { ASCII_NVIDIA_L, 50, 15, false, {COLOR_FG_GREEN, COLOR_FG_WHITE}, {COLOR_FG_WHITE, COLOR_FG_GREEN} }; // Long variants | ---------------------------------------------------------------------------------------|
asciiL logo_unknown = { NULL, 0, 0, false, {COLOR_NONE}, {COLOR_NONE, COLOR_NONE} }; asciiL logo_nvidia_l = { ASCII_NVIDIA_L, 50, 15, false, {C_FG_GREEN, C_FG_WHITE}, {C_FG_WHITE, C_FG_GREEN} };
asciiL logo_intel_l = { ASCII_INTEL_L, 62, 19, true, {C_BG_CYAN, C_BG_WHITE}, {C_FG_CYAN, C_FG_WHITE} };
asciiL logo_unknown = { NULL, 0, 0, false, {C_NONE}, {C_NONE, C_NONE} };
#endif #endif

View File

@@ -32,8 +32,6 @@ VENDOR get_gpu_vendor(struct gpu_info* gpu) {
return gpu->vendor; return gpu->vendor;
} }
double trunc(double val) { return ((int)(100 * val)) / 100.0; }
int32_t get_value_as_smallest_unit(char ** str, uint64_t value) { int32_t get_value_as_smallest_unit(char ** str, uint64_t value) {
int32_t ret; int32_t ret;
int max_len = 10; // Max is 8 for digits, 2 for units int max_len = 10; // Max is 8 for digits, 2 for units
@@ -145,6 +143,13 @@ char* get_str_peak_performance(struct gpu_info* gpu) {
} }
char* get_str_peak_performance_tensor(struct gpu_info* gpu) { char* get_str_peak_performance_tensor(struct gpu_info* gpu) {
return get_str_peak_performance_generic(gpu->peak_performance_t); return get_str_peak_performance_generic(gpu->peak_performance_tcu);
} }
// Formats a signed 32-bit integer as a decimal string.
// The string lives in a buffer obtained from ecalloc(); this function never
// frees it, so releasing it is left to the caller.
char* get_str_generic(int32_t data) {
  // Up to 10 digits, one optional '-' sign and the terminating '\0'
  uint32_t buf_len = 10 + 1 + 1;
  char* str = (char *) ecalloc(buf_len, sizeof(char));

  snprintf(str, buf_len, "%d", data);

  return str;
}

View File

@@ -9,7 +9,8 @@
#define UNKNOWN_FREQ -1 #define UNKNOWN_FREQ -1
enum { enum {
GPU_VENDOR_NVIDIA GPU_VENDOR_NVIDIA,
GPU_VENDOR_INTEL
}; };
enum { enum {
@@ -43,6 +44,12 @@ struct topology {
int32_t tensor_cores; int32_t tensor_cores;
}; };
// Second topology description, referenced from gpu_info through `topo_i`.
// NOTE(review): presumably the Intel iGPU counterpart of `struct topology`
// (slices / subslices / execution units) — confirm against the Intel backend.
struct topology_i {
int32_t slices; // number of slices
int32_t subslices; // number of subslices (NOTE(review): total or per slice? confirm)
int32_t eu_subslice; // presumably execution units per subslice — TODO confirm
};
struct memory { struct memory {
int64_t size_bytes; int64_t size_bytes;
MEMTYPE type; MEMTYPE type;
@@ -58,10 +65,11 @@ struct gpu_info {
int64_t freq; int64_t freq;
struct pci* pci; struct pci* pci;
struct topology* topo; struct topology* topo;
struct topology_i* topo_i;
struct memory* mem; struct memory* mem;
struct cache* cach; struct cache* cach;
int64_t peak_performance; int64_t peak_performance;
int64_t peak_performance_t; int64_t peak_performance_tcu;
int32_t idx; int32_t idx;
}; };
@@ -75,5 +83,6 @@ char* get_str_memory_clock(struct gpu_info* gpu);
char* get_str_l2(struct gpu_info* gpu); char* get_str_l2(struct gpu_info* gpu);
char* get_str_peak_performance(struct gpu_info* gpu); char* get_str_peak_performance(struct gpu_info* gpu);
char* get_str_peak_performance_tensor(struct gpu_info* gpu); char* get_str_peak_performance_tensor(struct gpu_info* gpu);
char* get_str_generic(int32_t data);
#endif #endif

View File

@@ -4,6 +4,7 @@
#include "args.hpp" #include "args.hpp"
#include "global.hpp" #include "global.hpp"
#include "master.hpp"
#include "../cuda/cuda.hpp" #include "../cuda/cuda.hpp"
#include "../cuda/uarch.hpp" #include "../cuda/uarch.hpp"
@@ -65,18 +66,19 @@ int main(int argc, char* argv[]) {
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
struct gpu_list* list = get_gpu_list();
if(list_gpus()) { if(list_gpus()) {
return print_gpus_list(); return print_gpus_list(list);
} }
set_log_level(true); set_log_level(true);
printWarn("gpufetch is in beta. The provided information may be incomplete or wrong.\n\ printf("[WARNING]: gpufetch is in beta. The provided information may be incomplete or wrong.\n\
If you want to help to improve gpufetch, please compare the output of the program\n\ If you want to help to improve gpufetch, please compare the output of the program\n\
with a reliable source which you know is right (e.g, techpowerup.com) and report\n\ with a reliable source which you know is right (e.g, techpowerup.com) and report\n\
any inconsistencies to https://github.com/Dr-Noob/gpufetch/issues"); any inconsistencies to https://github.com/Dr-Noob/gpufetch/issues\n");
struct gpu_info* gpu = get_gpu_info(get_gpu_idx()); struct gpu_info* gpu = get_gpu_info(list, get_gpu_idx());
if(gpu == NULL) if(gpu == NULL)
return EXIT_FAILURE; return EXIT_FAILURE;

62
src/common/master.cpp Normal file
View File

@@ -0,0 +1,62 @@
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include "master.hpp"
#include "../cuda/cuda.hpp"
#include "../intel/intel.hpp"
// Number of gpu_info pointers allocated for a gpu_list in get_gpu_list().
#define MAX_GPUS 1000
// Collection of the GPUs reported by the backends compiled in.
// Only forward-declared in master.hpp, so its fields are private to this file.
struct gpu_list {
struct gpu_info ** gpus; // array of pointers to the per-GPU information
int num_gpus; // number of GPUs counted by get_gpu_list()
};
/*
 * Builds the list of GPUs reported by every backend that was compiled in.
 *
 * CUDA devices (BACKEND_CUDA) come first, in the order of their CUDA index,
 * followed by the Intel iGPU (BACKEND_INTEL) if one is reported.
 * At most MAX_GPUS devices are stored; entries beyond num_gpus are NULL.
 *
 * Returns a heap-allocated list. The process is terminated if the list
 * itself cannot be allocated.
 */
struct gpu_list* get_gpu_list() {
  struct gpu_list* list = (struct gpu_list*) malloc(sizeof(struct gpu_list));
  if(list == NULL) {
    perror("malloc");
    exit(EXIT_FAILURE);
  }

  list->num_gpus = 0;
  // calloc (instead of malloc) so that every unused slot is a well-defined NULL
  list->gpus = (struct gpu_info**) calloc(MAX_GPUS, sizeof(struct gpu_info*));
  if(list->gpus == NULL) {
    perror("calloc");
    exit(EXIT_FAILURE);
  }

#ifdef BACKEND_CUDA
  // Probe CUDA devices by increasing index until the backend returns NULL.
  // The CUDA index equals the number of GPUs stored so far because CUDA is
  // the first backend to be queried.
  while(list->num_gpus < MAX_GPUS) {
    struct gpu_info* gpu = get_gpu_info_cuda(list->num_gpus);
    if(gpu == NULL) break;
    list->gpus[list->num_gpus++] = gpu;
  }
#endif

#ifdef BACKEND_INTEL
  // The Intel backend reports a single GPU at most
  if(list->num_gpus < MAX_GPUS) {
    struct gpu_info* gpu = get_gpu_info_intel();
    if(gpu != NULL) list->gpus[list->num_gpus++] = gpu;
  }
#endif

  return list;
}
bool print_gpus_list(struct gpu_list* list) {
for(int i=0; i < list->num_gpus; i++) {
printf("GPU %d: ", i);
if(list->gpus[i]->vendor == GPU_VENDOR_NVIDIA) {
#ifdef BACKEND_CUDA
print_gpu_cuda(list->gpus[i]);
#endif
}
else if(list->gpus[i]->vendor == GPU_VENDOR_INTEL) {
#ifdef BACKEND_INTEL
print_gpu_intel(list->gpus[i]);
#endif
}
}
return true;
}
/*
 * Returns the GPU stored at position idx of the list.
 *
 * Returns NULL when the list is NULL or when idx does not refer to one of
 * the list->num_gpus populated entries, instead of reading outside of them.
 */
struct gpu_info* get_gpu_info(struct gpu_list* list, int idx) {
  if(list == NULL || list->gpus == NULL) return NULL;
  if(idx < 0 || idx >= list->num_gpus) return NULL;

  return list->gpus[idx];
}

12
src/common/master.hpp Normal file
View File

@@ -0,0 +1,12 @@
// Vendor-agnostic interface to the list of GPUs found by the enabled backends.
#ifndef __GPU_LIST__
#define __GPU_LIST__
#include "gpu.hpp"
// Opaque type: only declared here, its fields are defined in master.cpp.
struct gpu_list;
// Queries the compiled-in backends and returns the resulting list of GPUs.
struct gpu_list* get_gpu_list();
// Prints one "GPU <index>: ..." entry per GPU in the list.
bool print_gpus_list(struct gpu_list* list);
// Returns the GPU stored at position idx of the list.
struct gpu_info* get_gpu_info(struct gpu_list* list, int idx);
#endif

View File

@@ -2,33 +2,61 @@
#include "pci.hpp" #include "pci.hpp"
#include <cstddef> #include <cstddef>
/*
* doc: https://wiki.osdev.org/PCI#Class_Codes
* https://pci-ids.ucw.cz/read/PC
*/
#define VENDOR_ID_NVIDIA 0x10de
#define CLASS_VGA_CONTROLLER 0x0300 #define CLASS_VGA_CONTROLLER 0x0300
uint16_t pciutils_get_pci_vendor_id(struct pci_dev *devices) { bool pciutils_is_vendor_id_present(struct pci_dev *devices, int id) {
for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) { for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
if(dev->vendor_id == VENDOR_ID_NVIDIA && dev->device_class == CLASS_VGA_CONTROLLER) { if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
return dev->vendor_id; return true;
} }
} }
printErr("Unable to find a CUDA device using pciutils");
return 0;
}
uint16_t pciutils_get_pci_device_id(struct pci_dev *devices) { printWarn("Unable to find a valid device for id %d using pciutils", id);
return false;
}
uint16_t pciutils_get_pci_device_id(struct pci_dev *devices, int id) {
for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) { for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
if(dev->vendor_id == VENDOR_ID_NVIDIA && dev->device_class == CLASS_VGA_CONTROLLER) { if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
return dev->device_id; return dev->device_id;
} }
} }
printErr("Unable to find a CUDA device using pciutils");
printErr("Unable to find a valid device for id %d using pciutils", id);
return 0; return 0;
} }
/*
 * Copies the PCI location (domain, bus, device, function) of the first VGA
 * controller whose vendor matches `id` into `pci`.
 *
 * pci:     structure receiving the location
 * devices: head of the pciutils device list
 * id:      PCI vendor id to look for
 *
 * The search stops at the first match so that the location belongs to the
 * same device that pciutils_get_pci_device_id() reports for this vendor
 * (that function also returns the first match). `pci` is left untouched and
 * an error is printed when no device matches.
 */
void pciutils_set_pci_bus(struct pci* pci, struct pci_dev *devices, int id) {
  for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
    if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
      pci->domain = dev->domain;
      pci->bus = dev->bus;
      pci->dev = dev->dev;
      pci->func = dev->func;
      return;
    }
  }

  printErr("Unable to find a valid device for id %d using pciutils", id);
}
/*
 * Builds the PCI description (vendor id, device id and bus location) of the
 * VGA controller whose vendor matches `id`.
 *
 * devices: head of the pciutils device list
 * id:      PCI vendor id to look for
 *
 * Returns a newly allocated struct pci, or NULL when no VGA controller of
 * that vendor is present.
 */
struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id) {
  // Check before allocating, so that nothing is leaked on the NULL path
  if(!pciutils_is_vendor_id_present(devices, id)) {
    return NULL;
  }

  struct pci* pci = (struct pci*) emalloc(sizeof(struct pci));

  // TODO: Refactor this; instead of 2xGet + 1xSet, do it better
  pci->vendor_id = id;
  pci->device_id = pciutils_get_pci_device_id(devices, id);
  pciutils_set_pci_bus(pci, devices, id);

  return pci;
}
struct pci_dev *get_pci_devices_from_pciutils() { struct pci_dev *get_pci_devices_from_pciutils() {
struct pci_access *pacc; struct pci_access *pacc;
struct pci_dev *dev; struct pci_dev *dev;

View File

@@ -6,8 +6,16 @@ extern "C" {
#include <pci/pci.h> #include <pci/pci.h>
} }
uint16_t pciutils_get_pci_vendor_id(struct pci_dev *devices); struct pci {
uint16_t pciutils_get_pci_device_id(struct pci_dev *devices); uint16_t vendor_id;
uint16_t device_id;
uint16_t domain;
uint16_t bus;
uint16_t dev;
uint16_t func;
};
struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id);
struct pci_dev *get_pci_devices_from_pciutils(); struct pci_dev *get_pci_devices_from_pciutils();
#endif #endif

View File

@@ -9,6 +9,8 @@
#include "../common/global.hpp" #include "../common/global.hpp"
#include "../common/gpu.hpp" #include "../common/gpu.hpp"
#include "../intel/uarch.hpp"
#include "../intel/intel.hpp"
#include "../cuda/cuda.hpp" #include "../cuda/cuda.hpp"
#include "../cuda/uarch.hpp" #include "../cuda/uarch.hpp"
@@ -34,11 +36,13 @@ enum {
ATTRIBUTE_CHIP, ATTRIBUTE_CHIP,
ATTRIBUTE_UARCH, ATTRIBUTE_UARCH,
ATTRIBUTE_TECHNOLOGY, ATTRIBUTE_TECHNOLOGY,
ATTRIBUTE_GT,
ATTRIBUTE_FREQUENCY, ATTRIBUTE_FREQUENCY,
ATTRIBUTE_STREAMINGMP, ATTRIBUTE_STREAMINGMP,
ATTRIBUTE_CORESPERMP, ATTRIBUTE_CORESPERMP,
ATTRIBUTE_CUDA_CORES, ATTRIBUTE_CUDA_CORES,
ATTRIBUTE_TENSOR_CORES, ATTRIBUTE_TENSOR_CORES,
ATTRIBUTE_EUS,
ATTRIBUTE_L2, ATTRIBUTE_L2,
ATTRIBUTE_MEMORY, ATTRIBUTE_MEMORY,
ATTRIBUTE_MEMORY_FREQ, ATTRIBUTE_MEMORY_FREQ,
@@ -52,17 +56,19 @@ static const char* ATTRIBUTE_FIELDS [] = {
"GPU processor:", "GPU processor:",
"Microarchitecture:", "Microarchitecture:",
"Technology:", "Technology:",
"Graphics Tier:",
"Max Frequency:", "Max Frequency:",
"SMs:", "SMs:",
"Cores/SM:", "Cores/SM:",
"CUDA Cores:", "CUDA Cores:",
"Tensor Cores:", "Tensor Cores:",
"Execution Units:",
"L2 Size:", "L2 Size:",
"Memory:", "Memory:",
"Memory frequency:", "Memory frequency:",
"Bus width:", "Bus width:",
"Peak Performance:", "Peak Performance:",
"Peak Performance (TC):", "Peak Performance (MMA):",
}; };
static const char* ATTRIBUTE_FIELDS_SHORT [] = { static const char* ATTRIBUTE_FIELDS_SHORT [] = {
@@ -70,17 +76,19 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
"Processor:", "Processor:",
"uArch:", "uArch:",
"Technology:", "Technology:",
"GT:",
"Max Freq.:", "Max Freq.:",
"SMs:", "SMs:",
"Cores/SM:", "Cores/SM:",
"CUDA Cores:", "CUDA Cores:",
"Tensor Cores:", "Tensor Cores:",
"EUs:",
"L2 Size:", "L2 Size:",
"Memory:", "Memory:",
"Memory freq.:", "Memory freq.:",
"Bus width:", "Bus width:",
"Peak Perf.:", "Peak Perf.:",
"Peak Perf.(TC):", "Peak Perf.(MMA):",
}; };
struct terminal { struct terminal {
@@ -200,23 +208,32 @@ void replace_bgbyfg_color(struct ascii_logo* logo) {
for(int i=0; i < 2; i++) { for(int i=0; i < 2; i++) {
if(logo->color_ascii[i] == NULL) break; if(logo->color_ascii[i] == NULL) break;
if(strcmp(logo->color_ascii[i], COLOR_BG_BLACK) == 0) strcpy(logo->color_ascii[i], COLOR_FG_BLACK); if(strcmp(logo->color_ascii[i], C_BG_BLACK) == 0) strcpy(logo->color_ascii[i], C_FG_BLACK);
else if(strcmp(logo->color_ascii[i], COLOR_BG_RED) == 0) strcpy(logo->color_ascii[i], COLOR_FG_RED); else if(strcmp(logo->color_ascii[i], C_BG_RED) == 0) strcpy(logo->color_ascii[i], C_FG_RED);
else if(strcmp(logo->color_ascii[i], COLOR_BG_GREEN) == 0) strcpy(logo->color_ascii[i], COLOR_FG_GREEN); else if(strcmp(logo->color_ascii[i], C_BG_GREEN) == 0) strcpy(logo->color_ascii[i], C_FG_GREEN);
else if(strcmp(logo->color_ascii[i], COLOR_BG_YELLOW) == 0) strcpy(logo->color_ascii[i], COLOR_FG_YELLOW); else if(strcmp(logo->color_ascii[i], C_BG_YELLOW) == 0) strcpy(logo->color_ascii[i], C_FG_YELLOW);
else if(strcmp(logo->color_ascii[i], COLOR_BG_BLUE) == 0) strcpy(logo->color_ascii[i], COLOR_FG_BLUE); else if(strcmp(logo->color_ascii[i], C_BG_BLUE) == 0) strcpy(logo->color_ascii[i], C_FG_BLUE);
else if(strcmp(logo->color_ascii[i], COLOR_BG_MAGENTA) == 0) strcpy(logo->color_ascii[i], COLOR_FG_MAGENTA); else if(strcmp(logo->color_ascii[i], C_BG_MAGENTA) == 0) strcpy(logo->color_ascii[i], C_FG_MAGENTA);
else if(strcmp(logo->color_ascii[i], COLOR_BG_CYAN) == 0) strcpy(logo->color_ascii[i], COLOR_FG_CYAN); else if(strcmp(logo->color_ascii[i], C_BG_CYAN) == 0) strcpy(logo->color_ascii[i], C_FG_CYAN);
else if(strcmp(logo->color_ascii[i], COLOR_BG_WHITE) == 0) strcpy(logo->color_ascii[i], COLOR_FG_WHITE); else if(strcmp(logo->color_ascii[i], C_BG_WHITE) == 0) strcpy(logo->color_ascii[i], C_FG_WHITE);
}
}
/*
 * Picks between the long and the short variant of a logo.
 *
 * logo_long:  wide variant, used only when it fits in the terminal
 * logo_short: fallback variant
 * term:       terminal description; may be NULL
 * lf:         forwarded to ascii_fits_screen() together with the terminal width
 *
 * Returns logo_long when the terminal is known and the long logo fits in it,
 * logo_short otherwise. The NULL check keeps the behaviour that
 * choose_ascii_art() had before this helper was introduced, where an unknown
 * terminal selected the short logo instead of dereferencing a NULL pointer.
 */
struct ascii_logo* choose_ascii_art_aux(struct ascii_logo* logo_long, struct ascii_logo* logo_short, struct terminal* term, int lf) {
  if(term != NULL && ascii_fits_screen(term->w, *logo_long, lf)) {
    return logo_long;
  }

  return logo_short;
}
void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* term, int lf) { void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* term, int lf) {
if(art->vendor == GPU_VENDOR_NVIDIA) { if(art->vendor == GPU_VENDOR_NVIDIA) {
if(term != NULL && ascii_fits_screen(term->w, logo_nvidia_l, lf)) art->art = choose_ascii_art_aux(&logo_nvidia_l, &logo_nvidia, term, lf);
art->art = &logo_nvidia_l; }
else else if(art->vendor == GPU_VENDOR_INTEL) {
art->art = &logo_nvidia; art->art = choose_ascii_art_aux(&logo_intel_l, &logo_intel, term, lf);
} }
else { else {
art->art = &logo_unknown; art->art = &logo_unknown;
@@ -228,10 +245,10 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
switch(art->style) { switch(art->style) {
case STYLE_LEGACY: case STYLE_LEGACY:
logo->replace_blocks = false; logo->replace_blocks = false;
strcpy(logo->color_text[0], COLOR_NONE); strcpy(logo->color_text[0], C_NONE);
strcpy(logo->color_text[1], COLOR_NONE); strcpy(logo->color_text[1], C_NONE);
strcpy(logo->color_ascii[0], COLOR_NONE); strcpy(logo->color_ascii[0], C_NONE);
strcpy(logo->color_ascii[1], COLOR_NONE); strcpy(logo->color_ascii[1], C_NONE);
art->reset[0] = '\0'; art->reset[0] = '\0';
break; break;
case STYLE_RETRO: case STYLE_RETRO:
@@ -245,7 +262,7 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
strcpy(logo->color_ascii[0], rgb_to_ansi(cs[0], logo->replace_blocks, true)); strcpy(logo->color_ascii[0], rgb_to_ansi(cs[0], logo->replace_blocks, true));
strcpy(logo->color_ascii[1], rgb_to_ansi(cs[1], logo->replace_blocks, true)); strcpy(logo->color_ascii[1], rgb_to_ansi(cs[1], logo->replace_blocks, true));
} }
strcpy(art->reset, COLOR_RESET); strcpy(art->reset, C_RESET);
break; break;
case STYLE_INVALID: case STYLE_INVALID:
default: default:
@@ -342,6 +359,48 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t text_space, con
printf("\n"); printf("\n");
} }
#ifdef BACKEND_INTEL
// Prints the gpufetch output (logo plus attribute list) for an Intel GPU.
//
// gpu:  GPU whose information is printed
// s:    style forwarded to set_ascii()
// cs:   colors forwarded to choose_ascii_art()
// term: terminal description; its width (term->w) drives the layout
//
// Returns false when set_ascii() returns NULL, true otherwise.
// NOTE(review): term is dereferenced unconditionally below — confirm that
// callers never pass NULL.
bool print_gpufetch_intel(struct gpu_info* gpu, STYLE s, struct color** cs, struct terminal* term) {
struct ascii* art = set_ascii(get_gpu_vendor(gpu), s);
if(art == NULL)
return false;
// Fetch every value to be displayed as a string
char* gpu_name = get_str_gpu_name(gpu);
char* uarch = get_str_uarch_intel(gpu->arch);
char* gt = get_str_gt(gpu->arch);
char* manufacturing_process = get_str_process(gpu->arch);
char* eus = get_str_eu(gpu);
char* max_frequency = get_str_freq(gpu);
char* pp = get_str_peak_performance(gpu);
// Register the attributes.
// NOTE(review): presumably they are printed in the order they are set here;
// confirm in setAttribute() before reordering these calls.
setAttribute(art, ATTRIBUTE_NAME, gpu_name);
setAttribute(art, ATTRIBUTE_UARCH, uarch);
setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
setAttribute(art, ATTRIBUTE_GT, gt);
setAttribute(art, ATTRIBUTE_EUS, eus);
setAttribute(art, ATTRIBUTE_PEAK, pp);
// Start with the full field names and choose the logo based on them
const char** attribute_fields = ATTRIBUTE_FIELDS;
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
uint32_t longest_field = longest_field_length(art, longest_attribute);
choose_ascii_art(art, cs, term, longest_field);
if(!ascii_fits_screen(term->w, *art->art, longest_field)) {
// Even with the logo that was chosen, the output does not fit the terminal:
// switch to the shorter field names and recalculate the longest attribute
attribute_fields = ATTRIBUTE_FIELDS_SHORT;
longest_attribute = longest_attribute_length(art, attribute_fields);
}
// The third argument is the terminal width left once the logo width is subtracted
print_ascii_generic(art, longest_attribute, term->w - art->art->width, attribute_fields);
return true;
}
#endif
#ifdef BACKEND_CUDA
bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struct terminal* term) { bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struct terminal* term) {
struct ascii* art = set_ascii(get_gpu_vendor(gpu), s); struct ascii* art = set_ascii(get_gpu_vendor(gpu), s);
@@ -350,7 +409,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
char* gpu_name = get_str_gpu_name(gpu); char* gpu_name = get_str_gpu_name(gpu);
char* gpu_chip = get_str_chip(gpu->arch); char* gpu_chip = get_str_chip(gpu->arch);
char* uarch = get_str_uarch(gpu->arch); char* uarch = get_str_uarch_cuda(gpu->arch);
char* comp_cap = get_str_cc(gpu->arch); char* comp_cap = get_str_cc(gpu->arch);
char* manufacturing_process = get_str_process(gpu->arch); char* manufacturing_process = get_str_process(gpu->arch);
char* sms = get_str_sm(gpu); char* sms = get_str_sm(gpu);
@@ -380,7 +439,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
setAttribute(art, ATTRIBUTE_STREAMINGMP, sms); setAttribute(art, ATTRIBUTE_STREAMINGMP, sms);
setAttribute(art, ATTRIBUTE_CORESPERMP, corespersm); setAttribute(art, ATTRIBUTE_CORESPERMP, corespersm);
setAttribute(art, ATTRIBUTE_CUDA_CORES, cores); setAttribute(art, ATTRIBUTE_CUDA_CORES, cores);
if(gpu->topo->tensor_cores >= 0) { if(gpu->topo->tensor_cores > 0) {
setAttribute(art, ATTRIBUTE_TENSOR_CORES, tensorc); setAttribute(art, ATTRIBUTE_TENSOR_CORES, tensorc);
} }
setAttribute(art, ATTRIBUTE_MEMORY, mem); setAttribute(art, ATTRIBUTE_MEMORY, mem);
@@ -388,7 +447,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width); setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
setAttribute(art, ATTRIBUTE_L2, l2); setAttribute(art, ATTRIBUTE_L2, l2);
setAttribute(art, ATTRIBUTE_PEAK, pp); setAttribute(art, ATTRIBUTE_PEAK, pp);
if(gpu->topo->tensor_cores >= 0) { if(gpu->topo->tensor_cores > 0) {
setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor); setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor);
} }
@@ -416,6 +475,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
return true; return true;
} }
#endif
struct terminal* get_terminal_size() { struct terminal* get_terminal_size() {
struct terminal* term = (struct terminal*) emalloc(sizeof(struct terminal)); struct terminal* term = (struct terminal*) emalloc(sizeof(struct terminal));
@@ -448,5 +508,17 @@ struct terminal* get_terminal_size() {
bool print_gpufetch(struct gpu_info* gpu, STYLE s, struct color** cs) { bool print_gpufetch(struct gpu_info* gpu, STYLE s, struct color** cs) {
struct terminal* term = get_terminal_size(); struct terminal* term = get_terminal_size();
if(gpu->vendor == GPU_VENDOR_NVIDIA)
#ifdef BACKEND_CUDA
return print_gpufetch_cuda(gpu, s, cs, term); return print_gpufetch_cuda(gpu, s, cs, term);
#else
return false;
#endif
else {
#ifdef BACKEND_INTEL
return print_gpufetch_intel(gpu, s, cs, term);
#else
return false;
#endif
}
} }

28
src/common/uarch.cpp Normal file
View File

@@ -0,0 +1,28 @@
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "global.hpp"
#include "uarch.hpp"
/*
 * Returns a newly allocated string describing the manufacturing
 * process stored in arch->process:
 *  - UNK:          STRING_UNKNOWN
 *  - value > 100:  value/100 with two decimals and the "um" suffix
 *  - 0 < value:    the value with the "nm" suffix
 *  - anything else is reported with printBug and STRING_UNKNOWN is returned
 * The caller owns the returned string.
 */
char* get_str_process(struct uarch* arch) {
  // The buffer must hold STRING_UNKNOWN as well as any formatted value
  // (the longest one, "%.2fum" with INT32_MAX, needs 14 bytes), so do not
  // size it only after STRING_UNKNOWN
  size_t size = strlen(STRING_UNKNOWN) + 1;
  if(size < 32) size = 32;

  char* str = (char *) emalloc(sizeof(char) * size);
  int32_t process = arch->process;

  if(process == UNK) {
    snprintf(str, size, "%s", STRING_UNKNOWN);
  }
  else if(process > 100) {
    snprintf(str, size, "%.2fum", (double)process/100);
  }
  else if(process > 0) {
    snprintf(str, size, "%dnm", process);
  }
  else {
    snprintf(str, size, "%s", STRING_UNKNOWN);
    printBug("Found invalid process: '%d'", process);
  }

  return str;
}

31
src/common/uarch.hpp Normal file
View File

@@ -0,0 +1,31 @@
#ifndef __COMMON_UARCH__
#define __COMMON_UARCH__

// Fixed width integer types are used below; include them here so this
// header does not depend on the include order of its users
#include <stdint.h>

// Data not available
#define NA -1
// Unknown manufacturing process
#define UNK -1

// Backend specific chip identifier (see the chips.hpp of each backend)
typedef uint32_t GPUCHIP;
// Backend specific microarchitecture identifier
typedef uint32_t MICROARCH;

/*
 * Microarchitecture description shared by all the backends.
 * Each backend only fills the fields that apply to it.
 */
struct uarch {
  // NVIDIA specific
  int32_t cc_major;
  int32_t cc_minor;
  int32_t compute_capability;

  // Intel specific
  int32_t gt; // Graphics tier
  int32_t eu;

  MICROARCH uarch;
  GPUCHIP chip;
  int32_t process; // Manufacturing process (UNK if unknown), see get_str_process
  char* uarch_str;
  char* chip_str;
};

#endif

View File

@@ -1,10 +1,10 @@
#ifndef __GPUCHIPS__ #ifndef __CUDA_GPUCHIPS__
#define __GPUCHIPS__ #define __CUDA_GPUCHIPS__
typedef uint32_t GPUCHIP; typedef uint32_t GPUCHIP;
enum { enum {
CHIP_UNKNOWN, CHIP_UNKNOWN_CUDA,
CHIP_G80, CHIP_G80,
CHIP_G80GL, CHIP_G80GL,
CHIP_G84, CHIP_G84,

View File

@@ -6,40 +6,12 @@
#include "../common/pci.hpp" #include "../common/pci.hpp"
#include "../common/global.hpp" #include "../common/global.hpp"
int print_gpus_list() { bool print_gpu_cuda(struct gpu_info* gpu) {
cudaError_t err = cudaSuccess; char* cc = get_str_cc(gpu->arch);
int num_gpus = -1; printf("%s (Compute Capability %s)\n", gpu->name, cc);
free(cc);
if ((err = cudaGetDeviceCount(&num_gpus)) != cudaSuccess) { return true;
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
return EXIT_FAILURE;
}
printf("CUDA GPUs available: %d\n", num_gpus);
if(num_gpus > 0) {
cudaDeviceProp deviceProp;
int max_len = 0;
for(int idx=0; idx < num_gpus; idx++) {
if ((err = cudaGetDeviceProperties(&deviceProp, idx)) != cudaSuccess) {
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
return EXIT_FAILURE;
}
max_len = max(max_len, (int) strlen(deviceProp.name));
}
for(int i=0; i < max_len + 32; i++) putchar('-');
putchar('\n');
for(int idx=0; idx < num_gpus; idx++) {
if ((err = cudaGetDeviceProperties(&deviceProp, idx)) != cudaSuccess) {
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
return EXIT_FAILURE;
}
printf("GPU %d: %s (Compute Capability %d.%d)\n", idx, deviceProp.name, deviceProp.major, deviceProp.minor);
}
}
return EXIT_SUCCESS;
} }
struct cache* get_cache_info(cudaDeviceProp prop) { struct cache* get_cache_info(cudaDeviceProp prop) {
@@ -104,16 +76,16 @@ struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) {
} }
// Compute peak performance when using CUDA cores // Compute peak performance when using CUDA cores
int64_t get_peak_performance(struct gpu_info* gpu) { int64_t get_peak_performance_cuda(struct gpu_info* gpu) {
return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2; return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2;
} }
// Compute peak performance when using tensor cores // Compute peak performance when using tensor cores
int64_t get_peak_performance_t(struct gpu_info* gpu) { int64_t get_peak_performance_tcu(struct gpu_info* gpu) {
return gpu->freq * 1000000 * 4 * 4 * 8 * gpu->topo->tensor_cores; return gpu->freq * 1000000 * 4 * 4 * 8 * gpu->topo->tensor_cores;
} }
struct gpu_info* get_gpu_info(int gpu_idx) { struct gpu_info* get_gpu_info_cuda(int gpu_idx) {
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info)); struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
gpu->pci = NULL; gpu->pci = NULL;
gpu->idx = gpu_idx; gpu->idx = gpu_idx;
@@ -123,8 +95,10 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
return NULL; return NULL;
} }
if(gpu_idx == 0) {
printf("Waiting for CUDA driver to start..."); printf("Waiting for CUDA driver to start...");
fflush(stdout); fflush(stdout);
}
int num_gpus = -1; int num_gpus = -1;
cudaError_t err = cudaSuccess; cudaError_t err = cudaSuccess;
@@ -132,7 +106,10 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err)); printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
return NULL; return NULL;
} }
if(gpu_idx == 0) {
printf("\r"); printf("\r");
}
if(num_gpus <= 0) { if(num_gpus <= 0) {
printErr("No CUDA capable devices found!"); printErr("No CUDA capable devices found!");
@@ -140,7 +117,7 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
} }
if(gpu->idx+1 > num_gpus) { if(gpu->idx+1 > num_gpus) {
printErr("Requested GPU index %d in a system with %d GPUs", gpu->idx, num_gpus); // Master is trying to query an invalid GPU
return NULL; return NULL;
} }
@@ -156,25 +133,17 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
strcpy(gpu->name, deviceProp.name); strcpy(gpu->name, deviceProp.name);
struct pci_dev *devices = get_pci_devices_from_pciutils(); struct pci_dev *devices = get_pci_devices_from_pciutils();
gpu->pci = get_pci_from_pciutils(devices); gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_NVIDIA);
gpu->arch = get_uarch_from_cuda(gpu); gpu->arch = get_uarch_from_cuda(gpu);
gpu->cach = get_cache_info(deviceProp); gpu->cach = get_cache_info(deviceProp);
gpu->mem = get_memory_info(gpu, deviceProp); gpu->mem = get_memory_info(gpu, deviceProp);
gpu->topo = get_topology_info(deviceProp); gpu->topo = get_topology_info(deviceProp);
gpu->peak_performance = get_peak_performance(gpu); gpu->peak_performance = get_peak_performance_cuda(gpu);
gpu->peak_performance_t = get_peak_performance_t(gpu); gpu->peak_performance_tcu = get_peak_performance_tcu(gpu);
return gpu; return gpu;
} }
char* get_str_generic(int32_t data) {
// Largest int is 10, +1 for possible negative, +1 for EOL
uint32_t max_size = 12;
char* dummy = (char *) ecalloc(max_size, sizeof(char));
snprintf(dummy, max_size, "%d", data);
return dummy;
}
char* get_str_sm(struct gpu_info* gpu) { char* get_str_sm(struct gpu_info* gpu) {
return get_str_generic(gpu->topo->streaming_mp); return get_str_generic(gpu->topo->streaming_mp);
} }

View File

@@ -1,10 +1,10 @@
#ifndef __CUDA__ #ifndef __CUDA_GPU__
#define __CUDA__ #define __CUDA_GPU__
#include "../common/gpu.hpp" #include "../common/gpu.hpp"
struct gpu_info* get_gpu_info(int gpu_idx); struct gpu_info* get_gpu_info_cuda(int gpu_idx);
int print_gpus_list(); bool print_gpu_cuda(struct gpu_info* gpu);
char* get_str_sm(struct gpu_info* gpu); char* get_str_sm(struct gpu_info* gpu);
char* get_str_cores_sm(struct gpu_info* gpu); char* get_str_cores_sm(struct gpu_info* gpu);
char* get_str_cuda_cores(struct gpu_info* gpu); char* get_str_cuda_cores(struct gpu_info* gpu);

View File

@@ -8,21 +8,7 @@
#define CHECK_PCI_START if (false) {} #define CHECK_PCI_START if (false) {}
#define CHECK_PCI(pci, id, chip) \ #define CHECK_PCI(pci, id, chip) \
else if (pci->device_id == id) return chip; else if (pci->device_id == id) return chip;
#define CHECK_PCI_END else { printBug("TODOO"); return CHIP_UNKNOWN; } #define CHECK_PCI_END else { printBug("Unkown CUDA device id: 0x%.4X", pci->device_id); return CHIP_UNKNOWN_CUDA; }
struct pci {
uint16_t vendor_id;
uint16_t device_id;
};
struct pci* get_pci_from_pciutils(struct pci_dev *devices) {
struct pci* pci = (struct pci*) emalloc(sizeof(struct pci));
pci->vendor_id = pciutils_get_pci_vendor_id(devices);
pci->device_id = pciutils_get_pci_device_id(devices);
return pci;
}
/* /*
* pci ids were retrieved using https://github.com/pciutils/pciids * pci ids were retrieved using https://github.com/pciutils/pciids
@@ -33,7 +19,7 @@ struct pci* get_pci_from_pciutils(struct pci_dev *devices) {
* or in pci.ids itself) * or in pci.ids itself)
*/ */
GPUCHIP get_chip_from_pci(struct pci* pci) { GPUCHIP get_chip_from_pci_cuda(struct pci* pci) {
CHECK_PCI_START CHECK_PCI_START
CHECK_PCI(pci, 0x25e5, CHIP_GA107BM) CHECK_PCI(pci, 0x25e5, CHIP_GA107BM)
CHECK_PCI(pci, 0x25e2, CHIP_GA107BM) CHECK_PCI(pci, 0x25e2, CHIP_GA107BM)

View File

@@ -6,9 +6,14 @@
#include "../common/pci.hpp" #include "../common/pci.hpp"
#include "chips.hpp" #include "chips.hpp"
/*
* doc: https://wiki.osdev.org/PCI#Class_Codes
* https://pci-ids.ucw.cz/read/PC
*/
#define PCI_VENDOR_ID_NVIDIA 0x10de
struct pci; struct pci;
struct pci* get_pci_from_pciutils(struct pci_dev *devices); GPUCHIP get_chip_from_pci_cuda(struct pci* pci);
GPUCHIP get_chip_from_pci(struct pci* pci);
#endif #endif

View File

@@ -3,21 +3,14 @@
#include <stdint.h> #include <stdint.h>
#include <cstddef> #include <cstddef>
#include "../common/uarch.hpp"
#include "../common/global.hpp" #include "../common/global.hpp"
#include "../common/gpu.hpp" #include "../common/gpu.hpp"
#include "chips.hpp" #include "chips.hpp"
typedef uint32_t MICROARCH;
// Any clock multiplier // Any clock multiplier
#define CM_ANY -1 #define CM_ANY -1
// Data not available
#define NA -1
// Unknown manufacturing process
#define UNK -1
// MICROARCH values // MICROARCH values
enum { enum {
UARCH_UNKNOWN, UARCH_UNKNOWN,
@@ -43,23 +36,10 @@ static const char *uarch_str[] = {
/*[ARCH_AMPERE] = */ "Ampere", /*[ARCH_AMPERE] = */ "Ampere",
}; };
struct uarch {
int32_t cc_major;
int32_t cc_minor;
int32_t compute_capability;
MICROARCH uarch;
GPUCHIP chip;
int32_t process;
char* uarch_str;
char* chip_str;
};
#define CHECK_UARCH_START if (false) {} #define CHECK_UARCH_START if (false) {}
#define CHECK_UARCH(arch, chip_, str, uarch, process) \ #define CHECK_UARCH(arch, chip_, str, uarch, process) \
else if (arch->chip == chip_) fill_uarch(arch, str, uarch, process); else if (arch->chip == chip_) fill_uarch(arch, str, uarch, process);
#define CHECK_UARCH_END else { printBug("map_chip_to_uarch: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, 0); } #define CHECK_UARCH_END else { if(arch->chip != CHIP_UNKNOWN_CUDA) printBug("map_chip_to_uarch_cuda: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, 0); }
void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t process) { void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t process) {
arch->chip_str = (char *) emalloc(sizeof(char) * (strlen(str)+1)); arch->chip_str = (char *) emalloc(sizeof(char) * (strlen(str)+1));
@@ -74,7 +54,7 @@ void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t proce
* o CHIP_XXXGL: indicates a professional-class (Quadro/Tesla) chip * o CHIP_XXXGL: indicates a professional-class (Quadro/Tesla) chip
* o CHIP_XXXM: indicates a mobile chip * o CHIP_XXXM: indicates a mobile chip
*/ */
void map_chip_to_uarch(struct uarch* arch) { void map_chip_to_uarch_cuda(struct uarch* arch) {
CHECK_UARCH_START CHECK_UARCH_START
// TESLA (1.0, 1.1, 1.2, 1.3) // // TESLA (1.0, 1.1, 1.2, 1.3) //
CHECK_UARCH(arch, CHIP_G80, "G80", UARCH_TESLA, 90) CHECK_UARCH(arch, CHIP_G80, "G80", UARCH_TESLA, 90)
@@ -263,9 +243,8 @@ struct uarch* get_uarch_from_cuda(struct gpu_info* gpu) {
arch->cc_major = deviceProp.major; arch->cc_major = deviceProp.major;
arch->cc_minor = deviceProp.minor; arch->cc_minor = deviceProp.minor;
arch->compute_capability = deviceProp.major * 10 + deviceProp.minor; arch->compute_capability = deviceProp.major * 10 + deviceProp.minor;
arch->chip = get_chip_from_pci(gpu->pci); arch->chip = get_chip_from_pci_cuda(gpu->pci);
map_chip_to_uarch_cuda(arch);
map_chip_to_uarch(arch);
return arch; return arch;
} }
@@ -335,10 +314,6 @@ MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch) {
CHECK_MEMTYPE_END CHECK_MEMTYPE_END
} }
const char* get_str_uarch(struct uarch* arch) {
return uarch_str[arch->uarch];
}
char* get_str_cc(struct uarch* arch) { char* get_str_cc(struct uarch* arch) {
uint32_t max_size = 4; uint32_t max_size = 4;
char* cc = (char *) ecalloc(max_size, sizeof(char)); char* cc = (char *) ecalloc(max_size, sizeof(char));
@@ -346,31 +321,14 @@ char* get_str_cc(struct uarch* arch) {
return cc; return cc;
} }
char* get_str_process(struct uarch* arch) {
char* str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN)+1));
int32_t process = arch->process;
if(process == UNK) {
snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
}
else if(process > 100) {
sprintf(str, "%.2fum", (double)process/100);
}
else if(process > 0){
sprintf(str, "%dnm", process);
}
else {
snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
printBug("Found invalid process: '%d'", process);
}
return str;
}
char* get_str_chip(struct uarch* arch) { char* get_str_chip(struct uarch* arch) {
return arch->chip_str; return arch->chip_str;
} }
const char* get_str_uarch_cuda(struct uarch* arch) {
return uarch_str[arch->uarch];
}
void free_uarch_struct(struct uarch* arch) { void free_uarch_struct(struct uarch* arch) {
free(arch->uarch_str); free(arch->uarch_str);
free(arch->chip_str); free(arch->chip_str);

View File

@@ -1,5 +1,5 @@
#ifndef __UARCH__ #ifndef __CUDA_UARCH__
#define __UARCH__ #define __CUDA_UARCH__
#include "../common/gpu.hpp" #include "../common/gpu.hpp"
@@ -8,7 +8,7 @@ struct uarch;
struct uarch* get_uarch_from_cuda(struct gpu_info* gpu); struct uarch* get_uarch_from_cuda(struct gpu_info* gpu);
bool clkm_possible_for_uarch(int clkm, struct uarch* arch); bool clkm_possible_for_uarch(int clkm, struct uarch* arch);
MEMTYPE guess_memtype_from_cmul_and_uarch(int ddr, struct uarch* arch); MEMTYPE guess_memtype_from_cmul_and_uarch(int ddr, struct uarch* arch);
char* get_str_uarch(struct uarch* arch); char* get_str_uarch_cuda(struct uarch* arch);
char* get_str_cc(struct uarch* arch); char* get_str_cc(struct uarch* arch);
char* get_str_chip(struct uarch* arch); char* get_str_chip(struct uarch* arch);
char* get_str_process(struct uarch* arch); char* get_str_process(struct uarch* arch);

12
src/intel/check.sh Executable file
View File

@@ -0,0 +1,12 @@
#!/bin/bash -u
# Checks the difference between supported uarchs
# and uarchs that have their topology available
# in file uarch.cpp
#
# Must be run from the directory that contains uarch.cpp.
# The differences are shown with meld.

# Use private temporary files instead of fixed (predictable) names
# in /tmp, and remove them however the script ends
uarchs_file="$(mktemp)" || exit 1
topos_file="$(mktemp)" || { rm -f "$uarchs_file"; exit 1; }
trap 'rm -f "$uarchs_file" "$topos_file"' EXIT

# uarch,gt pairs used by the supported chips
uarchs="$(grep 'CHECK_UARCH' uarch.cpp | cut -d',' -f4-5 | grep 'UARCH_GEN' | tr -d ' ' | sort | uniq)"
# uarch,gt pairs with a known topology
topos="$(grep 'CHECK_TOPO' uarch.cpp | cut -d',' -f3,4 | grep 'UARCH_' | tr -d ' ' | sort | uniq)"

echo "$uarchs" > "$uarchs_file"
echo "$topos" > "$topos_file"

meld "$uarchs_file" "$topos_file"

59
src/intel/chips.hpp Normal file
View File

@@ -0,0 +1,59 @@
#ifndef __INTEL_GPUCHIPS__
#define __INTEL_GPUCHIPS__
#include <stdint.h>
typedef uint32_t GPUCHIP;
// GPUCHIP values for the Intel backend, grouped by graphics generation.
// CHIP_UNKNOWN_INTEL is the value returned by get_chip_from_pci_intel when
// the PCI device id is not in its table.
enum {
CHIP_UNKNOWN_INTEL,
// Gen6
CHIP_HD_2000,
CHIP_HD_3000,
// Gen7
CHIP_HD_2500,
CHIP_HD_4000,
CHIP_HD_P4000,
// Gen7.5
CHIP_HD_4200,
CHIP_HD_4400,
CHIP_HD_4600,
CHIP_HD_P4600,
CHIP_IRIS_5100,
CHIP_IRISP_5200,
CHIP_IRISP_P5200,
// Gen8
CHIP_HD_5300,
CHIP_HD_5500,
CHIP_HD_5600,
CHIP_HD_P5700,
CHIP_HD_6000,
CHIP_IRIS_6100,
CHIP_IRISP_6200,
CHIP_IRISP_P6300,
// Gen9
CHIP_HD_510,
CHIP_HD_515,
CHIP_HD_520,
CHIP_HD_530,
CHIP_HD_P530,
// NOTE(review): the five chips below have no enabled device id in
// get_chip_from_pci_intel - confirm whether they are still pending
CHIP_HD_540,
CHIP_HD_550,
CHIP_IRIS_P555,
CHIP_IRIS_580,
CHIP_IRIS_P580,
// Gen9.5
CHIP_UHD_600,
CHIP_UHD_605,
CHIP_UHD_620,
CHIP_UHD_630,
CHIP_HD_610,
CHIP_HD_615,
// NOTE(review): CHIP_HD_620 has no device id in get_chip_from_pci_intel either - confirm
CHIP_HD_620,
CHIP_HD_630,
CHIP_HD_P630,
CHIP_IRISP_640,
CHIP_IRISP_650,
};
#endif

46
src/intel/intel.cpp Normal file
View File

@@ -0,0 +1,46 @@
#include <stdio.h>
#include <string.h>
#include "intel.hpp"
#include "uarch.hpp"
#include "chips.hpp"
#include "udev.hpp"
#include "../common/pci.hpp"
#include "../common/global.hpp"
int64_t get_peak_performance_intel(struct gpu_info* gpu) {
return gpu->freq * 1000000 * gpu->topo_i->eu_subslice * gpu->topo_i->subslices * 8 * 2;
}
/*
 * Builds the gpu_info of the Intel iGPU.
 * Returns NULL if no Intel device is found in PCI or if
 * the device found is not in the chip table (unknown device id).
 */
struct gpu_info* get_gpu_info_intel() {
  struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
  gpu->vendor = GPU_VENDOR_INTEL;

  struct pci_dev *devices = get_pci_devices_from_pciutils();
  gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_INTEL);

  if(gpu->pci == NULL) {
    // No Intel iGPU found in PCI, which means it is not present
    return NULL;
  }

  gpu->arch = get_uarch_from_pci(gpu->pci);

  if(gpu->arch == NULL) {
    // get_uarch_from_pci returns NULL when the device id is unknown.
    // Stop here: everything below dereferences gpu->arch
    return NULL;
  }

  gpu->name = get_name_from_uarch(gpu->arch);
  gpu->topo_i = get_topology_info(gpu->arch);
  gpu->freq = get_max_freq_from_file(gpu->pci);
  gpu->peak_performance = get_peak_performance_intel(gpu);

  return gpu;
}
/*
 * Prints a one line description of the iGPU.
 * Returns false (printing nothing) if gpu is not an Intel GPU.
 */
bool print_gpu_intel(struct gpu_info* gpu) {
  if(gpu->vendor != GPU_VENDOR_INTEL) return false;

  // gpu->name is built by get_name_from_uarch, which already prepends
  // "Intel ", so do not print the vendor a second time
  printf("%s\n", gpu->name);

  return true;
}
char* get_str_eu(struct gpu_info* gpu) {
return get_str_generic(gpu->topo_i->subslices * gpu->topo_i->eu_subslice);
}

10
src/intel/intel.hpp Normal file
View File

@@ -0,0 +1,10 @@
#ifndef __INTEL_GPU__
#define __INTEL_GPU__
#include "../common/gpu.hpp"
// Builds the gpu_info of the Intel iGPU. Returns NULL if no Intel device is found in PCI
struct gpu_info* get_gpu_info_intel();
// Prints a one line description of the iGPU. Returns false if gpu is not an Intel GPU
bool print_gpu_intel(struct gpu_info* gpu);
// Returns the number of EUs (subslices * EUs per subslice) as a string
char* get_str_eu(struct gpu_info* gpu);
#endif

88
src/intel/pci.cpp Normal file
View File

@@ -0,0 +1,88 @@
#include <stdio.h>
#include "pci.hpp"
#include "chips.hpp"
#include "../common/global.hpp"
#include "../common/pci.hpp"
/*
 * Helpers to write the device id -> chip table as an if / else if chain:
 *  CHECK_PCI_START opens the chain
 *  CHECK_PCI       returns chip if the device id of pci is id
 *  CHECK_PCI_END   reports the unknown device id and returns CHIP_UNKNOWN_INTEL
 *                  (it uses the variable named pci of the enclosing function)
 */
#define CHECK_PCI_START if (false) {}
#define CHECK_PCI(pci, id, chip) \
   else if ((pci)->device_id == (id)) return (chip);
#define CHECK_PCI_END else { printBug("Unknown Intel device id: 0x%.4X", pci->device_id); return CHIP_UNKNOWN_INTEL; }
/*
* https://github.com/mesa3d/mesa/blob/main/include/pci_ids/i965_pci_ids.h
*/
// Maps the PCI device id of pci to its GPUCHIP value.
// Unknown device ids are reported (see CHECK_PCI_END) and
// CHIP_UNKNOWN_INTEL is returned.
GPUCHIP get_chip_from_pci_intel(struct pci* pci) {
CHECK_PCI_START
// Gen6
CHECK_PCI(pci, 0x0102, CHIP_HD_2000)
CHECK_PCI(pci, 0x0106, CHIP_HD_2000)
CHECK_PCI(pci, 0x010A, CHIP_HD_2000)
CHECK_PCI(pci, 0x0112, CHIP_HD_3000)
CHECK_PCI(pci, 0x0122, CHIP_HD_3000)
CHECK_PCI(pci, 0x0116, CHIP_HD_3000)
CHECK_PCI(pci, 0x0126, CHIP_HD_3000)
// Gen7
CHECK_PCI(pci, 0x0152, CHIP_HD_2500)
CHECK_PCI(pci, 0x0156, CHIP_HD_2500)
CHECK_PCI(pci, 0x0162, CHIP_HD_4000)
CHECK_PCI(pci, 0x0166, CHIP_HD_4000)
CHECK_PCI(pci, 0x016a, CHIP_HD_P4000)
// Gen7.5
CHECK_PCI(pci, 0x0A1E, CHIP_HD_4200)
CHECK_PCI(pci, 0x041E, CHIP_HD_4400)
CHECK_PCI(pci, 0x0A16, CHIP_HD_4400)
CHECK_PCI(pci, 0x0412, CHIP_HD_4600)
CHECK_PCI(pci, 0x0416, CHIP_HD_4600)
CHECK_PCI(pci, 0x0D12, CHIP_HD_4600)
CHECK_PCI(pci, 0x041A, CHIP_HD_P4600)
CHECK_PCI(pci, 0x0A2E, CHIP_IRIS_5100)
CHECK_PCI(pci, 0x0D22, CHIP_IRISP_5200)
CHECK_PCI(pci, 0x0D26, CHIP_IRISP_P5200)
// Gen8
CHECK_PCI(pci, 0x161E, CHIP_HD_5300)
CHECK_PCI(pci, 0x1616, CHIP_HD_5500)
CHECK_PCI(pci, 0x1612, CHIP_HD_5600)
CHECK_PCI(pci, 0x161A, CHIP_HD_P5700)
CHECK_PCI(pci, 0x1626, CHIP_HD_6000)
CHECK_PCI(pci, 0x162B, CHIP_IRIS_6100)
CHECK_PCI(pci, 0x1622, CHIP_IRISP_6200)
CHECK_PCI(pci, 0x162A, CHIP_IRISP_P6300)
// Gen9
CHECK_PCI(pci, 0x1902, CHIP_HD_510)
CHECK_PCI(pci, 0x1906, CHIP_HD_510)
CHECK_PCI(pci, 0x190B, CHIP_HD_510)
CHECK_PCI(pci, 0x191E, CHIP_HD_515)
CHECK_PCI(pci, 0x1916, CHIP_HD_520)
CHECK_PCI(pci, 0x1921, CHIP_HD_520)
CHECK_PCI(pci, 0x1912, CHIP_HD_530)
CHECK_PCI(pci, 0x191B, CHIP_HD_530)
CHECK_PCI(pci, 0x191D, CHIP_HD_P530)
// Disabled entries: every id below is 0x5917, which is the id used for
// CHIP_UHD_620 further down, so these look like placeholders.
// NOTE(review): the chip names also differ from chips.hpp
// (CHIP_IRIS_P555 / CHIP_IRIS_580 / CHIP_IRIS_P580) - fix both before enabling
/*CHECK_PCI(pci, 0x5917, CHIP_HD_540)
CHECK_PCI(pci, 0x5917, CHIP_HD_550)
CHECK_PCI(pci, 0x5917, CHIP_HD_P555)
CHECK_PCI(pci, 0x5917, CHIP_HD_580)
CHECK_PCI(pci, 0x5917, CHIP_HD_P580)*/
// Gen9.5
CHECK_PCI(pci, 0x3185, CHIP_UHD_600)
CHECK_PCI(pci, 0x3184, CHIP_UHD_605)
CHECK_PCI(pci, 0x5917, CHIP_UHD_620)
CHECK_PCI(pci, 0x3E91, CHIP_UHD_630)
CHECK_PCI(pci, 0x3E92, CHIP_UHD_630)
CHECK_PCI(pci, 0x3E98, CHIP_UHD_630)
CHECK_PCI(pci, 0x3E9B, CHIP_UHD_630)
CHECK_PCI(pci, 0x9BC5, CHIP_UHD_630)
CHECK_PCI(pci, 0x9BC8, CHIP_UHD_630)
CHECK_PCI(pci, 0x5902, CHIP_HD_610)
CHECK_PCI(pci, 0x5906, CHIP_HD_610)
CHECK_PCI(pci, 0x590B, CHIP_HD_610)
CHECK_PCI(pci, 0x591E, CHIP_HD_615)
CHECK_PCI(pci, 0x5912, CHIP_HD_630)
CHECK_PCI(pci, 0x591B, CHIP_HD_630)
CHECK_PCI(pci, 0x591A, CHIP_HD_P630)
CHECK_PCI(pci, 0x591D, CHIP_HD_P630)
CHECK_PCI(pci, 0x5926, CHIP_IRISP_640)
CHECK_PCI(pci, 0x5927, CHIP_IRISP_650)
CHECK_PCI_END
}

19
src/intel/pci.hpp Normal file
View File

@@ -0,0 +1,19 @@
#ifndef __PCI_INTEL__
#define __PCI_INTEL__
#include <stdint.h>
#include "../common/pci.hpp"
#include "chips.hpp"
/*
* doc: https://wiki.osdev.org/PCI#Class_Codes
* https://pci-ids.ucw.cz/read/PC
*/
// PCI vendor id used to look for the Intel device
#define PCI_VENDOR_ID_INTEL 0x8086
struct pci;
// Maps the device id of pci to a GPUCHIP value (CHIP_UNKNOWN_INTEL if the id is not known)
GPUCHIP get_chip_from_pci_intel(struct pci* pci);
#endif

212
src/intel/uarch.cpp Normal file
View File

@@ -0,0 +1,212 @@
#include <stdint.h>
#include <cstddef>
#include <string.h>
#include <stdio.h>
#include "../common/uarch.hpp"
#include "../common/global.hpp"
#include "../common/gpu.hpp"
#include "chips.hpp"
#include "pci.hpp"
// Data not available
#define NA -1
// Unknown manufacturing process
#define UNK -1
/*
* Mapping between iGPU and CPU uarchs
* -----------------------------------
* Gen6: Sandy Bridge (2nd Gen)
* Gen7: Ivy Bridge (3rd Gen)
* Gen7.5: Haswell (4th Gen)
* Gen8: Broadwell (5th Gen)
* Gen9: Skylake (6th Gen)
* Gen9.5: Kaby Lake
*/
// MICROARCH values of the Intel backend.
// They are used as indexes into uarch_str, so both lists must keep the same order
enum {
UARCH_UNKNOWN,
UARCH_GEN6,
UARCH_GEN7,
UARCH_GEN7_5,
UARCH_GEN8,
UARCH_GEN9,
UARCH_GEN9_5,
};
// Printable name of each MICROARCH value (indexed by the enum above)
static const char *uarch_str[] = {
/*[UARCH_UNKNOWN] = */ STRING_UNKNOWN,
/*[UARCH_GEN6] = */ "Gen6",
/*[UARCH_GEN7] = */ "Gen7",
/*[UARCH_GEN7_5] = */ "Gen7.5",
/*[UARCH_GEN8] = */ "Gen8",
/*[UARCH_GEN9] = */ "Gen9",
/*[UARCH_GEN9_5] = */ "Gen9.5",
};
// Graphics Tiers (GT)
// The values are used as indexes into gt_str, so both lists must keep the same order
enum {
GT_UNKNOWN,
GT1,
GT1_5,
GT2,
GT3,
GT3e,
GT4e
};
// Printable name of each graphics tier (indexed by the enum above)
static const char *gt_str[] = {
/*[GT_UNKNOWN] = */ STRING_UNKNOWN,
/*[GT1] = */ "GT1",
/*[GT1_5] = */ "GT1.5",
/*[GT2] = */ "GT2",
/*[GT3] = */ "GT3",
/*[GT3e] = */ "GT3e",
/*[GT4e] = */ "GT4e",
};
/*
 * Helpers to write the chip -> uarch table as an if / else if chain.
 * CHECK_UARCH_END handles chips missing from the table: it reports
 * them and fills arch with unknown values.
 */
#define CHECK_UARCH_START if (false) {}
#define CHECK_UARCH(arch, chip_, str, uarch, gt, process) \
   else if (arch->chip == chip_) fill_uarch(arch, str, uarch, gt, process);
#define CHECK_UARCH_END else { printBug("map_chip_to_uarch_intel: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, GT_UNKNOWN, 0); }

/*
 * Helpers to write the (uarch, gt) -> topology table as an if / else if chain.
 * CHECK_TOPO_END handles pairs missing from the table: it reports which
 * pair has no topology and fills topo with -1. It uses the variables
 * named arch and topo of the enclosing function.
 */
#define CHECK_TOPO_START if (false) {}
#define CHECK_TOPO(topo, arch, uarch_, gt_, eu_sub, sub, sli) \
   else if(arch->uarch == uarch_ && arch->gt == gt_) fill_topo(topo, eu_sub, sub, sli);
#define CHECK_TOPO_END else { printBug("get_topology_info: Unknown topology for uarch %d and gt %d", (int) arch->uarch, (int) arch->gt); fill_topo(topo, -1, -1, -1); }
// Stores the topology (EUs per subslice, subslices and slices) in topo_i
void fill_topo(struct topology_i* topo_i, int32_t eu_sub, int32_t sub, int32_t sli) {
  topo_i->eu_subslice = eu_sub;
  topo_i->subslices   = sub;
  topo_i->slices      = sli;
}
// Fills arch with the given uarch, graphics tier and process, and with
// its own (newly allocated) copy of the chip name str
void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, int32_t gt, uint32_t process) {
  // Copy the name including its terminator
  size_t name_size = strlen(str) + 1;
  char* chip_name = (char *) emalloc(sizeof(char) * name_size);
  memcpy(chip_name, str, name_size);

  arch->chip_str = chip_name;
  arch->gt = gt;
  arch->process = process;
  arch->uarch = u;
}
/*
 * Fills arch (chip name, uarch, graphics tier and process)
 * according to arch->chip.
 * Syntax: (arch, chip, chip name, uarch, graphics tier, process)
 * Chips missing from the table are handled by CHECK_UARCH_END.
 */
void map_chip_to_uarch_intel(struct uarch* arch) {
  CHECK_UARCH_START
  // Gen6
  CHECK_UARCH(arch, CHIP_HD_2000,     "HD Graphics 2000",        UARCH_GEN6,   GT1,   32)
  CHECK_UARCH(arch, CHIP_HD_3000,     "HD Graphics 3000",        UARCH_GEN6,   GT2,   32)
  // Gen7
  CHECK_UARCH(arch, CHIP_HD_2500,     "HD Graphics 2500",        UARCH_GEN7,   GT1,   22)
  CHECK_UARCH(arch, CHIP_HD_4000,     "HD Graphics 4000",        UARCH_GEN7,   GT2,   22)
  CHECK_UARCH(arch, CHIP_HD_P4000,    "HD Graphics P4000",       UARCH_GEN7,   GT2,   22)
  // Gen7.5
  CHECK_UARCH(arch, CHIP_HD_4200,     "HD Graphics 4200",        UARCH_GEN7_5, GT2,   22)
  CHECK_UARCH(arch, CHIP_HD_4400,     "HD Graphics 4400",        UARCH_GEN7_5, GT2,   22)
  CHECK_UARCH(arch, CHIP_HD_4600,     "HD Graphics 4600",        UARCH_GEN7_5, GT2,   22)
  CHECK_UARCH(arch, CHIP_HD_P4600,    "HD Graphics P4600",       UARCH_GEN7_5, GT2,   22)
  CHECK_UARCH(arch, CHIP_IRIS_5100,   "HD Iris 5100",            UARCH_GEN7_5, GT3,   22)
  CHECK_UARCH(arch, CHIP_IRISP_5200,  "HD Iris Pro 5200",        UARCH_GEN7_5, GT3,   22)
  CHECK_UARCH(arch, CHIP_IRISP_P5200, "HD Iris Pro P5200",       UARCH_GEN7_5, GT3,   22)
  // Gen8
  CHECK_UARCH(arch, CHIP_HD_5300,     "HD Graphics 5300",        UARCH_GEN8,   GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_5500,     "HD Graphics 5500",        UARCH_GEN8,   GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_5600,     "HD Graphics 5600",        UARCH_GEN8,   GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_P5700,    "HD Graphics P5700",       UARCH_GEN8,   GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_6000,     "HD Graphics 6000",        UARCH_GEN8,   GT3,   14)
  CHECK_UARCH(arch, CHIP_IRIS_6100,   "Iris Graphics 6100",      UARCH_GEN8,   GT3,   14)
  CHECK_UARCH(arch, CHIP_IRISP_6200,  "Iris Pro Graphics 6200",  UARCH_GEN8,   GT3,   14)
  CHECK_UARCH(arch, CHIP_IRISP_P6300, "Iris Pro Graphics P6300", UARCH_GEN8,   GT3,   14)
  // Gen9
  CHECK_UARCH(arch, CHIP_HD_510,      "HD Graphics 510",         UARCH_GEN9,   GT1,   14)
  CHECK_UARCH(arch, CHIP_HD_515,      "HD Graphics 515",         UARCH_GEN9,   GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_520,      "HD Graphics 520",         UARCH_GEN9,   GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_530,      "HD Graphics 530",         UARCH_GEN9,   GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_P530,     "HD Graphics P530",        UARCH_GEN9,   GT2,   14)
  // Gen9.5
  CHECK_UARCH(arch, CHIP_UHD_600,     "UHD Graphics 600",        UARCH_GEN9_5, GT1,   14)
  CHECK_UARCH(arch, CHIP_UHD_605,     "UHD Graphics 605",        UARCH_GEN9_5, GT1_5, 14)
  CHECK_UARCH(arch, CHIP_UHD_620,     "UHD Graphics 620",        UARCH_GEN9_5, GT2,   14)
  CHECK_UARCH(arch, CHIP_UHD_630,     "UHD Graphics 630",        UARCH_GEN9_5, GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_610,      "HD Graphics 610",         UARCH_GEN9_5, GT1,   14)
  CHECK_UARCH(arch, CHIP_HD_615,      "HD Graphics 615",         UARCH_GEN9_5, GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_630,      "HD Graphics 630",         UARCH_GEN9_5, GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_P630,     "HD Graphics P630",        UARCH_GEN9_5, GT2,   14)
  CHECK_UARCH(arch, CHIP_IRISP_640,   "Iris Plus Graphics 640",  UARCH_GEN9_5, GT3e,  14)
  // This entry used to check CHIP_IRISP_640 again, which made it unreachable
  // and left CHIP_IRISP_650 (device id 0x5927) being reported as unknown
  CHECK_UARCH(arch, CHIP_IRISP_650,   "Iris Plus Graphics 650",  UARCH_GEN9_5, GT3e,  14)
  CHECK_UARCH_END
}
const char* get_str_uarch_intel(struct uarch* arch) {
return uarch_str[arch->uarch];
}
// Returns the printable name of the graphics tier of arch (an entry of gt_str)
const char* get_str_gt(struct uarch* arch) {
  int32_t tier = arch->gt;

  return gt_str[tier];
}
struct uarch* get_uarch_from_pci(struct pci* pci) {
struct uarch* arch = (struct uarch*) emalloc(sizeof(struct uarch));
arch->chip_str = NULL;
arch->chip = get_chip_from_pci_intel(pci);
if(arch->chip == CHIP_UNKNOWN_INTEL) {
return NULL;
}
else {
map_chip_to_uarch_intel(arch);
return arch;
}
}
// Returns a newly allocated string with the full name of
// the GPU: "Intel " followed by the chip name of arch
char* get_name_from_uarch(struct uarch* arch) {
  const char* chip = arch->chip_str;
  // 6 characters for "Intel " and 1 more for the terminator
  size_t name_size = strlen(chip) + 6 + 1;
  char* name = (char *) emalloc(sizeof(char) * name_size);

  sprintf(name, "Intel %s", chip);

  return name;
}
/*
* Refs:
* Gen6: https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen6
* Gen7/7.5: https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen7
"The Compute Architecture of Intel Processor Graphics Gen7.5, v1.0"
* Gen8: https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen8
"The Compute Architecture of Intel Processor Graphics Gen8, v1.1"
* Gen9: https://en.wikichip.org/wiki/intel/microarchitectures/gen9#Configuration
"The Compute Architecture of Intel Processor Graphics Gen9, v1.0"
* Gen9.5: https://en.wikichip.org/wiki/intel/microarchitectures/gen9.5#Configuration
*/
// Returns a newly allocated topology for the (uarch, graphics tier) pair of arch.
// Pairs missing from the table are handled by CHECK_TOPO_END, which reports
// the problem and fills the three fields with -1.
struct topology_i* get_topology_info(struct uarch* arch) {
struct topology_i* topo = (struct topology_i*) emalloc(sizeof(struct topology_i));
// Syntax: (EU per subslice, Subslices, Slices)
CHECK_TOPO_START
// Gen6
CHECK_TOPO(topo, arch, UARCH_GEN6, GT1, 6, 1, 1)
CHECK_TOPO(topo, arch, UARCH_GEN6, GT2, 6, 2, 1)
// Gen7
CHECK_TOPO(topo, arch, UARCH_GEN7, GT1, 6, 1, 1)
CHECK_TOPO(topo, arch, UARCH_GEN7, GT2, 8, 2, 1)
// NOTE(review): same values as Gen7 GT1 - confirm against the references
CHECK_TOPO(topo, arch, UARCH_GEN7, GT3, 6, 1, 1)
// Gen7.5
CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT1, 10, 1, 1)
CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT2, 10, 2, 1)
CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT3, 10, 4, 1)
// Gen8
CHECK_TOPO(topo, arch, UARCH_GEN8, GT1, 6, 2, 1)
CHECK_TOPO(topo, arch, UARCH_GEN8, GT2, 8, 3, 1)
CHECK_TOPO(topo, arch, UARCH_GEN8, GT3, 8, 6, 2)
// Gen9
CHECK_TOPO(topo, arch, UARCH_GEN9, GT1, 6, 2, 1)
CHECK_TOPO(topo, arch, UARCH_GEN9, GT2, 8, 3, 1)
CHECK_TOPO(topo, arch, UARCH_GEN9, GT3, 8, 6, 2)
CHECK_TOPO(topo, arch, UARCH_GEN9, GT4e, 8, 9, 3)
// Gen9.5
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT1, 6, 2, 1)
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT1_5, 6, 3, 1)
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT2, 8, 3, 1)
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT3, 8, 6, 2)
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT3e, 8, 6, 2) // Same as GT3, but has eDRAM cache
CHECK_TOPO_END
return topo;
}

14
src/intel/uarch.hpp Normal file
View File

@@ -0,0 +1,14 @@
#ifndef __INTEL_UARCH__
#define __INTEL_UARCH__
#include "../common/gpu.hpp"
struct uarch;
// Builds the uarch of the Intel device described by pci (NULL if the device id is unknown)
struct uarch* get_uarch_from_pci(struct pci* pci);
// Returns a newly allocated "Intel <chip name>" string
char* get_name_from_uarch(struct uarch* arch);
// NOTE(review): the two functions below are defined in uarch.cpp as returning
// const char* (they return entries of static string tables, which must not be
// modified nor freed) - confirm and align these declarations
char* get_str_gt(struct uarch* arch);
char* get_str_uarch_intel(struct uarch* arch);
// Returns a newly allocated topology for the uarch and graphics tier of arch
struct topology_i* get_topology_info(struct uarch* arch);
#endif

89
src/intel/udev.cpp Normal file
View File

@@ -0,0 +1,89 @@
#include <cstddef>
#include <cstring>
#include <cstdlib>
#include <cstdint>
#include <cerrno>
#include <cstdio>
#include <fcntl.h>
#include <unistd.h>
#include "../common/global.hpp"
#include "../common/pci.hpp"
#define _PATH_SYS_SYSTEM "/sys/devices/pci0000:00"
#define _PATH_SYS_DRM "/drm"
#define _PATH_CARD "/card0"
#define _PATH_FREQUENCY_MAX "/gt_max_freq_mhz"
#define _PATH_FREQUENCY_MIN "/gt_min_freq_mhz"
#define _PATH_FREQUENCY_MAX_LEN 100
#define DEFAULT_FILE_SIZE 4096
#define UNKNOWN_DATA -1
/*
 * Reads the file in path into a newly allocated, zero terminated buffer.
 * At most DEFAULT_FILE_SIZE - 1 bytes are read; anything past that is ignored.
 * On success the number of bytes read is stored in len and the buffer is
 * returned (the caller must free it). Returns NULL if the file can not be
 * opened or closed.
 */
char* read_file(char* path, int* len) {
  int fd = open(path, O_RDONLY);

  if(fd == -1) {
    return NULL;
  }

  //File exists, read it
  int bytes_read = 0;
  int offset = 0;
  int block = 128;
  char* buf = (char *) emalloc(sizeof(char)*DEFAULT_FILE_SIZE);
  memset(buf, 0, sizeof(char)*DEFAULT_FILE_SIZE);

  // Never write past the end of buf, and keep its
  // last byte untouched so the data stays zero terminated
  while(offset < DEFAULT_FILE_SIZE - 1) {
    int remaining = DEFAULT_FILE_SIZE - 1 - offset;
    int to_read = remaining < block ? remaining : block;

    bytes_read = read(fd, buf+offset, to_read);
    if(bytes_read <= 0) {
      // End of file or read error: keep what was read so far
      break;
    }
    offset += bytes_read;
  }

  if (close(fd) == -1) {
    free(buf);
    return NULL;
  }

  *len = offset;
  return buf;
}
/*
 * Reads a frequency (in MHz) from the file in path.
 * Returns UNKNOWN_DATA if the file can not be read, if its content
 * can not be converted or if the value is out of range.
 */
long get_freq_from_file(char* path) {
  int filelen;
  char* buf;
  if((buf = read_file(path, &filelen)) == NULL) {
    printWarn("Could not open '%s'", path);
    return UNKNOWN_DATA;
  }

  errno = 0;
  long ret = strtol(buf, NULL, 10);
  // Save errno before free has the chance to change it
  int strtol_errno = errno;

  // buf is not needed anymore; releasing it here makes sure that
  // it is not leaked by any of the error paths below
  free(buf);

  if(strtol_errno != 0) {
    printBug("strtol: %s", strerror(strtol_errno));
    return UNKNOWN_DATA;
  }

  // We will be getting the frequency in MHz
  // We consider it is an error if frequency is
  // greater than 10 GHz or less than 100 MHz
  if(ret > 10000 || ret < 100) {
    printBug("Invalid data was read from file '%s': %ld\n", path, ret);
    return UNKNOWN_DATA;
  }

  return ret;
}
long get_max_freq_from_file(struct pci* pci) {
char path[_PATH_FREQUENCY_MAX_LEN];
sprintf(path, "%s/%04x:%02x:%02x.%d%s%s%s", _PATH_SYS_SYSTEM, pci->domain, pci->bus, pci->dev, pci->func, _PATH_SYS_DRM, _PATH_CARD, _PATH_FREQUENCY_MAX);
return get_freq_from_file(path);
}
long get_min_freq_from_file(struct pci* pci) {
char path[_PATH_FREQUENCY_MAX_LEN];
sprintf(path, "%s/%04x:%02x:%02x.%d%s%s%s", _PATH_SYS_SYSTEM, pci->domain, pci->bus, pci->dev, pci->func, _PATH_SYS_DRM, _PATH_CARD, _PATH_FREQUENCY_MIN);
return get_freq_from_file(path);
}

7
src/intel/udev.hpp Normal file
View File

@@ -0,0 +1,7 @@
#ifndef __UDEV__
#define __UDEV__
// Both functions read the frequency (in MHz) of the device described by pci
// from sysfs, and return -1 if it can not be read or if it is not valid
long get_max_freq_from_file(struct pci* pci);
long get_min_freq_from_file(struct pci* pci);
#endif