[v0.20] Merge Intel iGPU branch for preliminary Intel GPU support
This commit is contained in:
110
CMakeLists.txt
110
CMakeLists.txt
@@ -7,23 +7,22 @@ project(gpufetch CXX)
|
||||
set(SRC_DIR "src")
|
||||
set(COMMON_DIR "${SRC_DIR}/common")
|
||||
set(CUDA_DIR "${SRC_DIR}/cuda")
|
||||
set(INTEL_DIR "${SRC_DIR}/intel")
|
||||
|
||||
if(NOT WIN32)
|
||||
string(ASCII 27 Esc)
|
||||
set(ColorReset "${Esc}[m")
|
||||
set(ColorBold "${Esc}[1m")
|
||||
set(Red "${Esc}[31m")
|
||||
set(Green "${Esc}[32m")
|
||||
set(BoldRed "${Esc}[1;31m")
|
||||
set(BoldGreen "${Esc}[1;32m")
|
||||
set(BoldYellow "${Esc}[1;33m")
|
||||
if(NOT DEFINED ENABLE_INTEL_BACKEND)
|
||||
set(ENABLE_INTEL_BACKEND true)
|
||||
endif()
|
||||
|
||||
check_language(CUDA)
|
||||
if(CMAKE_CUDA_COMPILER)
|
||||
enable_language(CUDA)
|
||||
else()
|
||||
message(FATAL_ERROR "${BoldRed}[ERROR]${ColorReset} Unable to find CUDA compiler. You may use -DCMAKE_CUDA_COMPILER and -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT if CUDA is installed but not detected by CMake")
|
||||
if(NOT DEFINED ENABLE_CUDA_BACKEND OR ENABLE_CUDA_BACKEND)
|
||||
check_language(CUDA)
|
||||
if(CMAKE_CUDA_COMPILER)
|
||||
enable_language(CUDA)
|
||||
set(ENABLE_CUDA_BACKEND true)
|
||||
# Must link_directories early so add_executable(gpufetch ...) gets the right directories
|
||||
link_directories(cuda_backend ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/lib)
|
||||
else()
|
||||
set(ENABLE_CUDA_BACKEND false)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")
|
||||
@@ -48,34 +47,73 @@ else()
|
||||
link_libraries(${PCIUTILS_LIBRARIES})
|
||||
endif()
|
||||
|
||||
add_executable(gpufetch ${COMMON_DIR}/main.cpp ${COMMON_DIR}/args.cpp ${COMMON_DIR}/gpu.cpp ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/global.cpp ${COMMON_DIR}/printer.cpp ${COMMON_DIR}/master.cpp ${COMMON_DIR}/uarch.cpp)
|
||||
set(SANITY_FLAGS "-Wfloat-equal -Wshadow -Wpointer-arith")
|
||||
set(CMAKE_CXX_FLAGS "${SANITY_FLAGS} -Wall -Wextra -pedantic -fstack-protector-all -pedantic")
|
||||
set(CMAKE_CXX_FLAGS "${SANITY_FLAGS} -Wall -Wextra -pedantic -fstack-protector-all -pedantic -std=c++11")
|
||||
|
||||
# https://en.wikipedia.org/w/index.php?title=CUDA§ion=5#GPUs_supported
|
||||
# https://raw.githubusercontent.com/PointCloudLibrary/pcl/master/cmake/pcl_find_cuda.cmake
|
||||
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES 35 37 50 52 53 60 61 62 70 72 75 80 86)
|
||||
elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72 75)
|
||||
elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "9.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72)
|
||||
elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "8.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES 20 21 30 32 35 37 50 52 53 60 61 62)
|
||||
# Intel iGPU backend: built as a static library and linked into gpufetch
# only when ENABLE_INTEL_BACKEND evaluates to true.
if(ENABLE_INTEL_BACKEND)
  # Lets the C++ sources guard Intel-only code with #ifdef BACKEND_INTEL.
  target_compile_definitions(gpufetch PUBLIC BACKEND_INTEL)

  add_library(intel_backend STATIC
    ${INTEL_DIR}/intel.cpp
    ${INTEL_DIR}/pci.cpp
    ${INTEL_DIR}/uarch.cpp
    ${INTEL_DIR}/udev.cpp
  )

  # Test the variable by name. Writing "${PCIUTILS_FOUND}" expands to nothing
  # when the variable is unset, leaving a malformed "if(NOT )" condition.
  if(NOT PCIUTILS_FOUND)
    # Order the build so the pciutils target is processed before the backend.
    add_dependencies(intel_backend pciutils)
  endif()

  # Plain (keyword-less) signature kept on purpose: gpufetch is also linked
  # with the plain signature elsewhere in this file, and CMake does not allow
  # mixing plain and keyword signatures on the same target.
  target_link_libraries(gpufetch intel_backend)
endif()
|
||||
|
||||
link_directories(${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/lib)
|
||||
if(ENABLE_CUDA_BACKEND)
|
||||
target_compile_definitions(gpufetch PUBLIC BACKEND_CUDA)
|
||||
|
||||
add_library(cuda_backend STATIC ${CUDA_DIR}/cuda.cpp ${CUDA_DIR}/uarch.cpp ${CUDA_DIR}/pci.cpp)
|
||||
add_executable(gpufetch ${COMMON_DIR}/main.cpp ${COMMON_DIR}/args.cpp ${COMMON_DIR}/gpu.cpp ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/global.cpp ${COMMON_DIR}/printer.cpp)
|
||||
# https://en.wikipedia.org/w/index.php?title=CUDA§ion=5#GPUs_supported
|
||||
# https://raw.githubusercontent.com/PointCloudLibrary/pcl/master/cmake/pcl_find_cuda.cmake
|
||||
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES 35 37 50 52 53 60 61 62 70 72 75 80 86)
|
||||
elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72 75)
|
||||
elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "9.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72)
|
||||
elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "8.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES 20 21 30 32 35 37 50 52 53 60 61 62)
|
||||
endif()
|
||||
|
||||
if(NOT ${PCIUTILS_FOUND})
|
||||
add_dependencies(cuda_backend pciutils)
|
||||
add_dependencies(gpufetch pciutils)
|
||||
add_library(cuda_backend STATIC ${CUDA_DIR}/cuda.cpp ${CUDA_DIR}/uarch.cpp ${CUDA_DIR}/pci.cpp)
|
||||
|
||||
if(NOT ${PCIUTILS_FOUND})
|
||||
add_dependencies(cuda_backend pciutils)
|
||||
endif()
|
||||
|
||||
target_include_directories(cuda_backend PUBLIC ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/samples/common/inc ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/include)
|
||||
|
||||
target_link_libraries(cuda_backend PRIVATE cudart)
|
||||
target_link_libraries(gpufetch cuda_backend)
|
||||
endif()
|
||||
|
||||
target_include_directories(cuda_backend PUBLIC ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/samples/common/inc ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/include)
|
||||
|
||||
target_link_libraries(cuda_backend cudart)
|
||||
target_link_libraries(gpufetch cuda_backend pci z)
|
||||
|
||||
target_link_libraries(gpufetch pci z)
|
||||
install(TARGETS gpufetch DESTINATION bin)
|
||||
|
||||
if(NOT WIN32)
|
||||
string(ASCII 27 Esc)
|
||||
set(ColorReset "${Esc}[m")
|
||||
set(ColorBold "${Esc}[1m")
|
||||
set(Red "${Esc}[31m")
|
||||
set(Green "${Esc}[32m")
|
||||
set(BoldRed "${Esc}[1;31m")
|
||||
set(BoldGreen "${Esc}[1;32m")
|
||||
set(BoldYellow "${Esc}[1;33m")
|
||||
endif()
|
||||
|
||||
message(STATUS "----------------------")
|
||||
message(STATUS "gpufetch build report:")
|
||||
if(ENABLE_INTEL_BACKEND)
|
||||
message(STATUS "Intel backend: ${BoldGreen}ON${ColorReset}")
|
||||
else()
|
||||
message(STATUS "Intel backend: ${BoldRed}OFF${ColorReset}")
|
||||
endif()
|
||||
if(ENABLE_CUDA_BACKEND)
|
||||
message(STATUS "CUDA backend: ${BoldGreen}ON${ColorReset}")
|
||||
else()
|
||||
message(STATUS "CUDA backend: ${BoldRed}OFF${ColorReset}")
|
||||
endif()
|
||||
message(STATUS "----------------------")
|
||||
|
||||
@@ -13,8 +13,13 @@
|
||||
#define NUM_COLORS 4
|
||||
|
||||
#define COLOR_STR_NVIDIA "nvidia"
|
||||
#define COLOR_STR_INTEL "intel"
|
||||
|
||||
#define COLOR_DEFAULT_NVIDIA "118,185,0:255,255,255:255,255,255:118,185,0"
|
||||
// +-----------------------+-----------------------+
|
||||
// | Color logo | Color text |
|
||||
// | Color 1 | Color 2 | Color 1 | Color 2 |
|
||||
#define COLOR_DEFAULT_NVIDIA "118,185,000:255,255,255:255,255,255:118,185,000"
|
||||
#define COLOR_DEFAULT_INTEL "015,125,194:230,230,230:040,150,220:230,230,230"
|
||||
|
||||
struct args_struct {
|
||||
bool help_flag;
|
||||
@@ -145,6 +150,7 @@ bool parse_color(char* optarg_str, struct color*** cs) {
|
||||
bool free_ptr = true;
|
||||
|
||||
if(strcmp(optarg_str, COLOR_STR_NVIDIA) == 0) color_to_copy = COLOR_DEFAULT_NVIDIA;
|
||||
else if(strcmp(optarg_str, COLOR_STR_INTEL) == 0) color_to_copy = COLOR_DEFAULT_INTEL;
|
||||
else {
|
||||
str_to_parse = optarg_str;
|
||||
free_ptr = false;
|
||||
|
||||
@@ -1,32 +1,32 @@
|
||||
#ifndef __ASCII__
|
||||
#define __ASCII__
|
||||
|
||||
#define COLOR_NONE ""
|
||||
#define COLOR_FG_BLACK "\x1b[30;1m"
|
||||
#define COLOR_FG_RED "\x1b[31;1m"
|
||||
#define COLOR_FG_GREEN "\x1b[32;1m"
|
||||
#define COLOR_FG_YELLOW "\x1b[33;1m"
|
||||
#define COLOR_FG_BLUE "\x1b[34;1m"
|
||||
#define COLOR_FG_MAGENTA "\x1b[35;1m"
|
||||
#define COLOR_FG_CYAN "\x1b[36;1m"
|
||||
#define COLOR_FG_WHITE "\x1b[37;1m"
|
||||
#define COLOR_BG_BLACK "\x1b[40;1m"
|
||||
#define COLOR_BG_RED "\x1b[41;1m"
|
||||
#define COLOR_BG_GREEN "\x1b[42;1m"
|
||||
#define COLOR_BG_YELLOW "\x1b[43;1m"
|
||||
#define COLOR_BG_BLUE "\x1b[44;1m"
|
||||
#define COLOR_BG_MAGENTA "\x1b[45;1m"
|
||||
#define COLOR_BG_CYAN "\x1b[46;1m"
|
||||
#define COLOR_BG_WHITE "\x1b[47;1m"
|
||||
#define COLOR_FG_B_BLACK "\x1b[90;1m"
|
||||
#define COLOR_FG_B_RED "\x1b[91;1m"
|
||||
#define COLOR_FG_B_GREEN "\x1b[92;1m"
|
||||
#define COLOR_FG_B_YELLOW "\x1b[93;1m"
|
||||
#define COLOR_FG_B_BLUE "\x1b[94;1m"
|
||||
#define COLOR_FG_B_MAGENTA "\x1b[95;1m"
|
||||
#define COLOR_FG_B_CYAN "\x1b[96;1m"
|
||||
#define COLOR_FG_B_WHITE "\x1b[97;1m"
|
||||
#define COLOR_RESET "\x1b[m"
|
||||
#define C_NONE ""
|
||||
#define C_FG_BLACK "\x1b[30;1m"
|
||||
#define C_FG_RED "\x1b[31;1m"
|
||||
#define C_FG_GREEN "\x1b[32;1m"
|
||||
#define C_FG_YELLOW "\x1b[33;1m"
|
||||
#define C_FG_BLUE "\x1b[34;1m"
|
||||
#define C_FG_MAGENTA "\x1b[35;1m"
|
||||
#define C_FG_CYAN "\x1b[36;1m"
|
||||
#define C_FG_WHITE "\x1b[37;1m"
|
||||
#define C_BG_BLACK "\x1b[40;1m"
|
||||
#define C_BG_RED "\x1b[41;1m"
|
||||
#define C_BG_GREEN "\x1b[42;1m"
|
||||
#define C_BG_YELLOW "\x1b[43;1m"
|
||||
#define C_BG_BLUE "\x1b[44;1m"
|
||||
#define C_BG_MAGENTA "\x1b[45;1m"
|
||||
#define C_BG_CYAN "\x1b[46;1m"
|
||||
#define C_BG_WHITE "\x1b[47;1m"
|
||||
#define C_FG_B_BLACK "\x1b[90;1m"
|
||||
#define C_FG_B_RED "\x1b[91;1m"
|
||||
#define C_FG_B_GREEN "\x1b[92;1m"
|
||||
#define C_FG_B_YELLOW "\x1b[93;1m"
|
||||
#define C_FG_B_BLUE "\x1b[94;1m"
|
||||
#define C_FG_B_MAGENTA "\x1b[95;1m"
|
||||
#define C_FG_B_CYAN "\x1b[96;1m"
|
||||
#define C_FG_B_WHITE "\x1b[97;1m"
|
||||
#define C_RESET "\x1b[m"
|
||||
|
||||
struct ascii_logo {
|
||||
const char* art;
|
||||
@@ -59,6 +59,23 @@ $C2## ## ## ## ## ## ## ## #: :# \
|
||||
$C2## ## ## ## ## ## ## ## ####### \
|
||||
$C2## ## ### ## ###### ## ## ## "
|
||||
|
||||
#define ASCII_INTEL \
|
||||
"$C1 .#################. \
|
||||
$C1 .#### ####. \
|
||||
$C1 .## ### \
|
||||
$C1 ## :## ### \
|
||||
$C1 # ## :## ## \
|
||||
$C1 ## ## ######. #### ###### :## ## \
|
||||
$C1 ## ## ##: ##: ## ## ### :## ### \
|
||||
$C1## ## ##: ##: ## :######## :## ## \
|
||||
$C1## ## ##: ##: ## ##. . :## #### \
|
||||
$C1## # ##: ##: #### #####: ## \
|
||||
$C1 ## \
|
||||
$C1 ###. ..o####. \
|
||||
$C1 ######oo... ..oo####### \
|
||||
$C1 o###############o "
|
||||
|
||||
// LONG LOGOS
|
||||
#define ASCII_NVIDIA_L \
|
||||
"$C1 MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM \
|
||||
$C1 MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM \
|
||||
@@ -76,14 +93,37 @@ $C1 olcc::; ,:ccloMMMMMMMMM \
|
||||
$C1 :......oMMMMMMMMMMMMMMMMMMMMMM \
|
||||
$C1 :lllMMMMMMMMMMMMMMMMMMMMMMMMMM "
|
||||
|
||||
#define ASCII_INTEL_L \
|
||||
"$C1 ###############@ \
|
||||
$C1 ######@ ######@ \
|
||||
$C1 ###@ ###@ \
|
||||
$C1 ##@ ###@ \
|
||||
$C1 ##@ ##@ \
|
||||
$C1 ##@ ##@ \
|
||||
$C1 @ ##@ ##@ ##@ \
|
||||
$C1 #@ ##@ ########@ #####@ #####@ ##@ ##@ \
|
||||
$C1 #@ ##@ ##@ ##@ ##@ ###@ ###@ ##@ ##@ \
|
||||
$C1 #@ ##@ ##@ ##@ ##@ ##@ ##@ ##@ ##@ \
|
||||
$C1 #@ ##@ ##@ ##@ ##@ #########@ ##@ ###@ \
|
||||
$C1 #@ ##@ ##@ ##@ ##@ ##@ ##@ ####@ \
|
||||
$C1 #@ #@ ##@ ##@ ####@ ########@ #@ ##@ \
|
||||
$C1 ##@ \
|
||||
$C1 ##@ \
|
||||
$C1 ###@ ###@ \
|
||||
$C1 ####@ #########@ \
|
||||
$C1 #########@ ###############@ \
|
||||
$C1 ##############################@ "
|
||||
|
||||
typedef struct ascii_logo asciiL;
|
||||
|
||||
// ------------------------------------------------------------------------------------------------------
|
||||
// | LOGO | W | H | REPLACE | COLORS LOGO (>0 && <10) | COLORS TEXT (=2) |
|
||||
// ------------------------------------------------------------------------------------------------------
|
||||
asciiL logo_nvidia = { ASCII_NVIDIA, 45, 19, false, {COLOR_FG_GREEN, COLOR_FG_WHITE}, {COLOR_FG_WHITE, COLOR_FG_GREEN} };
|
||||
// Long variants | ---------------------------------------------------------------------------------------------------|
|
||||
asciiL logo_nvidia_l = { ASCII_NVIDIA_L, 50, 15, false, {COLOR_FG_GREEN, COLOR_FG_WHITE}, {COLOR_FG_WHITE, COLOR_FG_GREEN} };
|
||||
asciiL logo_unknown = { NULL, 0, 0, false, {COLOR_NONE}, {COLOR_NONE, COLOR_NONE} };
|
||||
// ------------------------------------------------------------------------------------------
|
||||
// | LOGO | W | H | REPLACE | COLORS LOGO | COLORS TEXT |
|
||||
// ------------------------------------------------------------------------------------------
|
||||
asciiL logo_nvidia = { ASCII_NVIDIA, 45, 19, false, {C_FG_GREEN, C_FG_WHITE}, {C_FG_WHITE, C_FG_GREEN} };
|
||||
asciiL logo_intel = { ASCII_INTEL, 48, 14, false, {C_FG_CYAN}, {C_FG_CYAN, C_FG_WHITE} };
|
||||
// Long variants | ---------------------------------------------------------------------------------------|
|
||||
asciiL logo_nvidia_l = { ASCII_NVIDIA_L, 50, 15, false, {C_FG_GREEN, C_FG_WHITE}, {C_FG_WHITE, C_FG_GREEN} };
|
||||
asciiL logo_intel_l = { ASCII_INTEL_L, 62, 19, true, {C_BG_CYAN, C_BG_WHITE}, {C_FG_CYAN, C_FG_WHITE} };
|
||||
asciiL logo_unknown = { NULL, 0, 0, false, {C_NONE}, {C_NONE, C_NONE} };
|
||||
|
||||
#endif
|
||||
|
||||
@@ -32,8 +32,6 @@ VENDOR get_gpu_vendor(struct gpu_info* gpu) {
|
||||
return gpu->vendor;
|
||||
}
|
||||
|
||||
double trunc(double val) { return ((int)(100 * val)) / 100.0; }
|
||||
|
||||
int32_t get_value_as_smallest_unit(char ** str, uint64_t value) {
|
||||
int32_t ret;
|
||||
int max_len = 10; // Max is 8 for digits, 2 for units
|
||||
@@ -145,6 +143,13 @@ char* get_str_peak_performance(struct gpu_info* gpu) {
|
||||
}
|
||||
|
||||
char* get_str_peak_performance_tensor(struct gpu_info* gpu) {
|
||||
return get_str_peak_performance_generic(gpu->peak_performance_t);
|
||||
return get_str_peak_performance_generic(gpu->peak_performance_tcu);
|
||||
}
|
||||
|
||||
char* get_str_generic(int32_t data) {
|
||||
// Largest int is 10, +1 for possible negative, +1 for EOL
|
||||
uint32_t max_size = 12;
|
||||
char* dummy = (char *) ecalloc(max_size, sizeof(char));
|
||||
snprintf(dummy, max_size, "%d", data);
|
||||
return dummy;
|
||||
}
|
||||
|
||||
@@ -9,7 +9,8 @@
|
||||
#define UNKNOWN_FREQ -1
|
||||
|
||||
enum {
|
||||
GPU_VENDOR_NVIDIA
|
||||
GPU_VENDOR_NVIDIA,
|
||||
GPU_VENDOR_INTEL
|
||||
};
|
||||
|
||||
enum {
|
||||
@@ -43,6 +44,12 @@ struct topology {
|
||||
int32_t tensor_cores;
|
||||
};
|
||||
|
||||
struct topology_i {
|
||||
int32_t slices;
|
||||
int32_t subslices;
|
||||
int32_t eu_subslice;
|
||||
};
|
||||
|
||||
struct memory {
|
||||
int64_t size_bytes;
|
||||
MEMTYPE type;
|
||||
@@ -58,10 +65,11 @@ struct gpu_info {
|
||||
int64_t freq;
|
||||
struct pci* pci;
|
||||
struct topology* topo;
|
||||
struct topology_i* topo_i;
|
||||
struct memory* mem;
|
||||
struct cache* cach;
|
||||
int64_t peak_performance;
|
||||
int64_t peak_performance_t;
|
||||
int64_t peak_performance_tcu;
|
||||
int32_t idx;
|
||||
};
|
||||
|
||||
@@ -75,5 +83,6 @@ char* get_str_memory_clock(struct gpu_info* gpu);
|
||||
char* get_str_l2(struct gpu_info* gpu);
|
||||
char* get_str_peak_performance(struct gpu_info* gpu);
|
||||
char* get_str_peak_performance_tensor(struct gpu_info* gpu);
|
||||
char* get_str_generic(int32_t data);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
#include "args.hpp"
|
||||
#include "global.hpp"
|
||||
#include "master.hpp"
|
||||
#include "../cuda/cuda.hpp"
|
||||
#include "../cuda/uarch.hpp"
|
||||
|
||||
@@ -65,18 +66,19 @@ int main(int argc, char* argv[]) {
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
struct gpu_list* list = get_gpu_list();
|
||||
if(list_gpus()) {
|
||||
return print_gpus_list();
|
||||
return print_gpus_list(list);
|
||||
}
|
||||
|
||||
set_log_level(true);
|
||||
|
||||
printWarn("gpufetch is in beta. The provided information may be incomplete or wrong.\n\
|
||||
printf("[WARNING]: gpufetch is in beta. The provided information may be incomplete or wrong.\n\
|
||||
If you want to help to improve gpufetch, please compare the output of the program\n\
|
||||
with a reliable source which you know is right (e.g, techpowerup.com) and report\n\
|
||||
any inconsistencies to https://github.com/Dr-Noob/gpufetch/issues");
|
||||
any inconsistencies to https://github.com/Dr-Noob/gpufetch/issues\n");
|
||||
|
||||
struct gpu_info* gpu = get_gpu_info(get_gpu_idx());
|
||||
struct gpu_info* gpu = get_gpu_info(list, get_gpu_idx());
|
||||
if(gpu == NULL)
|
||||
return EXIT_FAILURE;
|
||||
|
||||
|
||||
62
src/common/master.cpp
Normal file
62
src/common/master.cpp
Normal file
@@ -0,0 +1,62 @@
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "master.hpp"
|
||||
#include "../cuda/cuda.hpp"
|
||||
#include "../intel/intel.hpp"
|
||||
|
||||
#define MAX_GPUS 1000
|
||||
|
||||
struct gpu_list {
|
||||
struct gpu_info ** gpus;
|
||||
int num_gpus;
|
||||
};
|
||||
|
||||
struct gpu_list* get_gpu_list() {
|
||||
int idx = 0;
|
||||
struct gpu_list* list = (struct gpu_list*) malloc(sizeof(struct gpu_list));
|
||||
list->num_gpus = 0;
|
||||
list->gpus = (struct gpu_info**) malloc(sizeof(struct info*) * MAX_GPUS);
|
||||
|
||||
#ifdef BACKEND_CUDA
|
||||
bool valid = true;
|
||||
|
||||
while(valid) {
|
||||
list->gpus[idx] = get_gpu_info_cuda(idx);
|
||||
if(list->gpus[idx] != NULL) idx++;
|
||||
else valid = false;
|
||||
}
|
||||
|
||||
list->num_gpus += idx;
|
||||
#endif
|
||||
|
||||
#ifdef BACKEND_INTEL
|
||||
list->gpus[idx] = get_gpu_info_intel();
|
||||
if(list->gpus[idx] != NULL) list->num_gpus++;
|
||||
#endif
|
||||
|
||||
return list;
|
||||
}
|
||||
|
||||
bool print_gpus_list(struct gpu_list* list) {
|
||||
for(int i=0; i < list->num_gpus; i++) {
|
||||
printf("GPU %d: ", i);
|
||||
if(list->gpus[i]->vendor == GPU_VENDOR_NVIDIA) {
|
||||
#ifdef BACKEND_CUDA
|
||||
print_gpu_cuda(list->gpus[i]);
|
||||
#endif
|
||||
}
|
||||
else if(list->gpus[i]->vendor == GPU_VENDOR_INTEL) {
|
||||
#ifdef BACKEND_INTEL
|
||||
print_gpu_intel(list->gpus[i]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
struct gpu_info* get_gpu_info(struct gpu_list* list, int idx) {
|
||||
return list->gpus[idx];
|
||||
}
|
||||
12
src/common/master.hpp
Normal file
12
src/common/master.hpp
Normal file
@@ -0,0 +1,12 @@
|
||||
#ifndef __GPU_LIST__
|
||||
#define __GPU_LIST__
|
||||
|
||||
#include "gpu.hpp"
|
||||
|
||||
struct gpu_list;
|
||||
|
||||
struct gpu_list* get_gpu_list();
|
||||
bool print_gpus_list(struct gpu_list* list);
|
||||
struct gpu_info* get_gpu_info(struct gpu_list* list, int idx);
|
||||
|
||||
#endif
|
||||
@@ -2,33 +2,61 @@
|
||||
#include "pci.hpp"
|
||||
#include <cstddef>
|
||||
|
||||
/*
|
||||
* doc: https://wiki.osdev.org/PCI#Class_Codes
|
||||
* https://pci-ids.ucw.cz/read/PC
|
||||
*/
|
||||
#define VENDOR_ID_NVIDIA 0x10de
|
||||
#define CLASS_VGA_CONTROLLER 0x0300
|
||||
|
||||
uint16_t pciutils_get_pci_vendor_id(struct pci_dev *devices) {
|
||||
bool pciutils_is_vendor_id_present(struct pci_dev *devices, int id) {
|
||||
for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
|
||||
if(dev->vendor_id == VENDOR_ID_NVIDIA && dev->device_class == CLASS_VGA_CONTROLLER) {
|
||||
return dev->vendor_id;
|
||||
if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
printErr("Unable to find a CUDA device using pciutils");
|
||||
return 0;
|
||||
|
||||
printWarn("Unable to find a valid device for id %d using pciutils", id);
|
||||
return false;
|
||||
}
|
||||
|
||||
uint16_t pciutils_get_pci_device_id(struct pci_dev *devices) {
|
||||
uint16_t pciutils_get_pci_device_id(struct pci_dev *devices, int id) {
|
||||
for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
|
||||
if(dev->vendor_id == VENDOR_ID_NVIDIA && dev->device_class == CLASS_VGA_CONTROLLER) {
|
||||
if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
|
||||
return dev->device_id;
|
||||
}
|
||||
}
|
||||
printErr("Unable to find a CUDA device using pciutils");
|
||||
|
||||
printErr("Unable to find a valid device for id %d using pciutils", id);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void pciutils_set_pci_bus(struct pci* pci, struct pci_dev *devices, int id) {
|
||||
bool found = false;
|
||||
|
||||
for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
|
||||
if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
|
||||
pci->domain = dev->domain;
|
||||
pci->bus = dev->bus;
|
||||
pci->dev = dev->dev;
|
||||
pci->func = dev->func;
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
|
||||
if(!found) printErr("Unable to find a valid device for id %d using pciutils", id);
|
||||
}
|
||||
|
||||
struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id) {
|
||||
struct pci* pci = (struct pci*) emalloc(sizeof(struct pci));
|
||||
|
||||
// TODO: Refactor this; instead of 2xGet + 1xSet, do it better
|
||||
if(pciutils_is_vendor_id_present(devices, id)) {
|
||||
pci->vendor_id = id;
|
||||
pci->device_id = pciutils_get_pci_device_id(devices, id);
|
||||
pciutils_set_pci_bus(pci, devices, id);
|
||||
return pci;
|
||||
}
|
||||
else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
struct pci_dev *get_pci_devices_from_pciutils() {
|
||||
struct pci_access *pacc;
|
||||
struct pci_dev *dev;
|
||||
|
||||
@@ -6,8 +6,16 @@ extern "C" {
|
||||
#include <pci/pci.h>
|
||||
}
|
||||
|
||||
uint16_t pciutils_get_pci_vendor_id(struct pci_dev *devices);
|
||||
uint16_t pciutils_get_pci_device_id(struct pci_dev *devices);
|
||||
struct pci {
|
||||
uint16_t vendor_id;
|
||||
uint16_t device_id;
|
||||
uint16_t domain;
|
||||
uint16_t bus;
|
||||
uint16_t dev;
|
||||
uint16_t func;
|
||||
};
|
||||
|
||||
struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id);
|
||||
struct pci_dev *get_pci_devices_from_pciutils();
|
||||
|
||||
#endif
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
#include "../common/global.hpp"
|
||||
#include "../common/gpu.hpp"
|
||||
|
||||
#include "../intel/uarch.hpp"
|
||||
#include "../intel/intel.hpp"
|
||||
#include "../cuda/cuda.hpp"
|
||||
#include "../cuda/uarch.hpp"
|
||||
|
||||
@@ -34,11 +36,13 @@ enum {
|
||||
ATTRIBUTE_CHIP,
|
||||
ATTRIBUTE_UARCH,
|
||||
ATTRIBUTE_TECHNOLOGY,
|
||||
ATTRIBUTE_GT,
|
||||
ATTRIBUTE_FREQUENCY,
|
||||
ATTRIBUTE_STREAMINGMP,
|
||||
ATTRIBUTE_CORESPERMP,
|
||||
ATTRIBUTE_CUDA_CORES,
|
||||
ATTRIBUTE_TENSOR_CORES,
|
||||
ATTRIBUTE_EUS,
|
||||
ATTRIBUTE_L2,
|
||||
ATTRIBUTE_MEMORY,
|
||||
ATTRIBUTE_MEMORY_FREQ,
|
||||
@@ -52,11 +56,13 @@ static const char* ATTRIBUTE_FIELDS [] = {
|
||||
"GPU processor:",
|
||||
"Microarchitecture:",
|
||||
"Technology:",
|
||||
"Graphics Tier:",
|
||||
"Max Frequency:",
|
||||
"SMs:",
|
||||
"Cores/SM:",
|
||||
"CUDA Cores:",
|
||||
"Tensor Cores:",
|
||||
"Execution Units:",
|
||||
"L2 Size:",
|
||||
"Memory:",
|
||||
"Memory frequency:",
|
||||
@@ -70,11 +76,13 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
|
||||
"Processor:",
|
||||
"uArch:",
|
||||
"Technology:",
|
||||
"GT:",
|
||||
"Max Freq.:",
|
||||
"SMs:",
|
||||
"Cores/SM:",
|
||||
"CUDA Cores:",
|
||||
"Tensor Cores:",
|
||||
"EUs:",
|
||||
"L2 Size:",
|
||||
"Memory:",
|
||||
"Memory freq.:",
|
||||
@@ -200,23 +208,32 @@ void replace_bgbyfg_color(struct ascii_logo* logo) {
|
||||
for(int i=0; i < 2; i++) {
|
||||
if(logo->color_ascii[i] == NULL) break;
|
||||
|
||||
if(strcmp(logo->color_ascii[i], COLOR_BG_BLACK) == 0) strcpy(logo->color_ascii[i], COLOR_FG_BLACK);
|
||||
else if(strcmp(logo->color_ascii[i], COLOR_BG_RED) == 0) strcpy(logo->color_ascii[i], COLOR_FG_RED);
|
||||
else if(strcmp(logo->color_ascii[i], COLOR_BG_GREEN) == 0) strcpy(logo->color_ascii[i], COLOR_FG_GREEN);
|
||||
else if(strcmp(logo->color_ascii[i], COLOR_BG_YELLOW) == 0) strcpy(logo->color_ascii[i], COLOR_FG_YELLOW);
|
||||
else if(strcmp(logo->color_ascii[i], COLOR_BG_BLUE) == 0) strcpy(logo->color_ascii[i], COLOR_FG_BLUE);
|
||||
else if(strcmp(logo->color_ascii[i], COLOR_BG_MAGENTA) == 0) strcpy(logo->color_ascii[i], COLOR_FG_MAGENTA);
|
||||
else if(strcmp(logo->color_ascii[i], COLOR_BG_CYAN) == 0) strcpy(logo->color_ascii[i], COLOR_FG_CYAN);
|
||||
else if(strcmp(logo->color_ascii[i], COLOR_BG_WHITE) == 0) strcpy(logo->color_ascii[i], COLOR_FG_WHITE);
|
||||
if(strcmp(logo->color_ascii[i], C_BG_BLACK) == 0) strcpy(logo->color_ascii[i], C_FG_BLACK);
|
||||
else if(strcmp(logo->color_ascii[i], C_BG_RED) == 0) strcpy(logo->color_ascii[i], C_FG_RED);
|
||||
else if(strcmp(logo->color_ascii[i], C_BG_GREEN) == 0) strcpy(logo->color_ascii[i], C_FG_GREEN);
|
||||
else if(strcmp(logo->color_ascii[i], C_BG_YELLOW) == 0) strcpy(logo->color_ascii[i], C_FG_YELLOW);
|
||||
else if(strcmp(logo->color_ascii[i], C_BG_BLUE) == 0) strcpy(logo->color_ascii[i], C_FG_BLUE);
|
||||
else if(strcmp(logo->color_ascii[i], C_BG_MAGENTA) == 0) strcpy(logo->color_ascii[i], C_FG_MAGENTA);
|
||||
else if(strcmp(logo->color_ascii[i], C_BG_CYAN) == 0) strcpy(logo->color_ascii[i], C_FG_CYAN);
|
||||
else if(strcmp(logo->color_ascii[i], C_BG_WHITE) == 0) strcpy(logo->color_ascii[i], C_FG_WHITE);
|
||||
}
|
||||
}
|
||||
|
||||
struct ascii_logo* choose_ascii_art_aux(struct ascii_logo* logo_long, struct ascii_logo* logo_short, struct terminal* term, int lf) {
|
||||
if(ascii_fits_screen(term->w, *logo_long, lf)) {
|
||||
return logo_long;
|
||||
}
|
||||
else {
|
||||
return logo_short;
|
||||
}
|
||||
}
|
||||
|
||||
void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* term, int lf) {
|
||||
if(art->vendor == GPU_VENDOR_NVIDIA) {
|
||||
if(term != NULL && ascii_fits_screen(term->w, logo_nvidia_l, lf))
|
||||
art->art = &logo_nvidia_l;
|
||||
else
|
||||
art->art = &logo_nvidia;
|
||||
art->art = choose_ascii_art_aux(&logo_nvidia_l, &logo_nvidia, term, lf);
|
||||
}
|
||||
else if(art->vendor == GPU_VENDOR_INTEL) {
|
||||
art->art = choose_ascii_art_aux(&logo_intel_l, &logo_intel, term, lf);
|
||||
}
|
||||
else {
|
||||
art->art = &logo_unknown;
|
||||
@@ -228,10 +245,10 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
|
||||
switch(art->style) {
|
||||
case STYLE_LEGACY:
|
||||
logo->replace_blocks = false;
|
||||
strcpy(logo->color_text[0], COLOR_NONE);
|
||||
strcpy(logo->color_text[1], COLOR_NONE);
|
||||
strcpy(logo->color_ascii[0], COLOR_NONE);
|
||||
strcpy(logo->color_ascii[1], COLOR_NONE);
|
||||
strcpy(logo->color_text[0], C_NONE);
|
||||
strcpy(logo->color_text[1], C_NONE);
|
||||
strcpy(logo->color_ascii[0], C_NONE);
|
||||
strcpy(logo->color_ascii[1], C_NONE);
|
||||
art->reset[0] = '\0';
|
||||
break;
|
||||
case STYLE_RETRO:
|
||||
@@ -245,7 +262,7 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
|
||||
strcpy(logo->color_ascii[0], rgb_to_ansi(cs[0], logo->replace_blocks, true));
|
||||
strcpy(logo->color_ascii[1], rgb_to_ansi(cs[1], logo->replace_blocks, true));
|
||||
}
|
||||
strcpy(art->reset, COLOR_RESET);
|
||||
strcpy(art->reset, C_RESET);
|
||||
break;
|
||||
case STYLE_INVALID:
|
||||
default:
|
||||
@@ -342,6 +359,48 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t text_space, con
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
#ifdef BACKEND_INTEL
|
||||
bool print_gpufetch_intel(struct gpu_info* gpu, STYLE s, struct color** cs, struct terminal* term) {
|
||||
struct ascii* art = set_ascii(get_gpu_vendor(gpu), s);
|
||||
|
||||
if(art == NULL)
|
||||
return false;
|
||||
|
||||
char* gpu_name = get_str_gpu_name(gpu);
|
||||
char* uarch = get_str_uarch_intel(gpu->arch);
|
||||
char* gt = get_str_gt(gpu->arch);
|
||||
char* manufacturing_process = get_str_process(gpu->arch);
|
||||
char* eus = get_str_eu(gpu);
|
||||
char* max_frequency = get_str_freq(gpu);
|
||||
char* pp = get_str_peak_performance(gpu);
|
||||
|
||||
setAttribute(art, ATTRIBUTE_NAME, gpu_name);
|
||||
setAttribute(art, ATTRIBUTE_UARCH, uarch);
|
||||
setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
|
||||
setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
|
||||
setAttribute(art, ATTRIBUTE_GT, gt);
|
||||
setAttribute(art, ATTRIBUTE_EUS, eus);
|
||||
setAttribute(art, ATTRIBUTE_PEAK, pp);
|
||||
|
||||
const char** attribute_fields = ATTRIBUTE_FIELDS;
|
||||
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
|
||||
uint32_t longest_field = longest_field_length(art, longest_attribute);
|
||||
choose_ascii_art(art, cs, term, longest_field);
|
||||
|
||||
if(!ascii_fits_screen(term->w, *art->art, longest_field)) {
|
||||
// Despite of choosing the smallest logo, the output does not fit
|
||||
// Choose the shorter field names and recalculate the longest attr
|
||||
attribute_fields = ATTRIBUTE_FIELDS_SHORT;
|
||||
longest_attribute = longest_attribute_length(art, attribute_fields);
|
||||
}
|
||||
|
||||
print_ascii_generic(art, longest_attribute, term->w - art->art->width, attribute_fields);
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef BACKEND_CUDA
|
||||
bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struct terminal* term) {
|
||||
struct ascii* art = set_ascii(get_gpu_vendor(gpu), s);
|
||||
|
||||
@@ -350,7 +409,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
|
||||
|
||||
char* gpu_name = get_str_gpu_name(gpu);
|
||||
char* gpu_chip = get_str_chip(gpu->arch);
|
||||
char* uarch = get_str_uarch(gpu->arch);
|
||||
char* uarch = get_str_uarch_cuda(gpu->arch);
|
||||
char* comp_cap = get_str_cc(gpu->arch);
|
||||
char* manufacturing_process = get_str_process(gpu->arch);
|
||||
char* sms = get_str_sm(gpu);
|
||||
@@ -416,6 +475,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
struct terminal* get_terminal_size() {
|
||||
struct terminal* term = (struct terminal*) emalloc(sizeof(struct terminal));
|
||||
@@ -448,5 +508,17 @@ struct terminal* get_terminal_size() {
|
||||
bool print_gpufetch(struct gpu_info* gpu, STYLE s, struct color** cs) {
|
||||
struct terminal* term = get_terminal_size();
|
||||
|
||||
return print_gpufetch_cuda(gpu, s, cs, term);
|
||||
if(gpu->vendor == GPU_VENDOR_NVIDIA)
|
||||
#ifdef BACKEND_CUDA
|
||||
return print_gpufetch_cuda(gpu, s, cs, term);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
else {
|
||||
#ifdef BACKEND_INTEL
|
||||
return print_gpufetch_intel(gpu, s, cs, term);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
28
src/common/uarch.cpp
Normal file
28
src/common/uarch.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "global.hpp"
|
||||
#include "uarch.hpp"
|
||||
|
||||
char* get_str_process(struct uarch* arch) {
|
||||
char* str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN)+1));
|
||||
int32_t process = arch->process;
|
||||
|
||||
if(process == UNK) {
|
||||
snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
|
||||
}
|
||||
else if(process > 100) {
|
||||
sprintf(str, "%.2fum", (double)process/100);
|
||||
}
|
||||
else if(process > 0){
|
||||
sprintf(str, "%dnm", process);
|
||||
}
|
||||
else {
|
||||
snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
|
||||
printBug("Found invalid process: '%d'", process);
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
31
src/common/uarch.hpp
Normal file
31
src/common/uarch.hpp
Normal file
@@ -0,0 +1,31 @@
|
||||
#ifndef __COMMON_UARCH__
#define __COMMON_UARCH__

// Fixed-width integer types used below; included here so this header does
// not depend on the include order of the files that use it.
#include <stdint.h>

// Data not available
#define NA -1

// Unknown manufacturing process
#define UNK -1

// Chip identifier; the concrete values are the enums of each backend's chips.hpp
typedef uint32_t GPUCHIP;
// Microarchitecture identifier; the concrete values are defined per backend
typedef uint32_t MICROARCH;

// Microarchitecture description shared by the CUDA and Intel backends
struct uarch {
  // NVIDIA specific
  int32_t cc_major;            // compute capability, major number
  int32_t cc_minor;            // compute capability, minor number
  int32_t compute_capability;  // cc_major * 10 + cc_minor

  // Intel specific
  int32_t gt;                  // graphics tier (GT1, GT2, ...)
  int32_t eu;                  // presumably the execution unit count - TODO confirm, not written by the visible code

  MICROARCH uarch;
  GPUCHIP chip;

  int32_t process;             // manufacturing process, or UNK
  char* uarch_str;
  char* chip_str;              // heap-allocated chip name
};

#endif
|
||||
@@ -1,10 +1,10 @@
|
||||
#ifndef __GPUCHIPS__
|
||||
#define __GPUCHIPS__
|
||||
#ifndef __CUDA_GPUCHIPS__
|
||||
#define __CUDA_GPUCHIPS__
|
||||
|
||||
typedef uint32_t GPUCHIP;
|
||||
|
||||
enum {
|
||||
CHIP_UNKNOWN,
|
||||
CHIP_UNKNOWN_CUDA,
|
||||
CHIP_G80,
|
||||
CHIP_G80GL,
|
||||
CHIP_G84,
|
||||
|
||||
@@ -6,40 +6,12 @@
|
||||
#include "../common/pci.hpp"
|
||||
#include "../common/global.hpp"
|
||||
|
||||
int print_gpus_list() {
|
||||
cudaError_t err = cudaSuccess;
|
||||
int num_gpus = -1;
|
||||
bool print_gpu_cuda(struct gpu_info* gpu) {
|
||||
char* cc = get_str_cc(gpu->arch);
|
||||
printf("%s (Compute Capability %s)\n", gpu->name, cc);
|
||||
free(cc);
|
||||
|
||||
if ((err = cudaGetDeviceCount(&num_gpus)) != cudaSuccess) {
|
||||
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
printf("CUDA GPUs available: %d\n", num_gpus);
|
||||
|
||||
if(num_gpus > 0) {
|
||||
cudaDeviceProp deviceProp;
|
||||
int max_len = 0;
|
||||
|
||||
for(int idx=0; idx < num_gpus; idx++) {
|
||||
if ((err = cudaGetDeviceProperties(&deviceProp, idx)) != cudaSuccess) {
|
||||
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
max_len = max(max_len, (int) strlen(deviceProp.name));
|
||||
}
|
||||
|
||||
for(int i=0; i < max_len + 32; i++) putchar('-');
|
||||
putchar('\n');
|
||||
for(int idx=0; idx < num_gpus; idx++) {
|
||||
if ((err = cudaGetDeviceProperties(&deviceProp, idx)) != cudaSuccess) {
|
||||
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
printf("GPU %d: %s (Compute Capability %d.%d)\n", idx, deviceProp.name, deviceProp.major, deviceProp.minor);
|
||||
}
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
return true;
|
||||
}
|
||||
|
||||
struct cache* get_cache_info(cudaDeviceProp prop) {
|
||||
@@ -104,12 +76,12 @@ struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) {
|
||||
}
|
||||
|
||||
// Compute peak performance when using CUDA cores
|
||||
int64_t get_peak_performance(struct gpu_info* gpu) {
|
||||
int64_t get_peak_performance_cuda(struct gpu_info* gpu) {
|
||||
return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2;
|
||||
}
|
||||
|
||||
// Compute peak performance when using tensor cores
|
||||
int64_t get_peak_performance_t(cudaDeviceProp prop, struct gpu_info* gpu) {
|
||||
int64_t get_peak_performance_tcu(cudaDeviceProp prop, struct gpu_info* gpu) {
|
||||
// Volta / Turing tensor cores perform 4x4x4 FP16 matrix multiplication
|
||||
// Ampere tensor cores perform 8x4x8 FP16 matrix multiplication
|
||||
if(prop.major == 7) return gpu->freq * 1000000 * 4 * 4 * 4 * 2 * gpu->topo->tensor_cores;
|
||||
@@ -117,7 +89,7 @@ int64_t get_peak_performance_t(cudaDeviceProp prop, struct gpu_info* gpu) {
|
||||
else return 0;
|
||||
}
|
||||
|
||||
struct gpu_info* get_gpu_info(int gpu_idx) {
|
||||
struct gpu_info* get_gpu_info_cuda(int gpu_idx) {
|
||||
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
|
||||
gpu->pci = NULL;
|
||||
gpu->idx = gpu_idx;
|
||||
@@ -127,8 +99,10 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
printf("Waiting for CUDA driver to start...");
|
||||
fflush(stdout);
|
||||
if(gpu_idx == 0) {
|
||||
printf("Waiting for CUDA driver to start...");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
int num_gpus = -1;
|
||||
cudaError_t err = cudaSuccess;
|
||||
@@ -136,7 +110,10 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
|
||||
printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
|
||||
return NULL;
|
||||
}
|
||||
printf("\r ");
|
||||
|
||||
if(gpu_idx == 0) {
|
||||
printf("\r");
|
||||
}
|
||||
|
||||
if(num_gpus <= 0) {
|
||||
printErr("No CUDA capable devices found!");
|
||||
@@ -144,7 +121,7 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
|
||||
}
|
||||
|
||||
if(gpu->idx+1 > num_gpus) {
|
||||
printErr("Requested GPU index %d in a system with %d GPUs", gpu->idx, num_gpus);
|
||||
// Master is trying to query an invalid GPU
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -160,25 +137,22 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
|
||||
strcpy(gpu->name, deviceProp.name);
|
||||
|
||||
struct pci_dev *devices = get_pci_devices_from_pciutils();
|
||||
gpu->pci = get_pci_from_pciutils(devices);
|
||||
gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_NVIDIA);
|
||||
gpu->arch = get_uarch_from_cuda(gpu);
|
||||
gpu->cach = get_cache_info(deviceProp);
|
||||
gpu->mem = get_memory_info(gpu, deviceProp);
|
||||
gpu->topo = get_topology_info(deviceProp);
|
||||
<<<<<<< HEAD
|
||||
gpu->peak_performance = get_peak_performance(gpu);
|
||||
gpu->peak_performance_t = get_peak_performance_t(deviceProp, gpu);
|
||||
=======
|
||||
gpu->peak_performance = get_peak_performance_cuda(gpu);
|
||||
gpu->peak_performance_tcu = get_peak_performance_tcu(gpu);
|
||||
>>>>>>> origin/intel
|
||||
|
||||
return gpu;
|
||||
}
|
||||
|
||||
char* get_str_generic(int32_t data) {
|
||||
// Largest int is 10, +1 for possible negative, +1 for EOL
|
||||
uint32_t max_size = 12;
|
||||
char* dummy = (char *) ecalloc(max_size, sizeof(char));
|
||||
snprintf(dummy, max_size, "%d", data);
|
||||
return dummy;
|
||||
}
|
||||
|
||||
char* get_str_sm(struct gpu_info* gpu) {
|
||||
return get_str_generic(gpu->topo->streaming_mp);
|
||||
}
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
#ifndef __CUDA__
|
||||
#define __CUDA__
|
||||
#ifndef __CUDA_GPU__
|
||||
#define __CUDA_GPU__
|
||||
|
||||
#include "../common/gpu.hpp"
|
||||
|
||||
struct gpu_info* get_gpu_info(int gpu_idx);
|
||||
int print_gpus_list();
|
||||
struct gpu_info* get_gpu_info_cuda(int gpu_idx);
|
||||
bool print_gpu_cuda(struct gpu_info* gpu);
|
||||
char* get_str_sm(struct gpu_info* gpu);
|
||||
char* get_str_cores_sm(struct gpu_info* gpu);
|
||||
char* get_str_cuda_cores(struct gpu_info* gpu);
|
||||
|
||||
@@ -8,21 +8,7 @@
|
||||
#define CHECK_PCI_START if (false) {}
|
||||
#define CHECK_PCI(pci, id, chip) \
|
||||
else if (pci->device_id == id) return chip;
|
||||
#define CHECK_PCI_END else { printBug("TODOO"); return CHIP_UNKNOWN; }
|
||||
|
||||
struct pci {
|
||||
uint16_t vendor_id;
|
||||
uint16_t device_id;
|
||||
};
|
||||
|
||||
struct pci* get_pci_from_pciutils(struct pci_dev *devices) {
|
||||
struct pci* pci = (struct pci*) emalloc(sizeof(struct pci));
|
||||
|
||||
pci->vendor_id = pciutils_get_pci_vendor_id(devices);
|
||||
pci->device_id = pciutils_get_pci_device_id(devices);
|
||||
|
||||
return pci;
|
||||
}
|
||||
#define CHECK_PCI_END else { printBug("Unkown CUDA device id: 0x%.4X", pci->device_id); return CHIP_UNKNOWN_CUDA; }
|
||||
|
||||
/*
|
||||
* pci ids were retrieved using https://github.com/pciutils/pciids
|
||||
@@ -33,7 +19,7 @@ struct pci* get_pci_from_pciutils(struct pci_dev *devices) {
|
||||
* or in pci.ids itself)
|
||||
*/
|
||||
|
||||
GPUCHIP get_chip_from_pci(struct pci* pci) {
|
||||
GPUCHIP get_chip_from_pci_cuda(struct pci* pci) {
|
||||
CHECK_PCI_START
|
||||
CHECK_PCI(pci, 0x25e5, CHIP_GA107BM)
|
||||
CHECK_PCI(pci, 0x25e2, CHIP_GA107BM)
|
||||
|
||||
@@ -6,9 +6,14 @@
|
||||
#include "../common/pci.hpp"
|
||||
#include "chips.hpp"
|
||||
|
||||
/*
|
||||
* doc: https://wiki.osdev.org/PCI#Class_Codes
|
||||
* https://pci-ids.ucw.cz/read/PC
|
||||
*/
|
||||
#define PCI_VENDOR_ID_NVIDIA 0x10de
|
||||
|
||||
struct pci;
|
||||
|
||||
struct pci* get_pci_from_pciutils(struct pci_dev *devices);
|
||||
GPUCHIP get_chip_from_pci(struct pci* pci);
|
||||
GPUCHIP get_chip_from_pci_cuda(struct pci* pci);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -3,21 +3,14 @@
|
||||
#include <stdint.h>
|
||||
#include <cstddef>
|
||||
|
||||
#include "../common/uarch.hpp"
|
||||
#include "../common/global.hpp"
|
||||
#include "../common/gpu.hpp"
|
||||
#include "chips.hpp"
|
||||
|
||||
typedef uint32_t MICROARCH;
|
||||
|
||||
// Any clock multiplier
|
||||
#define CM_ANY -1
|
||||
|
||||
// Data not available
|
||||
#define NA -1
|
||||
|
||||
// Unknown manufacturing process
|
||||
#define UNK -1
|
||||
|
||||
// MICROARCH values
|
||||
enum {
|
||||
UARCH_UNKNOWN,
|
||||
@@ -43,23 +36,10 @@ static const char *uarch_str[] = {
|
||||
/*[ARCH_AMPERE] = */ "Ampere",
|
||||
};
|
||||
|
||||
struct uarch {
|
||||
int32_t cc_major;
|
||||
int32_t cc_minor;
|
||||
int32_t compute_capability;
|
||||
|
||||
MICROARCH uarch;
|
||||
GPUCHIP chip;
|
||||
|
||||
int32_t process;
|
||||
char* uarch_str;
|
||||
char* chip_str;
|
||||
};
|
||||
|
||||
#define CHECK_UARCH_START if (false) {}
|
||||
#define CHECK_UARCH(arch, chip_, str, uarch, process) \
|
||||
else if (arch->chip == chip_) fill_uarch(arch, str, uarch, process);
|
||||
#define CHECK_UARCH_END else { printBug("map_chip_to_uarch: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, 0); }
|
||||
#define CHECK_UARCH_END else { if(arch->chip != CHIP_UNKNOWN_CUDA) printBug("map_chip_to_uarch_cuda: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, 0); }
|
||||
|
||||
void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t process) {
|
||||
arch->chip_str = (char *) emalloc(sizeof(char) * (strlen(str)+1));
|
||||
@@ -74,7 +54,7 @@ void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t proce
|
||||
* o CHIP_XXXGL: indicates a professional-class (Quadro/Tesla) chip
|
||||
* o CHIP_XXXM: indicates a mobile chip
|
||||
*/
|
||||
void map_chip_to_uarch(struct uarch* arch) {
|
||||
void map_chip_to_uarch_cuda(struct uarch* arch) {
|
||||
CHECK_UARCH_START
|
||||
// TESLA (1.0, 1.1, 1.2, 1.3) //
|
||||
CHECK_UARCH(arch, CHIP_G80, "G80", UARCH_TESLA, 90)
|
||||
@@ -263,9 +243,8 @@ struct uarch* get_uarch_from_cuda(struct gpu_info* gpu) {
|
||||
arch->cc_major = deviceProp.major;
|
||||
arch->cc_minor = deviceProp.minor;
|
||||
arch->compute_capability = deviceProp.major * 10 + deviceProp.minor;
|
||||
arch->chip = get_chip_from_pci(gpu->pci);
|
||||
|
||||
map_chip_to_uarch(arch);
|
||||
arch->chip = get_chip_from_pci_cuda(gpu->pci);
|
||||
map_chip_to_uarch_cuda(arch);
|
||||
|
||||
return arch;
|
||||
}
|
||||
@@ -335,10 +314,6 @@ MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch) {
|
||||
CHECK_MEMTYPE_END
|
||||
}
|
||||
|
||||
const char* get_str_uarch(struct uarch* arch) {
|
||||
return uarch_str[arch->uarch];
|
||||
}
|
||||
|
||||
char* get_str_cc(struct uarch* arch) {
|
||||
uint32_t max_size = 4;
|
||||
char* cc = (char *) ecalloc(max_size, sizeof(char));
|
||||
@@ -346,31 +321,14 @@ char* get_str_cc(struct uarch* arch) {
|
||||
return cc;
|
||||
}
|
||||
|
||||
char* get_str_process(struct uarch* arch) {
|
||||
char* str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN)+1));
|
||||
int32_t process = arch->process;
|
||||
|
||||
if(process == UNK) {
|
||||
snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
|
||||
}
|
||||
else if(process > 100) {
|
||||
sprintf(str, "%.2fum", (double)process/100);
|
||||
}
|
||||
else if(process > 0){
|
||||
sprintf(str, "%dnm", process);
|
||||
}
|
||||
else {
|
||||
snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
|
||||
printBug("Found invalid process: '%d'", process);
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
char* get_str_chip(struct uarch* arch) {
|
||||
return arch->chip_str;
|
||||
}
|
||||
|
||||
const char* get_str_uarch_cuda(struct uarch* arch) {
|
||||
return uarch_str[arch->uarch];
|
||||
}
|
||||
|
||||
void free_uarch_struct(struct uarch* arch) {
|
||||
free(arch->uarch_str);
|
||||
free(arch->chip_str);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef __UARCH__
|
||||
#define __UARCH__
|
||||
#ifndef __CUDA_UARCH__
|
||||
#define __CUDA_UARCH__
|
||||
|
||||
#include "../common/gpu.hpp"
|
||||
|
||||
@@ -8,7 +8,7 @@ struct uarch;
|
||||
struct uarch* get_uarch_from_cuda(struct gpu_info* gpu);
|
||||
bool clkm_possible_for_uarch(int clkm, struct uarch* arch);
|
||||
MEMTYPE guess_memtype_from_cmul_and_uarch(int ddr, struct uarch* arch);
|
||||
char* get_str_uarch(struct uarch* arch);
|
||||
char* get_str_uarch_cuda(struct uarch* arch);
|
||||
char* get_str_cc(struct uarch* arch);
|
||||
char* get_str_chip(struct uarch* arch);
|
||||
char* get_str_process(struct uarch* arch);
|
||||
|
||||
12
src/intel/check.sh
Executable file
12
src/intel/check.sh
Executable file
@@ -0,0 +1,12 @@
|
||||
#!/bin/bash -u
# Checks the difference between supported uarchs
# and uarchs that have their topology available
# in file uarch.cpp
#
# Both lists are (uarch, GT) pairs: fields 4-5 of every CHECK_UARCH entry and
# fields 3-4 of every CHECK_TOPO entry. They are written to temporary files
# and opened side by side in meld. Run it from the directory containing
# uarch.cpp.

uarchs="$(grep 'CHECK_UARCH' uarch.cpp | cut -d',' -f4-5 | grep 'UARCH_GEN' | tr -d ' ' | sort | uniq)"
topos="$(grep 'CHECK_TOPO' uarch.cpp | cut -d',' -f3,4 | grep 'UARCH_' | tr -d ' ' | sort | uniq)"

# Private temporary files instead of fixed, predictable names in /tmp; the
# trap removes them even when the script is interrupted.
uarchs_file="$(mktemp)" || exit 1
trap 'rm -f "$uarchs_file"' EXIT
topos_file="$(mktemp)" || exit 1
trap 'rm -f "$uarchs_file" "$topos_file"' EXIT

echo "$uarchs" > "$uarchs_file"
echo "$topos" > "$topos_file"
meld "$uarchs_file" "$topos_file"
|
||||
59
src/intel/chips.hpp
Normal file
59
src/intel/chips.hpp
Normal file
@@ -0,0 +1,59 @@
|
||||
#ifndef __INTEL_GPUCHIPS__
#define __INTEL_GPUCHIPS__

#include <stdint.h>

typedef uint32_t GPUCHIP;

/*
 * GPUCHIP values for the Intel backend, grouped by graphics generation.
 * CHIP_UNKNOWN_INTEL is the first enumerator (value 0); the rest follow
 * sequentially, so new chips must be appended inside their group with care.
 */
enum {
  CHIP_UNKNOWN_INTEL = 0,

  /* ---- Gen6 ---- */
  CHIP_HD_2000,
  CHIP_HD_3000,

  /* ---- Gen7 ---- */
  CHIP_HD_2500,
  CHIP_HD_4000,
  CHIP_HD_P4000,

  /* ---- Gen7.5 ---- */
  CHIP_HD_4200,
  CHIP_HD_4400,
  CHIP_HD_4600,
  CHIP_HD_P4600,
  CHIP_IRIS_5100,
  CHIP_IRISP_5200,
  CHIP_IRISP_P5200,

  /* ---- Gen8 ---- */
  CHIP_HD_5300,
  CHIP_HD_5500,
  CHIP_HD_5600,
  CHIP_HD_P5700,
  CHIP_HD_6000,
  CHIP_IRIS_6100,
  CHIP_IRISP_6200,
  CHIP_IRISP_P6300,

  /* ---- Gen9 ---- */
  CHIP_HD_510,
  CHIP_HD_515,
  CHIP_HD_520,
  CHIP_HD_530,
  CHIP_HD_P530,
  CHIP_HD_540,
  CHIP_HD_550,
  CHIP_IRIS_P555,
  CHIP_IRIS_580,
  CHIP_IRIS_P580,

  /* ---- Gen9.5 ---- */
  CHIP_UHD_600,
  CHIP_UHD_605,
  CHIP_UHD_620,
  CHIP_UHD_630,
  CHIP_HD_610,
  CHIP_HD_615,
  CHIP_HD_620,
  CHIP_HD_630,
  CHIP_HD_P630,
  CHIP_IRISP_640,
  CHIP_IRISP_650,
};

#endif
|
||||
46
src/intel/intel.cpp
Normal file
46
src/intel/intel.cpp
Normal file
@@ -0,0 +1,46 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "intel.hpp"
|
||||
#include "uarch.hpp"
|
||||
#include "chips.hpp"
|
||||
#include "udev.hpp"
|
||||
#include "../common/pci.hpp"
|
||||
#include "../common/global.hpp"
|
||||
|
||||
int64_t get_peak_performance_intel(struct gpu_info* gpu) {
|
||||
return gpu->freq * 1000000 * gpu->topo_i->eu_subslice * gpu->topo_i->subslices * 8 * 2;
|
||||
}
|
||||
|
||||
struct gpu_info* get_gpu_info_intel() {
|
||||
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
|
||||
gpu->vendor = GPU_VENDOR_INTEL;
|
||||
|
||||
struct pci_dev *devices = get_pci_devices_from_pciutils();
|
||||
gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_INTEL);
|
||||
|
||||
if(gpu->pci == NULL) {
|
||||
// No Intel iGPU found in PCI, which means it is not present
|
||||
return NULL;
|
||||
}
|
||||
|
||||
gpu->arch = get_uarch_from_pci(gpu->pci);
|
||||
gpu->name = get_name_from_uarch(gpu->arch);
|
||||
gpu->topo_i = get_topology_info(gpu->arch);
|
||||
gpu->freq = get_max_freq_from_file(gpu->pci);
|
||||
gpu->peak_performance = get_peak_performance_intel(gpu);
|
||||
|
||||
return gpu;
|
||||
}
|
||||
|
||||
bool print_gpu_intel(struct gpu_info* gpu) {
|
||||
if(gpu->vendor != GPU_VENDOR_INTEL) return false;
|
||||
|
||||
printf("Intel %s\n", gpu->name);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
char* get_str_eu(struct gpu_info* gpu) {
|
||||
return get_str_generic(gpu->topo_i->subslices * gpu->topo_i->eu_subslice);
|
||||
}
|
||||
10
src/intel/intel.hpp
Normal file
10
src/intel/intel.hpp
Normal file
@@ -0,0 +1,10 @@
|
||||
#ifndef __INTEL_GPU__
|
||||
#define __INTEL_GPU__
|
||||
|
||||
#include "../common/gpu.hpp"
|
||||
|
||||
struct gpu_info* get_gpu_info_intel();
|
||||
bool print_gpu_intel(struct gpu_info* gpu);
|
||||
char* get_str_eu(struct gpu_info* gpu);
|
||||
|
||||
#endif
|
||||
88
src/intel/pci.cpp
Normal file
88
src/intel/pci.cpp
Normal file
@@ -0,0 +1,88 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#include "pci.hpp"
|
||||
#include "chips.hpp"
|
||||
#include "../common/global.hpp"
|
||||
#include "../common/pci.hpp"
|
||||
|
||||
#define CHECK_PCI_START if (false) {}
|
||||
#define CHECK_PCI(pci, id, chip) \
|
||||
else if (pci->device_id == id) return chip;
|
||||
#define CHECK_PCI_END else { printBug("Unkown Intel device id: 0x%.4X", pci->device_id); return CHIP_UNKNOWN_INTEL; }
|
||||
|
||||
/*
|
||||
* https://github.com/mesa3d/mesa/blob/main/include/pci_ids/i965_pci_ids.h
|
||||
*/
|
||||
/*
 * Translates the PCI device id of an Intel iGPU into its GPUCHIP value.
 * Unknown ids are reported through printBug and yield CHIP_UNKNOWN_INTEL.
 */
GPUCHIP get_chip_from_pci_intel(struct pci* pci) {
  struct pci_chip {
    int id;
    GPUCHIP chip;
  };

  static const struct pci_chip known_chips[] = {
    // Gen6
    { 0x0102, CHIP_HD_2000 },
    { 0x0106, CHIP_HD_2000 },
    { 0x010A, CHIP_HD_2000 },
    { 0x0112, CHIP_HD_3000 },
    { 0x0122, CHIP_HD_3000 },
    { 0x0116, CHIP_HD_3000 },
    { 0x0126, CHIP_HD_3000 },
    // Gen7
    { 0x0152, CHIP_HD_2500 },
    { 0x0156, CHIP_HD_2500 },
    { 0x0162, CHIP_HD_4000 },
    { 0x0166, CHIP_HD_4000 },
    { 0x016a, CHIP_HD_P4000 },
    // Gen7.5
    { 0x0A1E, CHIP_HD_4200 },
    { 0x041E, CHIP_HD_4400 },
    { 0x0A16, CHIP_HD_4400 },
    { 0x0412, CHIP_HD_4600 },
    { 0x0416, CHIP_HD_4600 },
    { 0x0D12, CHIP_HD_4600 },
    { 0x041A, CHIP_HD_P4600 },
    { 0x0A2E, CHIP_IRIS_5100 },
    { 0x0D22, CHIP_IRISP_5200 },
    { 0x0D26, CHIP_IRISP_P5200 },
    // Gen8
    { 0x161E, CHIP_HD_5300 },
    { 0x1616, CHIP_HD_5500 },
    { 0x1612, CHIP_HD_5600 },
    { 0x161A, CHIP_HD_P5700 },
    { 0x1626, CHIP_HD_6000 },
    { 0x162B, CHIP_IRIS_6100 },
    { 0x1622, CHIP_IRISP_6200 },
    { 0x162A, CHIP_IRISP_P6300 },
    // Gen9
    // (HD 540, HD 550, Iris P555, Iris 580 and Iris P580 have no device id
    //  registered yet; the original placeholders were left disabled)
    { 0x1902, CHIP_HD_510 },
    { 0x1906, CHIP_HD_510 },
    { 0x190B, CHIP_HD_510 },
    { 0x191E, CHIP_HD_515 },
    { 0x1916, CHIP_HD_520 },
    { 0x1921, CHIP_HD_520 },
    { 0x1912, CHIP_HD_530 },
    { 0x191B, CHIP_HD_530 },
    { 0x191D, CHIP_HD_P530 },
    // Gen9.5
    { 0x3185, CHIP_UHD_600 },
    { 0x3184, CHIP_UHD_605 },
    { 0x5917, CHIP_UHD_620 },
    { 0x3E91, CHIP_UHD_630 },
    { 0x3E92, CHIP_UHD_630 },
    { 0x3E98, CHIP_UHD_630 },
    { 0x3E9B, CHIP_UHD_630 },
    { 0x9BC5, CHIP_UHD_630 },
    { 0x9BC8, CHIP_UHD_630 },
    { 0x5902, CHIP_HD_610 },
    { 0x5906, CHIP_HD_610 },
    { 0x590B, CHIP_HD_610 },
    { 0x591E, CHIP_HD_615 },
    { 0x5912, CHIP_HD_630 },
    { 0x591B, CHIP_HD_630 },
    { 0x591A, CHIP_HD_P630 },
    { 0x591D, CHIP_HD_P630 },
    { 0x5926, CHIP_IRISP_640 },
    { 0x5927, CHIP_IRISP_650 },
  };

  const size_t num_chips = sizeof(known_chips) / sizeof(known_chips[0]);
  for(size_t i = 0; i < num_chips; i++) {
    if(pci->device_id == known_chips[i].id) {
      return known_chips[i].chip;
    }
  }

  printBug("Unkown Intel device id: 0x%.4X", pci->device_id);
  return CHIP_UNKNOWN_INTEL;
}
|
||||
19
src/intel/pci.hpp
Normal file
19
src/intel/pci.hpp
Normal file
@@ -0,0 +1,19 @@
|
||||
#ifndef __PCI_INTEL__
|
||||
#define __PCI_INTEL__
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../common/pci.hpp"
|
||||
#include "chips.hpp"
|
||||
|
||||
/*
|
||||
* doc: https://wiki.osdev.org/PCI#Class_Codes
|
||||
* https://pci-ids.ucw.cz/read/PC
|
||||
*/
|
||||
#define PCI_VENDOR_ID_INTEL 0x8086
|
||||
|
||||
struct pci;
|
||||
|
||||
GPUCHIP get_chip_from_pci_intel(struct pci* pci);
|
||||
|
||||
#endif
|
||||
212
src/intel/uarch.cpp
Normal file
212
src/intel/uarch.cpp
Normal file
@@ -0,0 +1,212 @@
|
||||
#include <stdint.h>
|
||||
#include <cstddef>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "../common/uarch.hpp"
|
||||
#include "../common/global.hpp"
|
||||
#include "../common/gpu.hpp"
|
||||
#include "chips.hpp"
|
||||
#include "pci.hpp"
|
||||
|
||||
// Data not available
|
||||
#define NA -1
|
||||
|
||||
// Unknown manufacturing process
|
||||
#define UNK -1
|
||||
|
||||
/*
|
||||
* Mapping between iGPU and CPU uarchs
|
||||
* -----------------------------------
|
||||
* Gen6: Sandy Bridge (2nd Gen)
|
||||
* Gen7: Ivy Bridge (3rd Gen)
|
||||
* Gen7.5: Haswell (4th Gen)
|
||||
* Gen8: Broadwell (5th Gen)
|
||||
* Gen9: Skylake (6th Gen)
|
||||
* Gen9.5: Kaby Lake
|
||||
*/
|
||||
enum {
|
||||
UARCH_UNKNOWN,
|
||||
UARCH_GEN6,
|
||||
UARCH_GEN7,
|
||||
UARCH_GEN7_5,
|
||||
UARCH_GEN8,
|
||||
UARCH_GEN9,
|
||||
UARCH_GEN9_5,
|
||||
};
|
||||
|
||||
static const char *uarch_str[] = {
|
||||
/*[ARCH_UNKNOWN = */ STRING_UNKNOWN,
|
||||
/*[ARCH_GEN6] = */ "Gen6",
|
||||
/*[ARCH_GEN7] = */ "Gen7",
|
||||
/*[ARCH_GEN7_5] = */ "Gen7.5",
|
||||
/*[ARCH_GEN8] = */ "Gen8",
|
||||
/*[ARCH_GEN9] = */ "Gen9",
|
||||
/*[ARCH_GEN9_5] = */ "Gen9.5",
|
||||
};
|
||||
|
||||
// Graphic Tiers (GT)
|
||||
enum {
|
||||
GT_UNKNOWN,
|
||||
GT1,
|
||||
GT1_5,
|
||||
GT2,
|
||||
GT3,
|
||||
GT3e,
|
||||
GT4e
|
||||
};
|
||||
|
||||
static const char *gt_str[] = {
|
||||
/*[GT_UNKNOWN] = */ STRING_UNKNOWN,
|
||||
/*[GT1] = */ "GT1",
|
||||
/*[GT1_5] = */ "GT1.5",
|
||||
/*[GT2] = */ "GT2",
|
||||
/*[GT3] = */ "GT3",
|
||||
/*[GT3e] = */ "GT3e",
|
||||
/*[GT4e] = */ "GT4e",
|
||||
};
|
||||
|
||||
#define CHECK_UARCH_START if (false) {}
|
||||
#define CHECK_UARCH(arch, chip_, str, uarch, gt, process) \
|
||||
else if (arch->chip == chip_) fill_uarch(arch, str, uarch, gt, process);
|
||||
#define CHECK_UARCH_END else { printBug("map_chip_to_uarch_intel: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, GT_UNKNOWN, 0); }
|
||||
|
||||
#define CHECK_TOPO_START if (false) {}
|
||||
#define CHECK_TOPO(topo, arch, uarch_, gt_, eu_sub, sub, sli) \
|
||||
else if(arch->uarch == uarch_ && arch->gt == gt_) fill_topo(topo, eu_sub, sub, sli);
|
||||
#define CHECK_TOPO_END else { printBug("TODOO"); fill_topo(topo, -1, -1, -1); }
|
||||
|
||||
// Stores the given topology (EUs per subslice, subslices, slices) in topo_i
void fill_topo(struct topology_i* topo_i, int32_t eu_sub, int32_t sub, int32_t sli) {
  topo_i->eu_subslice = eu_sub;
  topo_i->subslices   = sub;
  topo_i->slices      = sli;
}
|
||||
|
||||
// Fills arch with a heap-allocated copy of the chip name plus the given
// microarchitecture, graphics tier and manufacturing process
void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, int32_t gt, uint32_t process) {
  size_t name_size = strlen(str) + 1; // room for the terminating '\0'
  char* chip_name = (char *) emalloc(sizeof(char) * name_size);
  memcpy(chip_name, str, name_size);

  arch->chip_str = chip_name;
  arch->gt = gt;
  arch->process = process;
  arch->uarch = u;
}
|
||||
|
||||
/*
 * Fills arch (chip name, microarchitecture, graphics tier and process in nm)
 * according to arch->chip. Chips without an entry are reported through
 * CHECK_UARCH_END, which prints a bug message and fills arch with unknown
 * values.
 */
void map_chip_to_uarch_intel(struct uarch* arch) {
  CHECK_UARCH_START
  // Gen6
  CHECK_UARCH(arch, CHIP_HD_2000,     "HD Graphics 2000",        UARCH_GEN6,   GT1,   32)
  CHECK_UARCH(arch, CHIP_HD_3000,     "HD Graphics 3000",        UARCH_GEN6,   GT2,   32)
  // Gen7
  CHECK_UARCH(arch, CHIP_HD_2500,     "HD Graphics 2500",        UARCH_GEN7,   GT1,   22)
  CHECK_UARCH(arch, CHIP_HD_4000,     "HD Graphics 4000",        UARCH_GEN7,   GT2,   22)
  CHECK_UARCH(arch, CHIP_HD_P4000,    "HD Graphics P4000",       UARCH_GEN7,   GT2,   22)
  // Gen7.5
  CHECK_UARCH(arch, CHIP_HD_4200,     "HD Graphics 4200",        UARCH_GEN7_5, GT2,   22)
  CHECK_UARCH(arch, CHIP_HD_4400,     "HD Graphics 4400",        UARCH_GEN7_5, GT2,   22)
  CHECK_UARCH(arch, CHIP_HD_4600,     "HD Graphics 4600",        UARCH_GEN7_5, GT2,   22)
  CHECK_UARCH(arch, CHIP_HD_P4600,    "HD Graphics P4600",       UARCH_GEN7_5, GT2,   22)
  CHECK_UARCH(arch, CHIP_IRIS_5100,   "HD Iris 5100",            UARCH_GEN7_5, GT3,   22)
  CHECK_UARCH(arch, CHIP_IRISP_5200,  "HD Iris Pro 5200",        UARCH_GEN7_5, GT3,   22)
  CHECK_UARCH(arch, CHIP_IRISP_P5200, "HD Iris Pro P5200",       UARCH_GEN7_5, GT3,   22)
  // Gen8
  CHECK_UARCH(arch, CHIP_HD_5300,     "HD Graphics 5300",        UARCH_GEN8,   GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_5500,     "HD Graphics 5500",        UARCH_GEN8,   GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_5600,     "HD Graphics 5600",        UARCH_GEN8,   GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_P5700,    "HD Graphics P5700",       UARCH_GEN8,   GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_6000,     "HD Graphics 6000",        UARCH_GEN8,   GT3,   14)
  CHECK_UARCH(arch, CHIP_IRIS_6100,   "Iris Graphics 6100",      UARCH_GEN8,   GT3,   14)
  CHECK_UARCH(arch, CHIP_IRISP_6200,  "Iris Pro Graphics 6200",  UARCH_GEN8,   GT3,   14)
  CHECK_UARCH(arch, CHIP_IRISP_P6300, "Iris Pro Graphics P6300", UARCH_GEN8,   GT3,   14)
  // Gen9
  CHECK_UARCH(arch, CHIP_HD_510,      "HD Graphics 510",         UARCH_GEN9,   GT1,   14)
  CHECK_UARCH(arch, CHIP_HD_515,      "HD Graphics 515",         UARCH_GEN9,   GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_520,      "HD Graphics 520",         UARCH_GEN9,   GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_530,      "HD Graphics 530",         UARCH_GEN9,   GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_P530,     "HD Graphics P530",        UARCH_GEN9,   GT2,   14)
  // Gen9.5
  CHECK_UARCH(arch, CHIP_UHD_600,     "UHD Graphics 600",        UARCH_GEN9_5, GT1,   14)
  CHECK_UARCH(arch, CHIP_UHD_605,     "UHD Graphics 605",        UARCH_GEN9_5, GT1_5, 14)
  CHECK_UARCH(arch, CHIP_UHD_620,     "UHD Graphics 620",        UARCH_GEN9_5, GT2,   14)
  CHECK_UARCH(arch, CHIP_UHD_630,     "UHD Graphics 630",        UARCH_GEN9_5, GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_610,      "HD Graphics 610",         UARCH_GEN9_5, GT1,   14)
  CHECK_UARCH(arch, CHIP_HD_615,      "HD Graphics 615",         UARCH_GEN9_5, GT2,   14)
  // CHIP_HD_620 is declared in chips.hpp but had no entry here
  CHECK_UARCH(arch, CHIP_HD_620,      "HD Graphics 620",         UARCH_GEN9_5, GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_630,      "HD Graphics 630",         UARCH_GEN9_5, GT2,   14)
  CHECK_UARCH(arch, CHIP_HD_P630,     "HD Graphics P630",        UARCH_GEN9_5, GT2,   14)
  CHECK_UARCH(arch, CHIP_IRISP_640,   "Iris Plus Graphics 640",  UARCH_GEN9_5, GT3e,  14)
  // This entry used to test CHIP_IRISP_640 again, which made it unreachable
  // and left CHIP_IRISP_650 without a mapping
  CHECK_UARCH(arch, CHIP_IRISP_650,   "Iris Plus Graphics 650",  UARCH_GEN9_5, GT3e,  14)
  CHECK_UARCH_END
}
|
||||
|
||||
const char* get_str_uarch_intel(struct uarch* arch) {
|
||||
return uarch_str[arch->uarch];
|
||||
}
|
||||
|
||||
const char* get_str_gt(struct uarch* arch) {
|
||||
return gt_str[arch->gt];
|
||||
}
|
||||
|
||||
struct uarch* get_uarch_from_pci(struct pci* pci) {
|
||||
struct uarch* arch = (struct uarch*) emalloc(sizeof(struct uarch));
|
||||
|
||||
arch->chip_str = NULL;
|
||||
arch->chip = get_chip_from_pci_intel(pci);
|
||||
if(arch->chip == CHIP_UNKNOWN_INTEL) {
|
||||
return NULL;
|
||||
}
|
||||
else {
|
||||
map_chip_to_uarch_intel(arch);
|
||||
return arch;
|
||||
}
|
||||
}
|
||||
|
||||
char* get_name_from_uarch(struct uarch* arch) {
|
||||
char* name = (char *) emalloc(sizeof(char) * (strlen(arch->chip_str) + 6 + 1));
|
||||
sprintf(name, "Intel %s", arch->chip_str);
|
||||
return name;
|
||||
}
|
||||
|
||||
/*
|
||||
* Refs:
|
||||
* Gen6: https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen6
|
||||
* Gen7/7.5: https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen7
|
||||
"The Compute Architecture of Intel Processor Graphics Gen7.5, v1.0"
|
||||
* Gen8: https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen8
|
||||
"The Compute Architecture of Intel Processor Graphics Gen8, v1.1"
|
||||
* Gen9: https://en.wikichip.org/wiki/intel/microarchitectures/gen9#Configuration
|
||||
"The Compute Architecture of Intel Processor Graphics Gen9, v1.0"
|
||||
* Gen9.5: https://en.wikichip.org/wiki/intel/microarchitectures/gen9.5#Configuration
|
||||
*/
|
||||
struct topology_i* get_topology_info(struct uarch* arch) {
|
||||
struct topology_i* topo = (struct topology_i*) emalloc(sizeof(struct topology_i));
|
||||
|
||||
// Syntax: (EU per subslice, Subslices, Slices)
|
||||
CHECK_TOPO_START
|
||||
// Gen6
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN6, GT1, 6, 1, 1)
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN6, GT2, 6, 2, 1)
|
||||
// Gen7
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN7, GT1, 6, 1, 1)
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN7, GT2, 8, 2, 1)
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN7, GT3, 6, 1, 1)
|
||||
// Gen7.5
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT1, 10, 1, 1)
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT2, 10, 2, 1)
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT3, 10, 4, 1)
|
||||
// Gen8
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN8, GT1, 6, 2, 1)
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN8, GT2, 8, 3, 1)
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN8, GT3, 8, 6, 2)
|
||||
// Gen9
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN9, GT1, 6, 2, 1)
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN9, GT2, 8, 3, 1)
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN9, GT3, 8, 6, 2)
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN9, GT4e, 8, 9, 3)
|
||||
// Gen9.5
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT1, 6, 2, 1)
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT1_5, 6, 3, 1)
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT2, 8, 3, 1)
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT3, 8, 6, 2)
|
||||
CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT3e, 8, 6, 2) // Same as GT3, but has eDRAM cache
|
||||
CHECK_TOPO_END
|
||||
|
||||
return topo;
|
||||
}
|
||||
14
src/intel/uarch.hpp
Normal file
14
src/intel/uarch.hpp
Normal file
@@ -0,0 +1,14 @@
|
||||
#ifndef __INTEL_UARCH__
|
||||
#define __INTEL_UARCH__
|
||||
|
||||
#include "../common/gpu.hpp"
|
||||
|
||||
struct uarch;
|
||||
|
||||
struct uarch* get_uarch_from_pci(struct pci* pci);
|
||||
char* get_name_from_uarch(struct uarch* arch);
|
||||
char* get_str_gt(struct uarch* arch);
|
||||
char* get_str_uarch_intel(struct uarch* arch);
|
||||
struct topology_i* get_topology_info(struct uarch* arch);
|
||||
|
||||
#endif
|
||||
89
src/intel/udev.cpp
Normal file
89
src/intel/udev.cpp
Normal file
@@ -0,0 +1,89 @@
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cstdint>
|
||||
#include <cerrno>
|
||||
#include <cstdio>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "../common/global.hpp"
|
||||
#include "../common/pci.hpp"
|
||||
|
||||
#define _PATH_SYS_SYSTEM "/sys/devices/pci0000:00"
|
||||
#define _PATH_SYS_DRM "/drm"
|
||||
#define _PATH_CARD "/card0"
|
||||
#define _PATH_FREQUENCY_MAX "/gt_max_freq_mhz"
|
||||
#define _PATH_FREQUENCY_MIN "/gt_min_freq_mhz"
|
||||
|
||||
#define _PATH_FREQUENCY_MAX_LEN 100
|
||||
#define DEFAULT_FILE_SIZE 4096
|
||||
#define UNKNOWN_DATA -1
|
||||
|
||||
char* read_file(char* path, int* len) {
|
||||
int fd = open(path, O_RDONLY);
|
||||
|
||||
if(fd == -1) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//File exists, read it
|
||||
int bytes_read = 0;
|
||||
int offset = 0;
|
||||
int block = 128;
|
||||
char* buf = (char *) emalloc(sizeof(char)*DEFAULT_FILE_SIZE);
|
||||
memset(buf, 0, sizeof(char)*DEFAULT_FILE_SIZE);
|
||||
|
||||
while ( (bytes_read = read(fd, buf+offset, block)) > 0 ) {
|
||||
offset += bytes_read;
|
||||
}
|
||||
|
||||
if (close(fd) == -1) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*len = offset;
|
||||
return buf;
|
||||
}
|
||||
|
||||
long get_freq_from_file(char* path) {
|
||||
int filelen;
|
||||
char* buf;
|
||||
if((buf = read_file(path, &filelen)) == NULL) {
|
||||
printWarn("Could not open '%s'", path);
|
||||
return UNKNOWN_DATA;
|
||||
}
|
||||
|
||||
char* end;
|
||||
errno = 0;
|
||||
long ret = strtol(buf, &end, 10);
|
||||
if(errno != 0) {
|
||||
printBug("strtol: %s", strerror(errno));
|
||||
free(buf);
|
||||
return UNKNOWN_DATA;
|
||||
}
|
||||
|
||||
// We will be getting the frequency in MHz
|
||||
// We consider it is an error if frequency is
|
||||
// greater than 10 GHz or less than 100 MHz
|
||||
if(ret > 10000 || ret < 100) {
|
||||
printBug("Invalid data was read from file '%s': %ld\n", path, ret);
|
||||
return UNKNOWN_DATA;
|
||||
}
|
||||
|
||||
free(buf);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
long get_max_freq_from_file(struct pci* pci) {
|
||||
char path[_PATH_FREQUENCY_MAX_LEN];
|
||||
sprintf(path, "%s/%04x:%02x:%02x.%d%s%s%s", _PATH_SYS_SYSTEM, pci->domain, pci->bus, pci->dev, pci->func, _PATH_SYS_DRM, _PATH_CARD, _PATH_FREQUENCY_MAX);
|
||||
return get_freq_from_file(path);
|
||||
}
|
||||
|
||||
long get_min_freq_from_file(struct pci* pci) {
|
||||
char path[_PATH_FREQUENCY_MAX_LEN];
|
||||
sprintf(path, "%s/%04x:%02x:%02x.%d%s%s%s", _PATH_SYS_SYSTEM, pci->domain, pci->bus, pci->dev, pci->func, _PATH_SYS_DRM, _PATH_CARD, _PATH_FREQUENCY_MIN);
|
||||
return get_freq_from_file(path);
|
||||
}
|
||||
7
src/intel/udev.hpp
Normal file
7
src/intel/udev.hpp
Normal file
@@ -0,0 +1,7 @@
|
||||
#ifndef __UDEV__
|
||||
#define __UDEV__
|
||||
|
||||
long get_max_freq_from_file(struct pci* pci);
|
||||
long get_min_freq_from_file(struct pci* pci);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user