diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a6fb9b..a97d1b6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,23 +7,22 @@ project(gpufetch CXX) set(SRC_DIR "src") set(COMMON_DIR "${SRC_DIR}/common") set(CUDA_DIR "${SRC_DIR}/cuda") +set(INTEL_DIR "${SRC_DIR}/intel") -if(NOT WIN32) - string(ASCII 27 Esc) - set(ColorReset "${Esc}[m") - set(ColorBold "${Esc}[1m") - set(Red "${Esc}[31m") - set(Green "${Esc}[32m") - set(BoldRed "${Esc}[1;31m") - set(BoldGreen "${Esc}[1;32m") - set(BoldYellow "${Esc}[1;33m") +if(NOT DEFINED ENABLE_INTEL_BACKEND) + set(ENABLE_INTEL_BACKEND true) endif() -check_language(CUDA) -if(CMAKE_CUDA_COMPILER) - enable_language(CUDA) -else() - message(FATAL_ERROR "${BoldRed}[ERROR]${ColorReset} Unable to find CUDA compiler. You may use -DCMAKE_CUDA_COMPILER and -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT if CUDA is installed but not detected by CMake") +if(NOT DEFINED ENABLE_CUDA_BACKEND OR ENABLE_CUDA_BACKEND) + check_language(CUDA) + if(CMAKE_CUDA_COMPILER) + enable_language(CUDA) + set(ENABLE_CUDA_BACKEND true) + # Must link_directories early so add_executable(gpufetch ...) 
gets the right directories + link_directories(cuda_backend ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/lib) + else() + set(ENABLE_CUDA_BACKEND false) + endif() endif() list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake") @@ -48,34 +47,73 @@ else() link_libraries(${PCIUTILS_LIBRARIES}) endif() +add_executable(gpufetch ${COMMON_DIR}/main.cpp ${COMMON_DIR}/args.cpp ${COMMON_DIR}/gpu.cpp ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/global.cpp ${COMMON_DIR}/printer.cpp ${COMMON_DIR}/master.cpp ${COMMON_DIR}/uarch.cpp) set(SANITY_FLAGS "-Wfloat-equal -Wshadow -Wpointer-arith") -set(CMAKE_CXX_FLAGS "${SANITY_FLAGS} -Wall -Wextra -pedantic -fstack-protector-all -pedantic") +set(CMAKE_CXX_FLAGS "${SANITY_FLAGS} -Wall -Wextra -pedantic -fstack-protector-all -pedantic -std=c++11") -# https://en.wikipedia.org/w/index.php?title=CUDA§ion=5#GPUs_supported -# https://raw.githubusercontent.com/PointCloudLibrary/pcl/master/cmake/pcl_find_cuda.cmake -if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0") - set(CMAKE_CUDA_ARCHITECTURES 35 37 50 52 53 60 61 62 70 72 75 80 86) -elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.0") - set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72 75) -elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "9.0") - set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72) -elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "8.0") - set(CMAKE_CUDA_ARCHITECTURES 20 21 30 32 35 37 50 52 53 60 61 62) +if(ENABLE_INTEL_BACKEND) + target_compile_definitions(gpufetch PUBLIC BACKEND_INTEL) + + add_library(intel_backend STATIC ${INTEL_DIR}/intel.cpp ${INTEL_DIR}/pci.cpp ${INTEL_DIR}/uarch.cpp ${INTEL_DIR}/udev.cpp) + + if(NOT ${PCIUTILS_FOUND}) + add_dependencies(intel_backend pciutils) + endif() + + target_link_libraries(gpufetch intel_backend) endif() -link_directories(${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/lib) +if(ENABLE_CUDA_BACKEND) + target_compile_definitions(gpufetch 
PUBLIC BACKEND_CUDA) -add_library(cuda_backend STATIC ${CUDA_DIR}/cuda.cpp ${CUDA_DIR}/uarch.cpp ${CUDA_DIR}/pci.cpp) -add_executable(gpufetch ${COMMON_DIR}/main.cpp ${COMMON_DIR}/args.cpp ${COMMON_DIR}/gpu.cpp ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/global.cpp ${COMMON_DIR}/printer.cpp) + # https://en.wikipedia.org/w/index.php?title=CUDA§ion=5#GPUs_supported + # https://raw.githubusercontent.com/PointCloudLibrary/pcl/master/cmake/pcl_find_cuda.cmake + if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0") + set(CMAKE_CUDA_ARCHITECTURES 35 37 50 52 53 60 61 62 70 72 75 80 86) + elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.0") + set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72 75) + elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "9.0") + set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72) + elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "8.0") + set(CMAKE_CUDA_ARCHITECTURES 20 21 30 32 35 37 50 52 53 60 61 62) + endif() -if(NOT ${PCIUTILS_FOUND}) - add_dependencies(cuda_backend pciutils) - add_dependencies(gpufetch pciutils) + add_library(cuda_backend STATIC ${CUDA_DIR}/cuda.cpp ${CUDA_DIR}/uarch.cpp ${CUDA_DIR}/pci.cpp) + + if(NOT ${PCIUTILS_FOUND}) + add_dependencies(cuda_backend pciutils) + endif() + + target_include_directories(cuda_backend PUBLIC ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/samples/common/inc ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/include) + + target_link_libraries(cuda_backend PRIVATE cudart) + target_link_libraries(gpufetch cuda_backend) endif() -target_include_directories(cuda_backend PUBLIC ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/samples/common/inc ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/include) - -target_link_libraries(cuda_backend cudart) -target_link_libraries(gpufetch cuda_backend pci z) - +target_link_libraries(gpufetch pci z) install(TARGETS gpufetch DESTINATION bin) + +if(NOT WIN32) + string(ASCII 27 Esc) + set(ColorReset "${Esc}[m") + 
set(ColorBold "${Esc}[1m") + set(Red "${Esc}[31m") + set(Green "${Esc}[32m") + set(BoldRed "${Esc}[1;31m") + set(BoldGreen "${Esc}[1;32m") + set(BoldYellow "${Esc}[1;33m") +endif() + +message(STATUS "----------------------") +message(STATUS "gpufetch build report:") +if(ENABLE_INTEL_BACKEND) + message(STATUS "Intel backend: ${BoldGreen}ON${ColorReset}") +else() + message(STATUS "Intel backend: ${BoldRed}OFF${ColorReset}") +endif() +if(ENABLE_CUDA_BACKEND) + message(STATUS "CUDA backend: ${BoldGreen}ON${ColorReset}") +else() + message(STATUS "CUDA backend: ${BoldRed}OFF${ColorReset}") +endif() +message(STATUS "----------------------") diff --git a/src/common/args.cpp b/src/common/args.cpp index e72f291..14bd6a6 100644 --- a/src/common/args.cpp +++ b/src/common/args.cpp @@ -13,8 +13,13 @@ #define NUM_COLORS 4 #define COLOR_STR_NVIDIA "nvidia" +#define COLOR_STR_INTEL "intel" -#define COLOR_DEFAULT_NVIDIA "118,185,0:255,255,255:255,255,255:118,185,0" +// +-----------------------+-----------------------+ +// | Color logo | Color text | +// | Color 1 | Color 2 | Color 1 | Color 2 | +#define COLOR_DEFAULT_NVIDIA "118,185,000:255,255,255:255,255,255:118,185,000" +#define COLOR_DEFAULT_INTEL "015,125,194:230,230,230:040,150,220:230,230,230" struct args_struct { bool help_flag; @@ -145,6 +150,7 @@ bool parse_color(char* optarg_str, struct color*** cs) { bool free_ptr = true; if(strcmp(optarg_str, COLOR_STR_NVIDIA) == 0) color_to_copy = COLOR_DEFAULT_NVIDIA; + else if(strcmp(optarg_str, COLOR_STR_INTEL) == 0) color_to_copy = COLOR_DEFAULT_INTEL; else { str_to_parse = optarg_str; free_ptr = false; diff --git a/src/common/ascii.hpp b/src/common/ascii.hpp index dbae762..ee6a470 100644 --- a/src/common/ascii.hpp +++ b/src/common/ascii.hpp @@ -1,32 +1,32 @@ #ifndef __ASCII__ #define __ASCII__ -#define COLOR_NONE "" -#define COLOR_FG_BLACK "\x1b[30;1m" -#define COLOR_FG_RED "\x1b[31;1m" -#define COLOR_FG_GREEN "\x1b[32;1m" -#define COLOR_FG_YELLOW "\x1b[33;1m" -#define 
COLOR_FG_BLUE "\x1b[34;1m" -#define COLOR_FG_MAGENTA "\x1b[35;1m" -#define COLOR_FG_CYAN "\x1b[36;1m" -#define COLOR_FG_WHITE "\x1b[37;1m" -#define COLOR_BG_BLACK "\x1b[40;1m" -#define COLOR_BG_RED "\x1b[41;1m" -#define COLOR_BG_GREEN "\x1b[42;1m" -#define COLOR_BG_YELLOW "\x1b[43;1m" -#define COLOR_BG_BLUE "\x1b[44;1m" -#define COLOR_BG_MAGENTA "\x1b[45;1m" -#define COLOR_BG_CYAN "\x1b[46;1m" -#define COLOR_BG_WHITE "\x1b[47;1m" -#define COLOR_FG_B_BLACK "\x1b[90;1m" -#define COLOR_FG_B_RED "\x1b[91;1m" -#define COLOR_FG_B_GREEN "\x1b[92;1m" -#define COLOR_FG_B_YELLOW "\x1b[93;1m" -#define COLOR_FG_B_BLUE "\x1b[94;1m" -#define COLOR_FG_B_MAGENTA "\x1b[95;1m" -#define COLOR_FG_B_CYAN "\x1b[96;1m" -#define COLOR_FG_B_WHITE "\x1b[97;1m" -#define COLOR_RESET "\x1b[m" +#define C_NONE "" +#define C_FG_BLACK "\x1b[30;1m" +#define C_FG_RED "\x1b[31;1m" +#define C_FG_GREEN "\x1b[32;1m" +#define C_FG_YELLOW "\x1b[33;1m" +#define C_FG_BLUE "\x1b[34;1m" +#define C_FG_MAGENTA "\x1b[35;1m" +#define C_FG_CYAN "\x1b[36;1m" +#define C_FG_WHITE "\x1b[37;1m" +#define C_BG_BLACK "\x1b[40;1m" +#define C_BG_RED "\x1b[41;1m" +#define C_BG_GREEN "\x1b[42;1m" +#define C_BG_YELLOW "\x1b[43;1m" +#define C_BG_BLUE "\x1b[44;1m" +#define C_BG_MAGENTA "\x1b[45;1m" +#define C_BG_CYAN "\x1b[46;1m" +#define C_BG_WHITE "\x1b[47;1m" +#define C_FG_B_BLACK "\x1b[90;1m" +#define C_FG_B_RED "\x1b[91;1m" +#define C_FG_B_GREEN "\x1b[92;1m" +#define C_FG_B_YELLOW "\x1b[93;1m" +#define C_FG_B_BLUE "\x1b[94;1m" +#define C_FG_B_MAGENTA "\x1b[95;1m" +#define C_FG_B_CYAN "\x1b[96;1m" +#define C_FG_B_WHITE "\x1b[97;1m" +#define C_RESET "\x1b[m" struct ascii_logo { const char* art; @@ -59,6 +59,23 @@ $C2## ## ## ## ## ## ## ## #: :# \ $C2## ## ## ## ## ## ## ## ####### \ $C2## ## ### ## ###### ## ## ## " +#define ASCII_INTEL \ +"$C1 .#################. \ +$C1 .#### ####. \ +$C1 .## ### \ +$C1 ## :## ### \ +$C1 # ## :## ## \ +$C1 ## ## ######. 
#### ###### :## ## \ +$C1 ## ## ##: ##: ## ## ### :## ### \ +$C1## ## ##: ##: ## :######## :## ## \ +$C1## ## ##: ##: ## ##. . :## #### \ +$C1## # ##: ##: #### #####: ## \ +$C1 ## \ +$C1 ###. ..o####. \ +$C1 ######oo... ..oo####### \ +$C1 o###############o " + +// LONG LOGOS #define ASCII_NVIDIA_L \ "$C1 MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM \ $C1 MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM \ @@ -76,14 +93,37 @@ $C1 olcc::; ,:ccloMMMMMMMMM \ $C1 :......oMMMMMMMMMMMMMMMMMMMMMM \ $C1 :lllMMMMMMMMMMMMMMMMMMMMMMMMMM " +#define ASCII_INTEL_L \ +"$C1 ###############@ \ +$C1 ######@ ######@ \ +$C1 ###@ ###@ \ +$C1 ##@ ###@ \ +$C1 ##@ ##@ \ +$C1 ##@ ##@ \ +$C1 @ ##@ ##@ ##@ \ +$C1 #@ ##@ ########@ #####@ #####@ ##@ ##@ \ +$C1 #@ ##@ ##@ ##@ ##@ ###@ ###@ ##@ ##@ \ +$C1 #@ ##@ ##@ ##@ ##@ ##@ ##@ ##@ ##@ \ +$C1 #@ ##@ ##@ ##@ ##@ #########@ ##@ ###@ \ +$C1 #@ ##@ ##@ ##@ ##@ ##@ ##@ ####@ \ +$C1 #@ #@ ##@ ##@ ####@ ########@ #@ ##@ \ +$C1 ##@ \ +$C1 ##@ \ +$C1 ###@ ###@ \ +$C1 ####@ #########@ \ +$C1 #########@ ###############@ \ +$C1 ##############################@ " + typedef struct ascii_logo asciiL; -// ------------------------------------------------------------------------------------------------------ -// | LOGO | W | H | REPLACE | COLORS LOGO (>0 && <10) | COLORS TEXT (=2) | -// ------------------------------------------------------------------------------------------------------ -asciiL logo_nvidia = { ASCII_NVIDIA, 45, 19, false, {COLOR_FG_GREEN, COLOR_FG_WHITE}, {COLOR_FG_WHITE, COLOR_FG_GREEN} }; -// Long variants | ---------------------------------------------------------------------------------------------------| -asciiL logo_nvidia_l = { ASCII_NVIDIA_L, 50, 15, false, {COLOR_FG_GREEN, COLOR_FG_WHITE}, {COLOR_FG_WHITE, COLOR_FG_GREEN} }; -asciiL logo_unknown = { NULL, 0, 0, false, {COLOR_NONE}, {COLOR_NONE, COLOR_NONE} }; +// ------------------------------------------------------------------------------------------ +// | LOGO | W | H | REPLACE | COLORS LOGO | COLORS TEXT | +// 
------------------------------------------------------------------------------------------ +asciiL logo_nvidia = { ASCII_NVIDIA, 45, 19, false, {C_FG_GREEN, C_FG_WHITE}, {C_FG_WHITE, C_FG_GREEN} }; +asciiL logo_intel = { ASCII_INTEL, 48, 14, false, {C_FG_CYAN}, {C_FG_CYAN, C_FG_WHITE} }; +// Long variants | ---------------------------------------------------------------------------------------| +asciiL logo_nvidia_l = { ASCII_NVIDIA_L, 50, 15, false, {C_FG_GREEN, C_FG_WHITE}, {C_FG_WHITE, C_FG_GREEN} }; +asciiL logo_intel_l = { ASCII_INTEL_L, 62, 19, true, {C_BG_CYAN, C_BG_WHITE}, {C_FG_CYAN, C_FG_WHITE} }; +asciiL logo_unknown = { NULL, 0, 0, false, {C_NONE}, {C_NONE, C_NONE} }; #endif diff --git a/src/common/gpu.cpp b/src/common/gpu.cpp index ed00694..c25e1fa 100644 --- a/src/common/gpu.cpp +++ b/src/common/gpu.cpp @@ -32,8 +32,6 @@ VENDOR get_gpu_vendor(struct gpu_info* gpu) { return gpu->vendor; } -double trunc(double val) { return ((int)(100 * val)) / 100.0; } - int32_t get_value_as_smallest_unit(char ** str, uint64_t value) { int32_t ret; int max_len = 10; // Max is 8 for digits, 2 for units @@ -145,6 +143,13 @@ char* get_str_peak_performance(struct gpu_info* gpu) { } char* get_str_peak_performance_tensor(struct gpu_info* gpu) { - return get_str_peak_performance_generic(gpu->peak_performance_t); + return get_str_peak_performance_generic(gpu->peak_performance_tcu); } +char* get_str_generic(int32_t data) { + // Largest int is 10, +1 for possible negative, +1 for EOL + uint32_t max_size = 12; + char* dummy = (char *) ecalloc(max_size, sizeof(char)); + snprintf(dummy, max_size, "%d", data); + return dummy; +} diff --git a/src/common/gpu.hpp b/src/common/gpu.hpp index 2928a11..8f308c3 100644 --- a/src/common/gpu.hpp +++ b/src/common/gpu.hpp @@ -9,7 +9,8 @@ #define UNKNOWN_FREQ -1 enum { - GPU_VENDOR_NVIDIA + GPU_VENDOR_NVIDIA, + GPU_VENDOR_INTEL }; enum { @@ -43,6 +44,12 @@ struct topology { int32_t tensor_cores; }; +struct topology_i { + int32_t slices; + int32_t 
subslices; + int32_t eu_subslice; +}; + struct memory { int64_t size_bytes; MEMTYPE type; @@ -58,10 +65,11 @@ struct gpu_info { int64_t freq; struct pci* pci; struct topology* topo; + struct topology_i* topo_i; struct memory* mem; struct cache* cach; int64_t peak_performance; - int64_t peak_performance_t; + int64_t peak_performance_tcu; int32_t idx; }; @@ -75,5 +83,6 @@ char* get_str_memory_clock(struct gpu_info* gpu); char* get_str_l2(struct gpu_info* gpu); char* get_str_peak_performance(struct gpu_info* gpu); char* get_str_peak_performance_tensor(struct gpu_info* gpu); +char* get_str_generic(int32_t data); #endif diff --git a/src/common/main.cpp b/src/common/main.cpp index 899ff89..2e51d86 100644 --- a/src/common/main.cpp +++ b/src/common/main.cpp @@ -4,6 +4,7 @@ #include "args.hpp" #include "global.hpp" +#include "master.hpp" #include "../cuda/cuda.hpp" #include "../cuda/uarch.hpp" @@ -65,18 +66,19 @@ int main(int argc, char* argv[]) { return EXIT_SUCCESS; } + struct gpu_list* list = get_gpu_list(); if(list_gpus()) { - return print_gpus_list(); + return print_gpus_list(list); } set_log_level(true); - printWarn("gpufetch is in beta. The provided information may be incomplete or wrong.\n\ + printf("[WARNING]: gpufetch is in beta. 
The provided information may be incomplete or wrong.\n\ If you want to help to improve gpufetch, please compare the output of the program\n\ with a reliable source which you know is right (e.g, techpowerup.com) and report\n\ -any inconsistencies to https://github.com/Dr-Noob/gpufetch/issues"); +any inconsistencies to https://github.com/Dr-Noob/gpufetch/issues\n"); - struct gpu_info* gpu = get_gpu_info(get_gpu_idx()); + struct gpu_info* gpu = get_gpu_info(list, get_gpu_idx()); if(gpu == NULL) return EXIT_FAILURE; diff --git a/src/common/master.cpp b/src/common/master.cpp new file mode 100644 index 0000000..d86711a --- /dev/null +++ b/src/common/master.cpp @@ -0,0 +1,62 @@ +#include +#include +#include + +#include "master.hpp" +#include "../cuda/cuda.hpp" +#include "../intel/intel.hpp" + +#define MAX_GPUS 1000 + +struct gpu_list { + struct gpu_info ** gpus; + int num_gpus; +}; + +struct gpu_list* get_gpu_list() { + int idx = 0; + struct gpu_list* list = (struct gpu_list*) malloc(sizeof(struct gpu_list)); + list->num_gpus = 0; + list->gpus = (struct gpu_info**) malloc(sizeof(struct info*) * MAX_GPUS); + +#ifdef BACKEND_CUDA + bool valid = true; + + while(valid) { + list->gpus[idx] = get_gpu_info_cuda(idx); + if(list->gpus[idx] != NULL) idx++; + else valid = false; + } + + list->num_gpus += idx; +#endif + +#ifdef BACKEND_INTEL + list->gpus[idx] = get_gpu_info_intel(); + if(list->gpus[idx] != NULL) list->num_gpus++; +#endif + + return list; +} + +bool print_gpus_list(struct gpu_list* list) { + for(int i=0; i < list->num_gpus; i++) { + printf("GPU %d: ", i); + if(list->gpus[i]->vendor == GPU_VENDOR_NVIDIA) { + #ifdef BACKEND_CUDA + print_gpu_cuda(list->gpus[i]); + #endif + } + else if(list->gpus[i]->vendor == GPU_VENDOR_INTEL) { + #ifdef BACKEND_INTEL + print_gpu_intel(list->gpus[i]); + #endif + } + } + + return true; +} + +struct gpu_info* get_gpu_info(struct gpu_list* list, int idx) { + return list->gpus[idx]; +} diff --git a/src/common/master.hpp 
b/src/common/master.hpp new file mode 100644 index 0000000..5863cbc --- /dev/null +++ b/src/common/master.hpp @@ -0,0 +1,12 @@ +#ifndef __GPU_LIST__ +#define __GPU_LIST__ + +#include "gpu.hpp" + +struct gpu_list; + +struct gpu_list* get_gpu_list(); +bool print_gpus_list(struct gpu_list* list); +struct gpu_info* get_gpu_info(struct gpu_list* list, int idx); + +#endif diff --git a/src/common/pci.cpp b/src/common/pci.cpp index 2a25ad6..e1cb7fd 100644 --- a/src/common/pci.cpp +++ b/src/common/pci.cpp @@ -2,33 +2,61 @@ #include "pci.hpp" #include -/* - * doc: https://wiki.osdev.org/PCI#Class_Codes - * https://pci-ids.ucw.cz/read/PC - */ -#define VENDOR_ID_NVIDIA 0x10de #define CLASS_VGA_CONTROLLER 0x0300 -uint16_t pciutils_get_pci_vendor_id(struct pci_dev *devices) { +bool pciutils_is_vendor_id_present(struct pci_dev *devices, int id) { for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) { - if(dev->vendor_id == VENDOR_ID_NVIDIA && dev->device_class == CLASS_VGA_CONTROLLER) { - return dev->vendor_id; + if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) { + return true; } } - printErr("Unable to find a CUDA device using pciutils"); - return 0; + + printWarn("Unable to find a valid device for id %d using pciutils", id); + return false; } -uint16_t pciutils_get_pci_device_id(struct pci_dev *devices) { +uint16_t pciutils_get_pci_device_id(struct pci_dev *devices, int id) { for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) { - if(dev->vendor_id == VENDOR_ID_NVIDIA && dev->device_class == CLASS_VGA_CONTROLLER) { + if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) { return dev->device_id; } } - printErr("Unable to find a CUDA device using pciutils"); + + printErr("Unable to find a valid device for id %d using pciutils", id); return 0; } +void pciutils_set_pci_bus(struct pci* pci, struct pci_dev *devices, int id) { + bool found = false; + + for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) { + 
if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) { + pci->domain = dev->domain; + pci->bus = dev->bus; + pci->dev = dev->dev; + pci->func = dev->func; + found = true; + } + } + + if(!found) printErr("Unable to find a valid device for id %d using pciutils", id); +} + +struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id) { + struct pci* pci = (struct pci*) emalloc(sizeof(struct pci)); + + // TODO: Refactor this; instead of 2xGet + 1xSet, do it better + if(pciutils_is_vendor_id_present(devices, id)) { + pci->vendor_id = id; + pci->device_id = pciutils_get_pci_device_id(devices, id); + pciutils_set_pci_bus(pci, devices, id); + return pci; + } + else { + return NULL; + } +} + struct pci_dev *get_pci_devices_from_pciutils() { struct pci_access *pacc; struct pci_dev *dev; diff --git a/src/common/pci.hpp b/src/common/pci.hpp index b3d3b94..7214545 100644 --- a/src/common/pci.hpp +++ b/src/common/pci.hpp @@ -6,8 +6,16 @@ extern "C" { #include } -uint16_t pciutils_get_pci_vendor_id(struct pci_dev *devices); -uint16_t pciutils_get_pci_device_id(struct pci_dev *devices); +struct pci { + uint16_t vendor_id; + uint16_t device_id; + uint16_t domain; + uint16_t bus; + uint16_t dev; + uint16_t func; +}; + +struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id); struct pci_dev *get_pci_devices_from_pciutils(); #endif diff --git a/src/common/printer.cpp b/src/common/printer.cpp index 2e490ac..00d5404 100644 --- a/src/common/printer.cpp +++ b/src/common/printer.cpp @@ -9,6 +9,8 @@ #include "../common/global.hpp" #include "../common/gpu.hpp" +#include "../intel/uarch.hpp" +#include "../intel/intel.hpp" #include "../cuda/cuda.hpp" #include "../cuda/uarch.hpp" @@ -34,11 +36,13 @@ enum { ATTRIBUTE_CHIP, ATTRIBUTE_UARCH, ATTRIBUTE_TECHNOLOGY, + ATTRIBUTE_GT, ATTRIBUTE_FREQUENCY, ATTRIBUTE_STREAMINGMP, ATTRIBUTE_CORESPERMP, ATTRIBUTE_CUDA_CORES, ATTRIBUTE_TENSOR_CORES, + ATTRIBUTE_EUS, ATTRIBUTE_L2, ATTRIBUTE_MEMORY, ATTRIBUTE_MEMORY_FREQ, @@ 
-52,11 +56,13 @@ static const char* ATTRIBUTE_FIELDS [] = { "GPU processor:", "Microarchitecture:", "Technology:", + "Graphics Tier:", "Max Frequency:", "SMs:", "Cores/SM:", "CUDA Cores:", "Tensor Cores:", + "Execution Units:", "L2 Size:", "Memory:", "Memory frequency:", @@ -70,11 +76,13 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = { "Processor:", "uArch:", "Technology:", + "GT:", "Max Freq.:", "SMs:", "Cores/SM:", "CUDA Cores:", "Tensor Cores:", + "EUs:", "L2 Size:", "Memory:", "Memory freq.:", @@ -200,23 +208,32 @@ void replace_bgbyfg_color(struct ascii_logo* logo) { for(int i=0; i < 2; i++) { if(logo->color_ascii[i] == NULL) break; - if(strcmp(logo->color_ascii[i], COLOR_BG_BLACK) == 0) strcpy(logo->color_ascii[i], COLOR_FG_BLACK); - else if(strcmp(logo->color_ascii[i], COLOR_BG_RED) == 0) strcpy(logo->color_ascii[i], COLOR_FG_RED); - else if(strcmp(logo->color_ascii[i], COLOR_BG_GREEN) == 0) strcpy(logo->color_ascii[i], COLOR_FG_GREEN); - else if(strcmp(logo->color_ascii[i], COLOR_BG_YELLOW) == 0) strcpy(logo->color_ascii[i], COLOR_FG_YELLOW); - else if(strcmp(logo->color_ascii[i], COLOR_BG_BLUE) == 0) strcpy(logo->color_ascii[i], COLOR_FG_BLUE); - else if(strcmp(logo->color_ascii[i], COLOR_BG_MAGENTA) == 0) strcpy(logo->color_ascii[i], COLOR_FG_MAGENTA); - else if(strcmp(logo->color_ascii[i], COLOR_BG_CYAN) == 0) strcpy(logo->color_ascii[i], COLOR_FG_CYAN); - else if(strcmp(logo->color_ascii[i], COLOR_BG_WHITE) == 0) strcpy(logo->color_ascii[i], COLOR_FG_WHITE); + if(strcmp(logo->color_ascii[i], C_BG_BLACK) == 0) strcpy(logo->color_ascii[i], C_FG_BLACK); + else if(strcmp(logo->color_ascii[i], C_BG_RED) == 0) strcpy(logo->color_ascii[i], C_FG_RED); + else if(strcmp(logo->color_ascii[i], C_BG_GREEN) == 0) strcpy(logo->color_ascii[i], C_FG_GREEN); + else if(strcmp(logo->color_ascii[i], C_BG_YELLOW) == 0) strcpy(logo->color_ascii[i], C_FG_YELLOW); + else if(strcmp(logo->color_ascii[i], C_BG_BLUE) == 0) strcpy(logo->color_ascii[i], C_FG_BLUE); + else 
if(strcmp(logo->color_ascii[i], C_BG_MAGENTA) == 0) strcpy(logo->color_ascii[i], C_FG_MAGENTA); + else if(strcmp(logo->color_ascii[i], C_BG_CYAN) == 0) strcpy(logo->color_ascii[i], C_FG_CYAN); + else if(strcmp(logo->color_ascii[i], C_BG_WHITE) == 0) strcpy(logo->color_ascii[i], C_FG_WHITE); + } +} + +struct ascii_logo* choose_ascii_art_aux(struct ascii_logo* logo_long, struct ascii_logo* logo_short, struct terminal* term, int lf) { + if(ascii_fits_screen(term->w, *logo_long, lf)) { + return logo_long; + } + else { + return logo_short; } } void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* term, int lf) { if(art->vendor == GPU_VENDOR_NVIDIA) { - if(term != NULL && ascii_fits_screen(term->w, logo_nvidia_l, lf)) - art->art = &logo_nvidia_l; - else - art->art = &logo_nvidia; + art->art = choose_ascii_art_aux(&logo_nvidia_l, &logo_nvidia, term, lf); + } + else if(art->vendor == GPU_VENDOR_INTEL) { + art->art = choose_ascii_art_aux(&logo_intel_l, &logo_intel, term, lf); } else { art->art = &logo_unknown; @@ -228,10 +245,10 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter switch(art->style) { case STYLE_LEGACY: logo->replace_blocks = false; - strcpy(logo->color_text[0], COLOR_NONE); - strcpy(logo->color_text[1], COLOR_NONE); - strcpy(logo->color_ascii[0], COLOR_NONE); - strcpy(logo->color_ascii[1], COLOR_NONE); + strcpy(logo->color_text[0], C_NONE); + strcpy(logo->color_text[1], C_NONE); + strcpy(logo->color_ascii[0], C_NONE); + strcpy(logo->color_ascii[1], C_NONE); art->reset[0] = '\0'; break; case STYLE_RETRO: @@ -245,7 +262,7 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter strcpy(logo->color_ascii[0], rgb_to_ansi(cs[0], logo->replace_blocks, true)); strcpy(logo->color_ascii[1], rgb_to_ansi(cs[1], logo->replace_blocks, true)); } - strcpy(art->reset, COLOR_RESET); + strcpy(art->reset, C_RESET); break; case STYLE_INVALID: default: @@ -342,6 +359,48 @@ void 
print_ascii_generic(struct ascii* art, uint32_t la, int32_t text_space, con printf("\n"); } +#ifdef BACKEND_INTEL +bool print_gpufetch_intel(struct gpu_info* gpu, STYLE s, struct color** cs, struct terminal* term) { + struct ascii* art = set_ascii(get_gpu_vendor(gpu), s); + + if(art == NULL) + return false; + + char* gpu_name = get_str_gpu_name(gpu); + char* uarch = get_str_uarch_intel(gpu->arch); + char* gt = get_str_gt(gpu->arch); + char* manufacturing_process = get_str_process(gpu->arch); + char* eus = get_str_eu(gpu); + char* max_frequency = get_str_freq(gpu); + char* pp = get_str_peak_performance(gpu); + + setAttribute(art, ATTRIBUTE_NAME, gpu_name); + setAttribute(art, ATTRIBUTE_UARCH, uarch); + setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process); + setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency); + setAttribute(art, ATTRIBUTE_GT, gt); + setAttribute(art, ATTRIBUTE_EUS, eus); + setAttribute(art, ATTRIBUTE_PEAK, pp); + + const char** attribute_fields = ATTRIBUTE_FIELDS; + uint32_t longest_attribute = longest_attribute_length(art, attribute_fields); + uint32_t longest_field = longest_field_length(art, longest_attribute); + choose_ascii_art(art, cs, term, longest_field); + + if(!ascii_fits_screen(term->w, *art->art, longest_field)) { + // Despite of choosing the smallest logo, the output does not fit + // Choose the shorter field names and recalculate the longest attr + attribute_fields = ATTRIBUTE_FIELDS_SHORT; + longest_attribute = longest_attribute_length(art, attribute_fields); + } + + print_ascii_generic(art, longest_attribute, term->w - art->art->width, attribute_fields); + + return true; +} +#endif + +#ifdef BACKEND_CUDA bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struct terminal* term) { struct ascii* art = set_ascii(get_gpu_vendor(gpu), s); @@ -350,7 +409,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc char* gpu_name = get_str_gpu_name(gpu); char* gpu_chip = 
get_str_chip(gpu->arch); - char* uarch = get_str_uarch(gpu->arch); + char* uarch = get_str_uarch_cuda(gpu->arch); char* comp_cap = get_str_cc(gpu->arch); char* manufacturing_process = get_str_process(gpu->arch); char* sms = get_str_sm(gpu); @@ -416,6 +475,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc return true; } +#endif struct terminal* get_terminal_size() { struct terminal* term = (struct terminal*) emalloc(sizeof(struct terminal)); @@ -448,5 +508,17 @@ struct terminal* get_terminal_size() { bool print_gpufetch(struct gpu_info* gpu, STYLE s, struct color** cs) { struct terminal* term = get_terminal_size(); - return print_gpufetch_cuda(gpu, s, cs, term); + if(gpu->vendor == GPU_VENDOR_NVIDIA) + #ifdef BACKEND_CUDA + return print_gpufetch_cuda(gpu, s, cs, term); + #else + return false; + #endif + else { + #ifdef BACKEND_INTEL + return print_gpufetch_intel(gpu, s, cs, term); + #else + return false; + #endif + } } diff --git a/src/common/uarch.cpp b/src/common/uarch.cpp new file mode 100644 index 0000000..1549fa0 --- /dev/null +++ b/src/common/uarch.cpp @@ -0,0 +1,28 @@ +#include +#include +#include + +#include "global.hpp" +#include "uarch.hpp" + +char* get_str_process(struct uarch* arch) { + char* str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN)+1)); + int32_t process = arch->process; + + if(process == UNK) { + snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN); + } + else if(process > 100) { + sprintf(str, "%.2fum", (double)process/100); + } + else if(process > 0){ + sprintf(str, "%dnm", process); + } + else { + snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN); + printBug("Found invalid process: '%d'", process); + } + + return str; +} + diff --git a/src/common/uarch.hpp b/src/common/uarch.hpp new file mode 100644 index 0000000..56bfe9b --- /dev/null +++ b/src/common/uarch.hpp @@ -0,0 +1,31 @@ +#ifndef __COMMON_UARCH__ +#define __COMMON_UARCH__ + +// Data not available +#define NA -1 + +// Unknown 
manufacturing process +#define UNK -1 + +typedef uint32_t GPUCHIP; +typedef uint32_t MICROARCH; + +struct uarch { + // NVIDIA specific + int32_t cc_major; + int32_t cc_minor; + int32_t compute_capability; + + // Intel specific + int32_t gt; + int32_t eu; + + MICROARCH uarch; + GPUCHIP chip; + + int32_t process; + char* uarch_str; + char* chip_str; +}; + +#endif diff --git a/src/cuda/chips.hpp b/src/cuda/chips.hpp index 53e6535..e0f2f61 100644 --- a/src/cuda/chips.hpp +++ b/src/cuda/chips.hpp @@ -1,10 +1,10 @@ -#ifndef __GPUCHIPS__ -#define __GPUCHIPS__ +#ifndef __CUDA_GPUCHIPS__ +#define __CUDA_GPUCHIPS__ typedef uint32_t GPUCHIP; enum { - CHIP_UNKNOWN, + CHIP_UNKNOWN_CUDA, CHIP_G80, CHIP_G80GL, CHIP_G84, diff --git a/src/cuda/cuda.cpp b/src/cuda/cuda.cpp index 21949b4..afe5a0d 100644 --- a/src/cuda/cuda.cpp +++ b/src/cuda/cuda.cpp @@ -6,40 +6,12 @@ #include "../common/pci.hpp" #include "../common/global.hpp" -int print_gpus_list() { - cudaError_t err = cudaSuccess; - int num_gpus = -1; +bool print_gpu_cuda(struct gpu_info* gpu) { + char* cc = get_str_cc(gpu->arch); + printf("%s (Compute Capability %s)\n", gpu->name, cc); + free(cc); - if ((err = cudaGetDeviceCount(&num_gpus)) != cudaSuccess) { - printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err)); - return EXIT_FAILURE; - } - printf("CUDA GPUs available: %d\n", num_gpus); - - if(num_gpus > 0) { - cudaDeviceProp deviceProp; - int max_len = 0; - - for(int idx=0; idx < num_gpus; idx++) { - if ((err = cudaGetDeviceProperties(&deviceProp, idx)) != cudaSuccess) { - printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err)); - return EXIT_FAILURE; - } - max_len = max(max_len, (int) strlen(deviceProp.name)); - } - - for(int i=0; i < max_len + 32; i++) putchar('-'); - putchar('\n'); - for(int idx=0; idx < num_gpus; idx++) { - if ((err = cudaGetDeviceProperties(&deviceProp, idx)) != cudaSuccess) { - printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err)); - return EXIT_FAILURE; - } - 
printf("GPU %d: %s (Compute Capability %d.%d)\n", idx, deviceProp.name, deviceProp.major, deviceProp.minor); - } - } - - return EXIT_SUCCESS; + return true; } struct cache* get_cache_info(cudaDeviceProp prop) { @@ -104,12 +76,12 @@ struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) { } // Compute peak performance when using CUDA cores -int64_t get_peak_performance(struct gpu_info* gpu) { +int64_t get_peak_performance_cuda(struct gpu_info* gpu) { return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2; } // Compute peak performance when using tensor cores -int64_t get_peak_performance_t(cudaDeviceProp prop, struct gpu_info* gpu) { +int64_t get_peak_performance_tcu(cudaDeviceProp prop, struct gpu_info* gpu) { // Volta / Turing tensor cores performs 4x4x4 FP16 matrix multiplication // Ampere tensor cores performs 8x4x8 FP16 matrix multiplicacion if(prop.major == 7) return gpu->freq * 1000000 * 4 * 4 * 4 * 2 * gpu->topo->tensor_cores; @@ -117,7 +89,7 @@ int64_t get_peak_performance_t(cudaDeviceProp prop, struct gpu_info* gpu) { else return 0; } -struct gpu_info* get_gpu_info(int gpu_idx) { +struct gpu_info* get_gpu_info_cuda(int gpu_idx) { struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info)); gpu->pci = NULL; gpu->idx = gpu_idx; @@ -127,8 +99,10 @@ struct gpu_info* get_gpu_info(int gpu_idx) { return NULL; } - printf("Waiting for CUDA driver to start..."); - fflush(stdout); + if(gpu_idx == 0) { + printf("Waiting for CUDA driver to start..."); + fflush(stdout); + } int num_gpus = -1; cudaError_t err = cudaSuccess; @@ -136,7 +110,10 @@ struct gpu_info* get_gpu_info(int gpu_idx) { printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err)); return NULL; } - printf("\r "); + + if(gpu_idx == 0) { + printf("\r"); + } if(num_gpus <= 0) { printErr("No CUDA capable devices found!"); @@ -144,7 +121,7 @@ struct gpu_info* get_gpu_info(int gpu_idx) { } if(gpu->idx+1 > num_gpus) { - printErr("Requested GPU index %d in a system with 
%d GPUs", gpu->idx, num_gpus); + // Master is trying to query an invalid GPU return NULL; } @@ -160,25 +137,17 @@ struct gpu_info* get_gpu_info(int gpu_idx) { strcpy(gpu->name, deviceProp.name); struct pci_dev *devices = get_pci_devices_from_pciutils(); - gpu->pci = get_pci_from_pciutils(devices); + gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_NVIDIA); gpu->arch = get_uarch_from_cuda(gpu); gpu->cach = get_cache_info(deviceProp); gpu->mem = get_memory_info(gpu, deviceProp); gpu->topo = get_topology_info(deviceProp); - gpu->peak_performance = get_peak_performance(gpu); - gpu->peak_performance_t = get_peak_performance_t(deviceProp, gpu); + gpu->peak_performance = get_peak_performance_cuda(gpu); + gpu->peak_performance_tcu = get_peak_performance_tcu(deviceProp, gpu); return gpu; } -char* get_str_generic(int32_t data) { - // Largest int is 10, +1 for possible negative, +1 for EOL - uint32_t max_size = 12; - char* dummy = (char *) ecalloc(max_size, sizeof(char)); - snprintf(dummy, max_size, "%d", data); - return dummy; -} - char* get_str_sm(struct gpu_info* gpu) { return get_str_generic(gpu->topo->streaming_mp); } diff --git a/src/cuda/cuda.hpp b/src/cuda/cuda.hpp index a132675..931bf29 100644 --- a/src/cuda/cuda.hpp +++ b/src/cuda/cuda.hpp @@ -1,10 +1,10 @@ -#ifndef __CUDA__ -#define __CUDA__ +#ifndef __CUDA_GPU__ +#define __CUDA_GPU__ #include "../common/gpu.hpp" -struct gpu_info* get_gpu_info(int gpu_idx); -int print_gpus_list(); +struct gpu_info* get_gpu_info_cuda(int gpu_idx); +bool print_gpu_cuda(struct gpu_info* gpu); char* get_str_sm(struct gpu_info* gpu); char* get_str_cores_sm(struct gpu_info* gpu); char* get_str_cuda_cores(struct gpu_info* gpu); diff --git a/src/cuda/pci.cpp b/src/cuda/pci.cpp index 051f324..868d0ac 100644 --- a/src/cuda/pci.cpp +++ b/src/cuda/pci.cpp @@ -8,21 +8,7 @@ #define CHECK_PCI_START if (false) {} #define CHECK_PCI(pci, id, chip) \ else if (pci->device_id == id) return chip; -#define
CHECK_PCI_END else { printBug("TODOO"); return CHIP_UNKNOWN; } - -struct pci { - uint16_t vendor_id; - uint16_t device_id; -}; - -struct pci* get_pci_from_pciutils(struct pci_dev *devices) { - struct pci* pci = (struct pci*) emalloc(sizeof(struct pci)); - - pci->vendor_id = pciutils_get_pci_vendor_id(devices); - pci->device_id = pciutils_get_pci_device_id(devices); - - return pci; -} +#define CHECK_PCI_END else { printBug("Unkown CUDA device id: 0x%.4X", pci->device_id); return CHIP_UNKNOWN_CUDA; } /* * pci ids were retrieved using https://github.com/pciutils/pciids @@ -33,7 +19,7 @@ struct pci* get_pci_from_pciutils(struct pci_dev *devices) { * or in pci.ids itself) */ -GPUCHIP get_chip_from_pci(struct pci* pci) { +GPUCHIP get_chip_from_pci_cuda(struct pci* pci) { CHECK_PCI_START CHECK_PCI(pci, 0x25e5, CHIP_GA107BM) CHECK_PCI(pci, 0x25e2, CHIP_GA107BM) diff --git a/src/cuda/pci.hpp b/src/cuda/pci.hpp index 203d2db..4627c8f 100644 --- a/src/cuda/pci.hpp +++ b/src/cuda/pci.hpp @@ -6,9 +6,14 @@ #include "../common/pci.hpp" #include "chips.hpp" +/* + * doc: https://wiki.osdev.org/PCI#Class_Codes + * https://pci-ids.ucw.cz/read/PC + */ +#define PCI_VENDOR_ID_NVIDIA 0x10de + struct pci; -struct pci* get_pci_from_pciutils(struct pci_dev *devices); -GPUCHIP get_chip_from_pci(struct pci* pci); +GPUCHIP get_chip_from_pci_cuda(struct pci* pci); #endif diff --git a/src/cuda/uarch.cpp b/src/cuda/uarch.cpp index 6a9f144..f769b18 100644 --- a/src/cuda/uarch.cpp +++ b/src/cuda/uarch.cpp @@ -3,21 +3,14 @@ #include #include +#include "../common/uarch.hpp" #include "../common/global.hpp" #include "../common/gpu.hpp" #include "chips.hpp" -typedef uint32_t MICROARCH; - // Any clock multiplier #define CM_ANY -1 -// Data not available -#define NA -1 - -// Unknown manufacturing process -#define UNK -1 - // MICROARCH values enum { UARCH_UNKNOWN, @@ -43,23 +36,10 @@ static const char *uarch_str[] = { /*[ARCH_AMPERE] = */ "Ampere", }; -struct uarch { - int32_t cc_major; - int32_t cc_minor; 
- int32_t compute_capability; - - MICROARCH uarch; - GPUCHIP chip; - - int32_t process; - char* uarch_str; - char* chip_str; -}; - #define CHECK_UARCH_START if (false) {} #define CHECK_UARCH(arch, chip_, str, uarch, process) \ else if (arch->chip == chip_) fill_uarch(arch, str, uarch, process); -#define CHECK_UARCH_END else { printBug("map_chip_to_uarch: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, 0); } +#define CHECK_UARCH_END else { if(arch->chip != CHIP_UNKNOWN_CUDA) printBug("map_chip_to_uarch_cuda: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, 0); } void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t process) { arch->chip_str = (char *) emalloc(sizeof(char) * (strlen(str)+1)); @@ -74,7 +54,7 @@ void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t proce * o CHIP_XXXGL: indicates a professional-class (Quadro/Tesla) chip * o CHIP_XXXM: indicates a mobile chip */ -void map_chip_to_uarch(struct uarch* arch) { +void map_chip_to_uarch_cuda(struct uarch* arch) { CHECK_UARCH_START // TESLA (1.0, 1.1, 1.2, 1.3) // CHECK_UARCH(arch, CHIP_G80, "G80", UARCH_TESLA, 90) @@ -263,9 +243,8 @@ struct uarch* get_uarch_from_cuda(struct gpu_info* gpu) { arch->cc_major = deviceProp.major; arch->cc_minor = deviceProp.minor; arch->compute_capability = deviceProp.major * 10 + deviceProp.minor; - arch->chip = get_chip_from_pci(gpu->pci); - - map_chip_to_uarch(arch); + arch->chip = get_chip_from_pci_cuda(gpu->pci); + map_chip_to_uarch_cuda(arch); return arch; } @@ -335,10 +314,6 @@ MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch) { CHECK_MEMTYPE_END } -const char* get_str_uarch(struct uarch* arch) { - return uarch_str[arch->uarch]; -} - char* get_str_cc(struct uarch* arch) { uint32_t max_size = 4; char* cc = (char *) ecalloc(max_size, sizeof(char)); @@ -346,31 +321,14 @@ char* get_str_cc(struct uarch* arch) { return cc; } -char* 
get_str_process(struct uarch* arch) { - char* str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN)+1)); - int32_t process = arch->process; - - if(process == UNK) { - snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN); - } - else if(process > 100) { - sprintf(str, "%.2fum", (double)process/100); - } - else if(process > 0){ - sprintf(str, "%dnm", process); - } - else { - snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN); - printBug("Found invalid process: '%d'", process); - } - - return str; -} - char* get_str_chip(struct uarch* arch) { return arch->chip_str; } +const char* get_str_uarch_cuda(struct uarch* arch) { + return uarch_str[arch->uarch]; +} + void free_uarch_struct(struct uarch* arch) { free(arch->uarch_str); free(arch->chip_str); diff --git a/src/cuda/uarch.hpp b/src/cuda/uarch.hpp index 86355fe..375b001 100644 --- a/src/cuda/uarch.hpp +++ b/src/cuda/uarch.hpp @@ -1,5 +1,5 @@ -#ifndef __UARCH__ -#define __UARCH__ +#ifndef __CUDA_UARCH__ +#define __CUDA_UARCH__ #include "../common/gpu.hpp" @@ -8,7 +8,7 @@ struct uarch; struct uarch* get_uarch_from_cuda(struct gpu_info* gpu); bool clkm_possible_for_uarch(int clkm, struct uarch* arch); MEMTYPE guess_memtype_from_cmul_and_uarch(int ddr, struct uarch* arch); -char* get_str_uarch(struct uarch* arch); +char* get_str_uarch_cuda(struct uarch* arch); char* get_str_cc(struct uarch* arch); char* get_str_chip(struct uarch* arch); char* get_str_process(struct uarch* arch); diff --git a/src/intel/check.sh b/src/intel/check.sh new file mode 100755 index 0000000..ebc2574 --- /dev/null +++ b/src/intel/check.sh @@ -0,0 +1,12 @@ +#!/bin/bash -u +# Checks the difference between supported uarchs +# and uarchs that have their topology available +# in file uarch.cpp + +uarchs="$(grep 'CHECK_UARCH' uarch.cpp | cut -d',' -f4-5 | grep 'UARCH_GEN' | tr -d ' ' | sort | uniq)" +topos="$(grep 'CHECK_TOPO' uarch.cpp | cut -d',' -f3,4 | grep 'UARCH_' | tr -d ' ' | sort | uniq)" + +echo "$uarchs" > /tmp/uarchs.txt +echo 
"$topos" > /tmp/topos.txt +meld /tmp/uarchs.txt /tmp/topos.txt +rm -f /tmp/uarchs.txt /tmp/topos.txt diff --git a/src/intel/chips.hpp b/src/intel/chips.hpp new file mode 100644 index 0000000..c4efb35 --- /dev/null +++ b/src/intel/chips.hpp @@ -0,0 +1,59 @@ +#ifndef __INTEL_GPUCHIPS__ +#define __INTEL_GPUCHIPS__ + +#include + +typedef uint32_t GPUCHIP; + +enum { + CHIP_UNKNOWN_INTEL, + // Gen6 + CHIP_HD_2000, + CHIP_HD_3000, + // Gen7 + CHIP_HD_2500, + CHIP_HD_4000, + CHIP_HD_P4000, + // Gen7.5 + CHIP_HD_4200, + CHIP_HD_4400, + CHIP_HD_4600, + CHIP_HD_P4600, + CHIP_IRIS_5100, + CHIP_IRISP_5200, + CHIP_IRISP_P5200, + // Gen8 + CHIP_HD_5300, + CHIP_HD_5500, + CHIP_HD_5600, + CHIP_HD_P5700, + CHIP_HD_6000, + CHIP_IRIS_6100, + CHIP_IRISP_6200, + CHIP_IRISP_P6300, + // Gen9 + CHIP_HD_510, + CHIP_HD_515, + CHIP_HD_520, + CHIP_HD_530, + CHIP_HD_P530, + CHIP_HD_540, + CHIP_HD_550, + CHIP_IRIS_P555, + CHIP_IRIS_580, + CHIP_IRIS_P580, + // Gen9.5 + CHIP_UHD_600, + CHIP_UHD_605, + CHIP_UHD_620, + CHIP_UHD_630, + CHIP_HD_610, + CHIP_HD_615, + CHIP_HD_620, + CHIP_HD_630, + CHIP_HD_P630, + CHIP_IRISP_640, + CHIP_IRISP_650, +}; + +#endif diff --git a/src/intel/intel.cpp b/src/intel/intel.cpp new file mode 100644 index 0000000..da32506 --- /dev/null +++ b/src/intel/intel.cpp @@ -0,0 +1,46 @@ +#include +#include + +#include "intel.hpp" +#include "uarch.hpp" +#include "chips.hpp" +#include "udev.hpp" +#include "../common/pci.hpp" +#include "../common/global.hpp" + +int64_t get_peak_performance_intel(struct gpu_info* gpu) { + return gpu->freq * 1000000 * gpu->topo_i->eu_subslice * gpu->topo_i->subslices * 8 * 2; +} + +struct gpu_info* get_gpu_info_intel() { + struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info)); + gpu->vendor = GPU_VENDOR_INTEL; + + struct pci_dev *devices = get_pci_devices_from_pciutils(); + gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_INTEL); + + if(gpu->pci == NULL) { + // No Intel iGPU found in PCI, which means it is not present + 
return NULL; + } + + gpu->arch = get_uarch_from_pci(gpu->pci); + gpu->name = get_name_from_uarch(gpu->arch); + gpu->topo_i = get_topology_info(gpu->arch); + gpu->freq = get_max_freq_from_file(gpu->pci); + gpu->peak_performance = get_peak_performance_intel(gpu); + + return gpu; +} + +bool print_gpu_intel(struct gpu_info* gpu) { + if(gpu->vendor != GPU_VENDOR_INTEL) return false; + + printf("Intel %s\n", gpu->name); + + return true; +} + +char* get_str_eu(struct gpu_info* gpu) { + return get_str_generic(gpu->topo_i->subslices * gpu->topo_i->eu_subslice); +} diff --git a/src/intel/intel.hpp b/src/intel/intel.hpp new file mode 100644 index 0000000..dc2ea72 --- /dev/null +++ b/src/intel/intel.hpp @@ -0,0 +1,10 @@ +#ifndef __INTEL_GPU__ +#define __INTEL_GPU__ + +#include "../common/gpu.hpp" + +struct gpu_info* get_gpu_info_intel(); +bool print_gpu_intel(struct gpu_info* gpu); +char* get_str_eu(struct gpu_info* gpu); + +#endif diff --git a/src/intel/pci.cpp b/src/intel/pci.cpp new file mode 100644 index 0000000..ea1af9d --- /dev/null +++ b/src/intel/pci.cpp @@ -0,0 +1,88 @@ +#include + +#include "pci.hpp" +#include "chips.hpp" +#include "../common/global.hpp" +#include "../common/pci.hpp" + +#define CHECK_PCI_START if (false) {} +#define CHECK_PCI(pci, id, chip) \ + else if (pci->device_id == id) return chip; +#define CHECK_PCI_END else { printBug("Unkown Intel device id: 0x%.4X", pci->device_id); return CHIP_UNKNOWN_INTEL; } + +/* + * https://github.com/mesa3d/mesa/blob/main/include/pci_ids/i965_pci_ids.h + */ +GPUCHIP get_chip_from_pci_intel(struct pci* pci) { + CHECK_PCI_START + // Gen6 + CHECK_PCI(pci, 0x0102, CHIP_HD_2000) + CHECK_PCI(pci, 0x0106, CHIP_HD_2000) + CHECK_PCI(pci, 0x010A, CHIP_HD_2000) + CHECK_PCI(pci, 0x0112, CHIP_HD_3000) + CHECK_PCI(pci, 0x0122, CHIP_HD_3000) + CHECK_PCI(pci, 0x0116, CHIP_HD_3000) + CHECK_PCI(pci, 0x0126, CHIP_HD_3000) + // Gen7 + CHECK_PCI(pci, 0x0152, CHIP_HD_2500) + CHECK_PCI(pci, 0x0156, CHIP_HD_2500) + CHECK_PCI(pci, 0x0162, 
CHIP_HD_4000) + CHECK_PCI(pci, 0x0166, CHIP_HD_4000) + CHECK_PCI(pci, 0x016a, CHIP_HD_P4000) + // Gen7.5 + CHECK_PCI(pci, 0x0A1E, CHIP_HD_4200) + CHECK_PCI(pci, 0x041E, CHIP_HD_4400) + CHECK_PCI(pci, 0x0A16, CHIP_HD_4400) + CHECK_PCI(pci, 0x0412, CHIP_HD_4600) + CHECK_PCI(pci, 0x0416, CHIP_HD_4600) + CHECK_PCI(pci, 0x0D12, CHIP_HD_4600) + CHECK_PCI(pci, 0x041A, CHIP_HD_P4600) + CHECK_PCI(pci, 0x0A2E, CHIP_IRIS_5100) + CHECK_PCI(pci, 0x0D22, CHIP_IRISP_5200) + CHECK_PCI(pci, 0x0D26, CHIP_IRISP_P5200) + // Gen8 + CHECK_PCI(pci, 0x161E, CHIP_HD_5300) + CHECK_PCI(pci, 0x1616, CHIP_HD_5500) + CHECK_PCI(pci, 0x1612, CHIP_HD_5600) + CHECK_PCI(pci, 0x161A, CHIP_HD_P5700) + CHECK_PCI(pci, 0x1626, CHIP_HD_6000) + CHECK_PCI(pci, 0x162B, CHIP_IRIS_6100) + CHECK_PCI(pci, 0x1622, CHIP_IRISP_6200) + CHECK_PCI(pci, 0x162A, CHIP_IRISP_P6300) + // Gen9 + CHECK_PCI(pci, 0x1902, CHIP_HD_510) + CHECK_PCI(pci, 0x1906, CHIP_HD_510) + CHECK_PCI(pci, 0x190B, CHIP_HD_510) + CHECK_PCI(pci, 0x191E, CHIP_HD_515) + CHECK_PCI(pci, 0x1916, CHIP_HD_520) + CHECK_PCI(pci, 0x1921, CHIP_HD_520) + CHECK_PCI(pci, 0x1912, CHIP_HD_530) + CHECK_PCI(pci, 0x191B, CHIP_HD_530) + CHECK_PCI(pci, 0x191D, CHIP_HD_P530) + /*CHECK_PCI(pci, 0x5917, CHIP_HD_540) + CHECK_PCI(pci, 0x5917, CHIP_HD_550) + CHECK_PCI(pci, 0x5917, CHIP_HD_P555) + CHECK_PCI(pci, 0x5917, CHIP_HD_580) + CHECK_PCI(pci, 0x5917, CHIP_HD_P580)*/ + // Gen9.5 + CHECK_PCI(pci, 0x3185, CHIP_UHD_600) + CHECK_PCI(pci, 0x3184, CHIP_UHD_605) + CHECK_PCI(pci, 0x5917, CHIP_UHD_620) + CHECK_PCI(pci, 0x3E91, CHIP_UHD_630) + CHECK_PCI(pci, 0x3E92, CHIP_UHD_630) + CHECK_PCI(pci, 0x3E98, CHIP_UHD_630) + CHECK_PCI(pci, 0x3E9B, CHIP_UHD_630) + CHECK_PCI(pci, 0x9BC5, CHIP_UHD_630) + CHECK_PCI(pci, 0x9BC8, CHIP_UHD_630) + CHECK_PCI(pci, 0x5902, CHIP_HD_610) + CHECK_PCI(pci, 0x5906, CHIP_HD_610) + CHECK_PCI(pci, 0x590B, CHIP_HD_610) + CHECK_PCI(pci, 0x591E, CHIP_HD_615) + CHECK_PCI(pci, 0x5912, CHIP_HD_630) + CHECK_PCI(pci, 0x591B, CHIP_HD_630) + CHECK_PCI(pci, 
0x591A, CHIP_HD_P630) + CHECK_PCI(pci, 0x591D, CHIP_HD_P630) + CHECK_PCI(pci, 0x5926, CHIP_IRISP_640) + CHECK_PCI(pci, 0x5927, CHIP_IRISP_650) + CHECK_PCI_END +} diff --git a/src/intel/pci.hpp b/src/intel/pci.hpp new file mode 100644 index 0000000..9966a04 --- /dev/null +++ b/src/intel/pci.hpp @@ -0,0 +1,19 @@ +#ifndef __PCI_INTEL__ +#define __PCI_INTEL__ + +#include + +#include "../common/pci.hpp" +#include "chips.hpp" + +/* + * doc: https://wiki.osdev.org/PCI#Class_Codes + * https://pci-ids.ucw.cz/read/PC + */ +#define PCI_VENDOR_ID_INTEL 0x8086 + +struct pci; + +GPUCHIP get_chip_from_pci_intel(struct pci* pci); + +#endif diff --git a/src/intel/uarch.cpp b/src/intel/uarch.cpp new file mode 100644 index 0000000..21643e2 --- /dev/null +++ b/src/intel/uarch.cpp @@ -0,0 +1,212 @@ +#include +#include +#include +#include + +#include "../common/uarch.hpp" +#include "../common/global.hpp" +#include "../common/gpu.hpp" +#include "chips.hpp" +#include "pci.hpp" + +// Data not available +#define NA -1 + +// Unknown manufacturing process +#define UNK -1 + +/* + * Mapping between iGPU and CPU uarchs + * ----------------------------------- + * Gen6: Sandy Bridge (2nd Gen) + * Gen7: Ivy Bridge (3rd Gen) + * Gen7.5: Haswell (4th Gen) + * Gen8: Broadwell (5th Gen) + * Gen9: Skylake (6th Gen) + * Gen9.5: Kaby Lake + */ +enum { + UARCH_UNKNOWN, + UARCH_GEN6, + UARCH_GEN7, + UARCH_GEN7_5, + UARCH_GEN8, + UARCH_GEN9, + UARCH_GEN9_5, +}; + +static const char *uarch_str[] = { + /*[ARCH_UNKNOWN] = */ STRING_UNKNOWN, + /*[ARCH_GEN6] = */ "Gen6", + /*[ARCH_GEN7] = */ "Gen7", + /*[ARCH_GEN7_5] = */ "Gen7.5", + /*[ARCH_GEN8] = */ "Gen8", + /*[ARCH_GEN9] = */ "Gen9", + /*[ARCH_GEN9_5] = */ "Gen9.5", +}; + +// Graphic Tiers (GT) +enum { + GT_UNKNOWN, + GT1, + GT1_5, + GT2, + GT3, + GT3e, + GT4e +}; + +static const char *gt_str[] = { + /*[GT_UNKNOWN] = */ STRING_UNKNOWN, + /*[GT1] = */ "GT1", + /*[GT1_5] = */ "GT1.5", + /*[GT2] = */ "GT2", + /*[GT3] = */ "GT3", + /*[GT3e] = */ "GT3e", +
/*[GT4e] = */ "GT4e", +}; + +#define CHECK_UARCH_START if (false) {} +#define CHECK_UARCH(arch, chip_, str, uarch, gt, process) \ + else if (arch->chip == chip_) fill_uarch(arch, str, uarch, gt, process); +#define CHECK_UARCH_END else { printBug("map_chip_to_uarch_intel: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, GT_UNKNOWN, 0); } + +#define CHECK_TOPO_START if (false) {} +#define CHECK_TOPO(topo, arch, uarch_, gt_, eu_sub, sub, sli) \ + else if(arch->uarch == uarch_ && arch->gt == gt_) fill_topo(topo, eu_sub, sub, sli); +#define CHECK_TOPO_END else { printBug("TODOO"); fill_topo(topo, -1, -1, -1); } + +void fill_topo(struct topology_i* topo_i, int32_t eu_sub, int32_t sub, int32_t sli) { + topo_i->slices = sli; + topo_i->subslices = sub; + topo_i->eu_subslice = eu_sub; +} + +void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, int32_t gt, uint32_t process) { + arch->chip_str = (char *) emalloc(sizeof(char) * (strlen(str)+1)); + strcpy(arch->chip_str, str); + arch->uarch = u; + arch->process = process; + arch->gt = gt; +} + +void map_chip_to_uarch_intel(struct uarch* arch) { + CHECK_UARCH_START + // Gen6 + CHECK_UARCH(arch, CHIP_HD_2000, "HD Graphics 2000", UARCH_GEN6, GT1, 32) + CHECK_UARCH(arch, CHIP_HD_3000, "HD Graphics 3000", UARCH_GEN6, GT2, 32) + // Gen7 + CHECK_UARCH(arch, CHIP_HD_2500, "HD Graphics 2500", UARCH_GEN7, GT1, 22) + CHECK_UARCH(arch, CHIP_HD_4000, "HD Graphics 4000", UARCH_GEN7, GT2, 22) + CHECK_UARCH(arch, CHIP_HD_P4000, "HD Graphics P4000", UARCH_GEN7, GT2, 22) + // Gen7.5 + CHECK_UARCH(arch, CHIP_HD_4200, "HD Graphics 4200", UARCH_GEN7_5, GT2, 22) + CHECK_UARCH(arch, CHIP_HD_4400, "HD Graphics 4400", UARCH_GEN7_5, GT2, 22) + CHECK_UARCH(arch, CHIP_HD_4600, "HD Graphics 4600", UARCH_GEN7_5, GT2, 22) + CHECK_UARCH(arch, CHIP_HD_P4600, "HD Graphics P4600", UARCH_GEN7_5, GT2, 22) + CHECK_UARCH(arch, CHIP_IRIS_5100, "HD Iris 5100", UARCH_GEN7_5, GT3, 22) + CHECK_UARCH(arch, CHIP_IRISP_5200, "HD
Iris Pro 5200", UARCH_GEN7_5, GT3, 22) + CHECK_UARCH(arch, CHIP_IRISP_P5200, "HD Iris Pro P5200", UARCH_GEN7_5, GT3, 22) + // Gen8 + CHECK_UARCH(arch, CHIP_HD_5300, "HD Graphics 5300", UARCH_GEN8, GT2, 14) + CHECK_UARCH(arch, CHIP_HD_5500, "HD Graphics 5500", UARCH_GEN8, GT2, 14) + CHECK_UARCH(arch, CHIP_HD_5600, "HD Graphics 5600", UARCH_GEN8, GT2, 14) + CHECK_UARCH(arch, CHIP_HD_P5700, "HD Graphics P5700", UARCH_GEN8, GT2, 14) + CHECK_UARCH(arch, CHIP_HD_6000, "HD Graphics 6000", UARCH_GEN8, GT3, 14) + CHECK_UARCH(arch, CHIP_IRIS_6100, "Iris Graphics 6100", UARCH_GEN8, GT3, 14) + CHECK_UARCH(arch, CHIP_IRISP_6200, "Iris Pro Graphics 6200", UARCH_GEN8, GT3, 14) + CHECK_UARCH(arch, CHIP_IRISP_P6300, "Iris Pro Graphics P6300", UARCH_GEN8, GT3, 14) + // Gen9 + CHECK_UARCH(arch, CHIP_HD_510, "HD Graphics 510", UARCH_GEN9, GT1, 14) + CHECK_UARCH(arch, CHIP_HD_515, "HD Graphics 515", UARCH_GEN9, GT2, 14) + CHECK_UARCH(arch, CHIP_HD_520, "HD Graphics 520", UARCH_GEN9, GT2, 14) + CHECK_UARCH(arch, CHIP_HD_530, "HD Graphics 530", UARCH_GEN9, GT2, 14) + CHECK_UARCH(arch, CHIP_HD_P530, "HD Graphics P530", UARCH_GEN9, GT2, 14) + // Gen9.5 + CHECK_UARCH(arch, CHIP_UHD_600, "UHD Graphics 600", UARCH_GEN9_5, GT1, 14) + CHECK_UARCH(arch, CHIP_UHD_605, "UHD Graphics 605", UARCH_GEN9_5, GT1_5, 14) + CHECK_UARCH(arch, CHIP_UHD_620, "UHD Graphics 620", UARCH_GEN9_5, GT2, 14) + CHECK_UARCH(arch, CHIP_UHD_630, "UHD Graphics 630", UARCH_GEN9_5, GT2, 14) + CHECK_UARCH(arch, CHIP_HD_610, "HD Graphics 610", UARCH_GEN9_5, GT1, 14) + CHECK_UARCH(arch, CHIP_HD_615, "HD Graphics 615", UARCH_GEN9_5, GT2, 14) + CHECK_UARCH(arch, CHIP_HD_630, "HD Graphics 630", UARCH_GEN9_5, GT2, 14) + CHECK_UARCH(arch, CHIP_HD_P630, "HD Graphics P630", UARCH_GEN9_5, GT2, 14) + CHECK_UARCH(arch, CHIP_IRISP_640, "Iris Plus Graphics 640", UARCH_GEN9_5, GT3e, 14) + CHECK_UARCH(arch, CHIP_IRISP_650, "Iris Plus Graphics 650", UARCH_GEN9_5, GT3e, 14) + CHECK_UARCH_END +} + +const char* get_str_uarch_intel(struct uarch*
arch) { + return uarch_str[arch->uarch]; +} + +const char* get_str_gt(struct uarch* arch) { + return gt_str[arch->gt]; +} + +struct uarch* get_uarch_from_pci(struct pci* pci) { + struct uarch* arch = (struct uarch*) emalloc(sizeof(struct uarch)); + + arch->chip_str = NULL; + arch->chip = get_chip_from_pci_intel(pci); + if(arch->chip == CHIP_UNKNOWN_INTEL) { + return NULL; + } + else { + map_chip_to_uarch_intel(arch); + return arch; + } +} + +char* get_name_from_uarch(struct uarch* arch) { + char* name = (char *) emalloc(sizeof(char) * (strlen(arch->chip_str) + 6 + 1)); + sprintf(name, "Intel %s", arch->chip_str); + return name; +} + +/* + * Refs: + * Gen6: https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen6 + * Gen7/7.5: https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen7 + "The Compute Architecture of Intel Processor Graphics Gen7.5, v1.0" + * Gen8: https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen8 + "The Compute Architecture of Intel Processor Graphics Gen8, v1.1" + * Gen9: https://en.wikichip.org/wiki/intel/microarchitectures/gen9#Configuration + "The Compute Architecture of Intel Processor Graphics Gen9, v1.0" + * Gen9.5: https://en.wikichip.org/wiki/intel/microarchitectures/gen9.5#Configuration + */ +struct topology_i* get_topology_info(struct uarch* arch) { + struct topology_i* topo = (struct topology_i*) emalloc(sizeof(struct topology_i)); + + // Syntax: (EU per subslice, Subslices, Slices) + CHECK_TOPO_START + // Gen6 + CHECK_TOPO(topo, arch, UARCH_GEN6, GT1, 6, 1, 1) + CHECK_TOPO(topo, arch, UARCH_GEN6, GT2, 6, 2, 1) + // Gen7 + CHECK_TOPO(topo, arch, UARCH_GEN7, GT1, 6, 1, 1) + CHECK_TOPO(topo, arch, UARCH_GEN7, GT2, 8, 2, 1) + CHECK_TOPO(topo, arch, UARCH_GEN7, GT3, 6, 1, 1) + // Gen7.5 + CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT1, 10, 1, 1) + CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT2, 10, 2, 1) + CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT3, 10, 4, 1) + // Gen8 + CHECK_TOPO(topo, arch, 
UARCH_GEN8, GT1, 6, 2, 1) + CHECK_TOPO(topo, arch, UARCH_GEN8, GT2, 8, 3, 1) + CHECK_TOPO(topo, arch, UARCH_GEN8, GT3, 8, 6, 2) + // Gen9 + CHECK_TOPO(topo, arch, UARCH_GEN9, GT1, 6, 2, 1) + CHECK_TOPO(topo, arch, UARCH_GEN9, GT2, 8, 3, 1) + CHECK_TOPO(topo, arch, UARCH_GEN9, GT3, 8, 6, 2) + CHECK_TOPO(topo, arch, UARCH_GEN9, GT4e, 8, 9, 3) + // Gen9.5 + CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT1, 6, 2, 1) + CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT1_5, 6, 3, 1) + CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT2, 8, 3, 1) + CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT3, 8, 6, 2) + CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT3e, 8, 6, 2) // Same as GT3, but has eDRAM cache + CHECK_TOPO_END + + return topo; +} diff --git a/src/intel/uarch.hpp b/src/intel/uarch.hpp new file mode 100644 index 0000000..2947988 --- /dev/null +++ b/src/intel/uarch.hpp @@ -0,0 +1,14 @@ +#ifndef __INTEL_UARCH__ +#define __INTEL_UARCH__ + +#include "../common/gpu.hpp" + +struct uarch; + +struct uarch* get_uarch_from_pci(struct pci* pci); +char* get_name_from_uarch(struct uarch* arch); +char* get_str_gt(struct uarch* arch); +char* get_str_uarch_intel(struct uarch* arch); +struct topology_i* get_topology_info(struct uarch* arch); + +#endif diff --git a/src/intel/udev.cpp b/src/intel/udev.cpp new file mode 100644 index 0000000..91e4142 --- /dev/null +++ b/src/intel/udev.cpp @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../common/global.hpp" +#include "../common/pci.hpp" + +#define _PATH_SYS_SYSTEM "/sys/devices/pci0000:00" +#define _PATH_SYS_DRM "/drm" +#define _PATH_CARD "/card0" +#define _PATH_FREQUENCY_MAX "/gt_max_freq_mhz" +#define _PATH_FREQUENCY_MIN "/gt_min_freq_mhz" + +#define _PATH_FREQUENCY_MAX_LEN 100 +#define DEFAULT_FILE_SIZE 4096 +#define UNKNOWN_DATA -1 + +char* read_file(char* path, int* len) { + int fd = open(path, O_RDONLY); + + if(fd == -1) { + return NULL; + } + + //File exists, read it + int bytes_read = 0; + int offset = 0; + int 
block = 128; + char* buf = (char *) emalloc(sizeof(char)*DEFAULT_FILE_SIZE); + memset(buf, 0, sizeof(char)*DEFAULT_FILE_SIZE); + + while ( offset + block < DEFAULT_FILE_SIZE && (bytes_read = read(fd, buf+offset, block)) > 0 ) { + offset += bytes_read; + } + + if (close(fd) == -1) { + return NULL; + } + + *len = offset; + return buf; +} + +long get_freq_from_file(char* path) { + int filelen; + char* buf; + if((buf = read_file(path, &filelen)) == NULL) { + printWarn("Could not open '%s'", path); + return UNKNOWN_DATA; + } + + char* end; + errno = 0; + long ret = strtol(buf, &end, 10); + if(errno != 0) { + printBug("strtol: %s", strerror(errno)); + free(buf); + return UNKNOWN_DATA; + } + + free(buf); + + // We will be getting the frequency in MHz + // We consider it is an error if frequency is + // greater than 10 GHz or less than 100 MHz + if(ret > 10000 || ret < 100) { + printBug("Invalid data was read from file '%s': %ld\n", path, ret); + return UNKNOWN_DATA; + } + + return ret; +} + +long get_max_freq_from_file(struct pci* pci) { + char path[_PATH_FREQUENCY_MAX_LEN]; + sprintf(path, "%s/%04x:%02x:%02x.%d%s%s%s", _PATH_SYS_SYSTEM, pci->domain, pci->bus, pci->dev, pci->func, _PATH_SYS_DRM, _PATH_CARD, _PATH_FREQUENCY_MAX); + return get_freq_from_file(path); +} + +long get_min_freq_from_file(struct pci* pci) { + char path[_PATH_FREQUENCY_MAX_LEN]; + sprintf(path, "%s/%04x:%02x:%02x.%d%s%s%s", _PATH_SYS_SYSTEM, pci->domain, pci->bus, pci->dev, pci->func, _PATH_SYS_DRM, _PATH_CARD, _PATH_FREQUENCY_MIN); + return get_freq_from_file(path); +} diff --git a/src/intel/udev.hpp b/src/intel/udev.hpp new file mode 100644 index 0000000..eb5aaae --- /dev/null +++ b/src/intel/udev.hpp @@ -0,0 +1,7 @@ +#ifndef __UDEV__ +#define __UDEV__ + +long get_max_freq_from_file(struct pci* pci); +long get_min_freq_from_file(struct pci* pci); + +#endif