[v0.11] Handle the case where the GPU is not found in the pci LUT

[v0.11] Do not show error message when there is no Intel iGPU
[v0.11] Fix compilation error and ambiguity with CUDA and Intel backend when enabled at the same time due to functions with the same name
2021-12-18 20:12:41 +01:00 · 2021-12-18 10:35:51 +01:00 · 2021-12-18 10:14:14 +01:00 · 2021-12-10 16:18:39 +01:00 · 2021-12-10 15:55:59 +01:00 · 2021-12-10 15:32:29 +01:00
40 changed files with 1371 additions and 344 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -0,0 +1,119 @@
+cmake_minimum_required(VERSION 3.10)
+include(CheckLanguage)
+include(ExternalProject)
+
+project(gpufetch CXX)
+
+# Source tree layout
+set(SRC_DIR "src")
+set(COMMON_DIR "${SRC_DIR}/common")
+set(CUDA_DIR "${SRC_DIR}/cuda")
+set(INTEL_DIR "${SRC_DIR}/intel")
+
+# ANSI escape sequences used to colorize configure-time messages. They are
+# defined before the first message() that uses them; on Windows they stay
+# undefined and therefore expand to nothing.
+if(NOT WIN32)
+  string(ASCII 27 Esc)
+  set(ColorReset "${Esc}[m")
+  set(ColorBold  "${Esc}[1m")
+  set(Red         "${Esc}[31m")
+  set(Green       "${Esc}[32m")
+  set(BoldRed     "${Esc}[1;31m")
+  set(BoldGreen   "${Esc}[1;32m")
+  set(BoldYellow  "${Esc}[1;33m")
+endif()
+
+# The Intel backend is built unless ENABLE_INTEL_BACKEND is explicitly set
+if(NOT DEFINED ENABLE_INTEL_BACKEND)
+    set(ENABLE_INTEL_BACKEND true)
+endif()
+
+# The CUDA backend is built when it is requested (or left unset) and a CUDA
+# compiler is available; otherwise it is switched off
+if(NOT DEFINED ENABLE_CUDA_BACKEND OR ENABLE_CUDA_BACKEND)
+  check_language(CUDA)
+  if(CMAKE_CUDA_COMPILER)
+    enable_language(CUDA)
+    set(ENABLE_CUDA_BACKEND true)
+    # Must link_directories early so add_executable(gpufetch ...) gets the right directories.
+    # link_directories() only takes directories, not target names.
+    link_directories("${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/lib")
+  else()
+    set(ENABLE_CUDA_BACKEND false)
+  endif()
+endif()
+
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")
+find_package(PCIUTILS)
+if(NOT PCIUTILS_FOUND)
+  message(STATUS "${BoldYellow}pciutils not found, downloading and building a local copy...${ColorReset}")
+
+  # Download and build pciutils
+  set(PCIUTILS_INSTALL_LOCATION ${CMAKE_BINARY_DIR}/pciutils-install)
+  ExternalProject_Add(pciutils
+    GIT_REPOSITORY https://github.com/pciutils/pciutils
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND make SHARED=no
+    BUILD_IN_SOURCE true
+    INSTALL_COMMAND make PREFIX=${PCIUTILS_INSTALL_LOCATION} install-lib
+  )
+
+  include_directories(${PCIUTILS_INSTALL_LOCATION}/include)
+  link_directories(${PCIUTILS_INSTALL_LOCATION}/lib)
+else()
+  include_directories(${PCIUTILS_INCLUDE_DIR})
+  link_libraries(${PCIUTILS_LIBRARIES})
+endif()
+
+add_executable(gpufetch ${COMMON_DIR}/main.cpp ${COMMON_DIR}/args.cpp ${COMMON_DIR}/gpu.cpp ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/global.cpp ${COMMON_DIR}/printer.cpp ${COMMON_DIR}/master.cpp ${COMMON_DIR}/uarch.cpp)
+set(SANITY_FLAGS "-Wfloat-equal -Wshadow -Wpointer-arith")
+# Append to (rather than overwrite) the flags supplied by the user or the environment
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SANITY_FLAGS} -Wall -Wextra -pedantic -fstack-protector-all -std=c++11")
+
+if(NOT PCIUTILS_FOUND)
+  # The common sources use libpci too, so the local copy must be built first
+  # even when no backend is enabled
+  add_dependencies(gpufetch pciutils)
+endif()
+
+if(ENABLE_INTEL_BACKEND)
+  target_compile_definitions(gpufetch PUBLIC BACKEND_INTEL)
+
+  add_library(intel_backend STATIC ${INTEL_DIR}/intel.cpp ${INTEL_DIR}/pci.cpp ${INTEL_DIR}/uarch.cpp ${INTEL_DIR}/udev.cpp)
+
+  if(NOT PCIUTILS_FOUND)
+    add_dependencies(intel_backend pciutils)
+  endif()
+
+  target_link_libraries(gpufetch intel_backend)
+endif()
+
+if(ENABLE_CUDA_BACKEND)
+  target_compile_definitions(gpufetch PUBLIC BACKEND_CUDA)
+
+  # Architectures to build for, chosen from the CUDA compiler version
+  # https://en.wikipedia.org/w/index.php?title=CUDA&section=5#GPUs_supported
+  # https://raw.githubusercontent.com/PointCloudLibrary/pcl/master/cmake/pcl_find_cuda.cmake
+  if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.0")
+    set(CMAKE_CUDA_ARCHITECTURES 35 37 50 52 53 60 61 62 70 72 75 80 86)
+  elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "10.0")
+    set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72 75)
+  elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "9.0")
+    set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72)
+  elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_EQUAL "8.0")
+    set(CMAKE_CUDA_ARCHITECTURES 20 21 30 32 35 37 50 52 53 60 61 62)
+  endif()
+
+  add_library(cuda_backend STATIC ${CUDA_DIR}/cuda.cpp ${CUDA_DIR}/uarch.cpp ${CUDA_DIR}/pci.cpp)
+
+  if(NOT PCIUTILS_FOUND)
+    add_dependencies(cuda_backend pciutils)
+  endif()
+
+  target_include_directories(cuda_backend PUBLIC ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/samples/common/inc ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/include)
+
+  target_link_libraries(cuda_backend PRIVATE cudart)
+  target_link_libraries(gpufetch cuda_backend)
+endif()
+
+target_link_libraries(gpufetch pci z)
+install(TARGETS gpufetch DESTINATION bin)
+
+# Summary of the backends selected above
+message(STATUS "----------------------")
+message(STATUS "gpufetch build report:")
+if(ENABLE_INTEL_BACKEND)
+  message(STATUS "Intel backend: ${BoldGreen}ON${ColorReset}")
+else()
+  message(STATUS "Intel backend: ${BoldRed}OFF${ColorReset}")
+endif()
+if(ENABLE_CUDA_BACKEND)
+  message(STATUS "CUDA backend: ${BoldGreen}ON${ColorReset}")
+else()
+  message(STATUS "CUDA backend: ${BoldRed}OFF${ColorReset}")
+endif()
+message(STATUS "----------------------")
--- a/53
+++ b/53
@@ -1,53 +0,0 @@
-CXX ?= g++
-CUDA_PATH ?= /usr/local/cuda/
-PREFIX ?= /usr
-
-CXXFLAGS+=-Wall -Wextra -pedantic -fstack-protector-all -pedantic
-SANITY_FLAGS=-Wfloat-equal -Wshadow -Wpointer-arith
-
-SRC_COMMON=src/common/
-SRC_CUDA=src/cuda/
-
-COMMON_SRC = $(SRC_COMMON)main.cpp  $(SRC_COMMON)gpu.cpp $(SRC_COMMON)args.cpp $(SRC_COMMON)global.cpp $(SRC_COMMON)printer.cpp
-COMMON_HDR = $(SRC_COMMON)ascii.hpp $(SRC_COMMON)gpu.hpp $(SRC_COMMON)args.hpp $(SRC_COMMON)global.hpp $(SRC_COMMON)printer.hpp
-
-CUDA_SRC = $(SRC_CUDA)cuda.cpp $(SRC_CUDA)uarch.cpp $(SRC_CUDA)pci.cpp $(SRC_CUDA)nvmlb.cpp
-CUDA_HDR = $(SRC_CUDA)cuda.hpp $(SRC_CUDA)uarch.hpp $(SRC_CUDA)pci.hpp $(SRC_CUDA)nvmlb.hpp $(SRC_CUDA)chips.hpp
-
-SOURCE += $(COMMON_SRC) $(CUDA_SRC)
-HEADERS += $(COMMON_HDR) $(CUDA_HDR)
-
-OUTPUT=gpufetch
-
-CXXFLAGS+= -I $(CUDA_PATH)/samples/common/inc -I $(CUDA_PATH)/targets/x86_64-linux/include -L $(CUDA_PATH)/targets/x86_64-linux/lib -lcudart -lnvidia-ml
-
-all: CXXFLAGS += -O3
-all: $(OUTPUT)
-
-debug: CXXFLAGS += -g -O0
-debug: $(OUTPUT)
-
-static: CXXFLAGS += -static -O3
-static: $(OUTPUT)
-
-strict: CXXFLAGS += -O3 -Werror -fsanitize=undefined -D_FORTIFY_SOURCE=2
-strict: $(OUTPUT)
-
-$(OUTPUT): Makefile $(SOURCE) $(HEADERS)
-	$(CXX) $(CXXFLAGS) $(SANITY_FLAGS) $(SOURCE) -o $(OUTPUT)
-
-run: $(OUTPUT)
-	./$(OUTPUT)
-
-clean:
-	@rm -f $(OUTPUT)
-
-install: $(OUTPUT)
-	install -Dm755 "gpufetch"   "$(DESTDIR)$(PREFIX)/bin/gpufetch"
-	install -Dm644 "LICENSE"    "$(DESTDIR)$(PREFIX)/share/licenses/gpufetch-git/LICENSE"
-	install -Dm644 "gpufetch.1" "$(DESTDIR)$(PREFIX)/share/man/man1/gpufetch.1.gz"
-
-uninstall:
-	rm -f "$(DESTDIR)$(PREFIX)/bin/gpufetch"
-	rm -f "$(DESTDIR)$(PREFIX)/share/licenses/gpufetch-git/LICENSE"
-	rm -f "$(DESTDIR)$(PREFIX)/share/man/man1/gpufetch.1.gz"
--- a/README.md
+++ b/README.md
@@ -1,19 +1,30 @@
 <p align="center"><img width=50% src="./pictures/gpufetch.png"></p>

-<div align="center">
-
-![GitHub tag (latest by date)](https://img.shields.io/github/v/tag/Dr-Noob/gpufetch?label=gpufetch)
-[![GitHub Repo stars](https://img.shields.io/github/stars/Dr-Noob/gpufetch?color=4CC61F)](https://github.com/Dr-Noob/gpufetch/stargazers)
-[![GitHub issues](https://img.shields.io/github/issues/Dr-Noob/gpufetch)](https://github.com/Dr-Noob/gpufetch/issues)
-[![License](https://img.shields.io/github/license/Dr-Noob/gpufetch?color=orange)](https://github.com/Dr-Noob/gpufetch/blob/master/LICENSE)
-
 <h4 align="center">Simple yet fancy GPU architecture fetching tool</h4>
-&nbsp;

-![gpu_img](pictures/2080ti.png)
+<p align="center"> </p>

+<div align="center">
+  <img height="22px" src="https://img.shields.io/github/v/tag/Dr-Noob/gpufetch?label=gpufetch&style=flat-square">
+  <a href="https://github.com/Dr-Noob/gpufetch/stargazers">
+    <img height="22px" src="https://img.shields.io/github/stars/Dr-Noob/gpufetch?color=4CC61F&style=flat-square">
+  </a>
+  <a href="https://github.com/Dr-Noob/gpufetch/issues">
+    <img height="22px" src="https://img.shields.io/github/issues/Dr-Noob/gpufetch?style=flat-square">
+  </a>
+  <a href="https://github.com/Dr-Noob/gpufetch/blob/master/LICENSE">
+    <img height="22px" src="https://img.shields.io/github/license/Dr-Noob/gpufetch?color=orange&style=flat-square">
+  </a>
 </div>

+<p align="center"> </p>
+
+<p align="center">
+gpufetch is a command-line tool written in C++ that displays the GPU information in a clean and beautiful way
+</p>
+
+<p align="center"><img width=80% src="./pictures/2080ti.png"></p>
+
 # Table of contents
 <!-- UPDATE with: doctoc --notitle README.md -->
 <!-- START doctoc generated TOC please keep comment here to allow auto update -->
@@ -31,18 +42,28 @@
 gpufetch supports NVIDIA GPUs under Linux only.

 # 2. Installation (building from source)
-You will need a C++ compiler (e.g, `g++`), `make` and CUDA to compile `gpufetch`. To do so, just clone the repo and run `make`:
+You will need:
+
+- C++ compiler (e.g, `g++`)
+- `cmake`
+- `make`
+- CUDA (NVIDIA backend)
+- pciutils (optional)
+
+To build gpufetch, just clone the repo and run `./build.sh`:

 ```
 git clone https://github.com/Dr-Noob/gpufetch
 cd gpufetch
-make
+./build.sh
 ./gpufetch
 ```
-When building gpufetch, you may encounter an error telling you that it cannot find some CUDA header files. In this case, is very likely that the Makefile is unable to find your CUDA installation. This can be solved by setting `CUDA_PATH` to the correct CUDA installation path. For example:
+
+- NOTE 1: It is recommended to install the `pciutils` development package, which is needed by gpufetch. If it is not installed, it will be downloaded and built automatically just to compile gpufetch.
+- NOTE 2: When building gpufetch, cmake may fail if it is unable to find the CUDA installation. If CUDA is installed but CMake does not find it, you need to pass the CUDA path to cmake. You can do this easily by directly editing the `build.sh` script. For example:

 ```
-CUDA_PATH=/opt/cuda make
+cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=/usr/local/cuda/ ..
 ```

 # 3. Colors and style
--- a/build.sh
+++ b/build.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# gpufetch build script
+#
+# Usage: ./build.sh [extra cmake arguments...]
+#
+# Configures and builds gpufetch from scratch inside build/ and creates a
+# gpufetch symlink to the resulting binary in the current directory.
+# Any argument given to this script is forwarded to cmake.
+set -e
+
+# Always start from a clean build tree and drop the symlink of a previous run
+rm -rf build/ gpufetch
+mkdir build/
+cd build/
+
+# In case you have CUDA installed but it is not detected,
+# - set CMAKE_CUDA_COMPILER to your nvcc binary
+# - set CMAKE_CUDA_COMPILER_TOOLKIT_ROOT to the CUDA root dir
+# for example:
+# cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=/usr/local/cuda/ ..
+# or, without editing this script:
+# ./build.sh -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT=/usr/local/cuda/
+
+cmake "$@" ..
+make -j"$(nproc)"
+cd -
+ln -s build/gpufetch .
--- a/cmake/FindPCIUTILS.cmake
+++ b/cmake/FindPCIUTILS.cmake
@@ -0,0 +1,29 @@
+# - Try to find the pciutils library (libpci)
+# Once done this will define
+#
+#  PCIUTILS_FOUND - system has PCIUtils
+#  PCIUTILS_INCLUDE_DIR - the PCIUTILS include directory
+#  PCIUTILS_LIBRARIES - The libraries needed to use PCIUtils
+
+# Both search results are cache entries: when they are already set (e.g. on a
+# re-configure) there is no need to report the package again
+if(PCIUTILS_INCLUDE_DIR AND PCIUTILS_LIBRARY)
+  set(PCIUTILS_FIND_QUIETLY TRUE)
+endif()
+
+find_path(PCIUTILS_INCLUDE_DIR pci/pci.h)
+
+find_library(PCIUTILS_LIBRARY NAMES pci)
+if(PCIUTILS_LIBRARY)
+  # resolv is added to the link list only when it is found
+  find_library(RESOLV_LIBRARY NAMES resolv)
+  if(RESOLV_LIBRARY)
+    set(PCIUTILS_LIBRARIES ${PCIUTILS_LIBRARY} ${RESOLV_LIBRARY})
+  else()
+    set(PCIUTILS_LIBRARIES ${PCIUTILS_LIBRARY})
+  endif()
+endif()
+
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(PCIUTILS DEFAULT_MSG PCIUTILS_LIBRARIES PCIUTILS_INCLUDE_DIR)
+
+# Hide the cache entries created by the find_* calls above
+mark_as_advanced(PCIUTILS_INCLUDE_DIR PCIUTILS_LIBRARY RESOLV_LIBRARY)
+
--- a/gpufetch.1
+++ b/gpufetch.1
@@ -0,0 +1,47 @@
+.\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.48.3.
+.TH GPUFETCH "1" "August 2021" "gpufetch v0.10" "User Commands"
+.SH NAME
+gpufetch
+.SH SYNOPSIS
+.B gpufetch
+[\fI\,OPTION\/\fR]...
+.SH DESCRIPTION
+Simple yet fancy GPU architecture fetching tool
+.SH OPTIONS
+.TP
+\fB\-c\fR, \fB\-\-color\fR
+Sets the color scheme (by default, gpufetch uses the system color scheme) See COLORS section for a more detailed explanation
+.TP
+\fB\-g\fR, \fB\-\-gpu\fR
+Selects the GPU to use (default: 0)
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Prints this help and exit
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Prints gpufetch version and exit
+.SS "COLORS:"
+.IP
+Color scheme can be set using a predefined color scheme or a custom one:
+1. To use a predefined color scheme, the name of the scheme must be provided. Possible values are:
+* "nvidia":  Use NVIDIA default color scheme
+2. To use a custom color scheme, 4 colors must be given in RGB with the format: R,G,B:R,G,B:...
+The first 2 colors are the GPU art color and the following 2 colors are the text colors
+.SS "EXAMPLES:"
+.IP
+Run gpufetch with NVIDIA color scheme:
+.IP
+\&./gpufetch \fB\-\-color\fR nvidia
+.IP
+Run gpufetch with a custom color scheme:
+.IP
+\&./gpufetch \fB\-\-color\fR 239,90,45:210,200,200:100,200,45:0,200,200
+.SS "BUGS:"
+.IP
+Report bugs to https://github.com/Dr\-Noob/gpufetch/issues
+.SS "NOTE:"
+.IP
+Peak performance information is NOT accurate. gpufetch computes peak performance using the max
+frequency. However, to properly compute peak performance, you need to know the frequency of the
+GPU running real code.
+For peak performance measurement see: https://github.com/Dr\-Noob/peakperf
--- a/pictures/2080ti.png
+++ b/pictures/2080ti.png
--- a/src/common/args.cpp
+++ b/src/common/args.cpp
@@ -13,12 +13,18 @@
 #define NUM_COLORS         4

 #define COLOR_STR_NVIDIA "nvidia"
+#define COLOR_STR_INTEL  "intel"

-#define COLOR_DEFAULT_NVIDIA "118,185,0:255,255,255:255,255,255:118,185,0"
+//                              +-----------------------+-----------------------+
+//                              | Color logo            | Color text            |
+//                              | Color 1   | Color 2   | Color 1   | Color 2   |
+#define COLOR_DEFAULT_NVIDIA    "118,185,000:255,255,255:255,255,255:118,185,000"
+#define COLOR_DEFAULT_INTEL     "015,125,194:230,230,230:040,150,220:230,230,230"

 struct args_struct {
  bool help_flag;
  bool version_flag;
+  bool list_gpus;
  int gpu_idx;
  STYLE style;
  struct color** colors;
@@ -28,17 +34,19 @@ int errn = 0;
 static struct args_struct args;

 const char args_chr[] = {
-  /* [ARG_CHAR_COLOR]   = */ 'c',
-  /* [ARG_CHAR_GPU]     = */ 'g',
-  /* [ARG_CHAR_HELP]    = */ 'h',
-  /* [ARG_CHAR_VERSION] = */ 'V',
+  /* [ARG_COLOR]   = */ 'c',
+  /* [ARG_GPU]     = */ 'g',
+  /* [ARG_LIST]    = */ 'l',
+  /* [ARG_HELP]    = */ 'h',
+  /* [ARG_VERSION] = */ 'V',
 };

 const char *args_str[] = {
-  /* [ARG_CHAR_COLOR]   = */ "color",
-  /* [ARG_CHAR_GPU]     = */ "gpu",
-  /* [ARG_CHAR_HELP]    = */ "help",
-  /* [ARG_CHAR_VERSION] = */ "version",
+  /* [ARG_COLOR]   = */ "color",
+  /* [ARG_GPU]     = */ "gpu",
+  /* [ARG_LIST]    = */ "list-gpus",
+  /* [ARG_HELP]    = */ "help",
+  /* [ARG_VERSION] = */ "version",
 };

 int getarg_int(char* str) {
@@ -100,6 +108,10 @@ bool show_help() {
  return args.help_flag;
 }

+// Returns true when the list-gpus option was found while parsing the arguments
+bool list_gpus() {
+  return args.list_gpus;
+}
+
 bool show_version() {
  return args.version_flag;
 }
@@ -119,8 +131,9 @@ char* build_short_options() {
  char* str = (char *) emalloc(sizeof(char) * (len*2 + 1));
  memset(str, 0, sizeof(char) * (len*2 + 1));

-  sprintf(str, "%c:%c:%c%c", c[ARG_GPU],
-  c[ARG_COLOR], c[ARG_HELP], c[ARG_VERSION]);
+  sprintf(str, "%c:%c:%c%c%c", c[ARG_GPU],
+  c[ARG_COLOR], c[ARG_HELP], c[ARG_LIST],
+  c[ARG_VERSION]);

  return str;
 }
@@ -137,6 +150,7 @@ bool parse_color(char* optarg_str, struct color*** cs) {
  bool free_ptr = true;

  if(strcmp(optarg_str, COLOR_STR_NVIDIA) == 0) color_to_copy = COLOR_DEFAULT_NVIDIA;
+  else if(strcmp(optarg_str, COLOR_STR_INTEL) == 0) color_to_copy = COLOR_DEFAULT_INTEL;
  else {
    str_to_parse = optarg_str;
    free_ptr = false;
@@ -185,12 +199,14 @@ bool parse_args(int argc, char* argv[]) {

  args.version_flag = false;
  args.help_flag = false;
+  args.list_gpus = false;
  args.gpu_idx = 0;
  args.colors = NULL;

  const struct option long_options[] = {
    {args_str[ARG_COLOR],   required_argument, 0, args_chr[ARG_COLOR]   },
    {args_str[ARG_GPU],     required_argument, 0, args_chr[ARG_GPU]     },
+    {args_str[ARG_LIST],    no_argument,       0, args_chr[ARG_LIST]    },
    {args_str[ARG_HELP],    no_argument,       0, args_chr[ARG_HELP]    },
    {args_str[ARG_VERSION], no_argument,       0, args_chr[ARG_VERSION] },
    {0, 0, 0, 0}
@@ -199,7 +215,7 @@ bool parse_args(int argc, char* argv[]) {
  char* short_options = build_short_options();
  opt = getopt_long(argc, argv, short_options, long_options, &option_index);

-  while (!args.help_flag && !args.version_flag && opt != -1) {
+  while (!args.help_flag && !args.version_flag && !args.list_gpus && opt != -1) {
    if(opt == args_chr[ARG_COLOR]) {
      args.colors = (struct color **) emalloc(sizeof(struct color *) * NUM_COLORS);
      if(!parse_color(optarg, &args.colors)) {
@@ -215,6 +231,9 @@ bool parse_args(int argc, char* argv[]) {
        return false;
      }
    }
+    else if(opt == args_chr[ARG_LIST]) {
+      args.list_gpus = true;
+    }
    else if(opt == args_chr[ARG_HELP]) {
      args.help_flag = true;
    }
--- a/src/common/args.hpp
+++ b/src/common/args.hpp
@@ -21,6 +21,7 @@ enum {
 enum {
  ARG_COLOR,
  ARG_GPU,
+  ARG_LIST,
  ARG_HELP,
  ARG_VERSION
 };
@@ -33,6 +34,7 @@ extern const char *args_str[];
 int max_arg_str_length();
 bool parse_args(int argc, char* argv[]);
 bool show_help();
+bool list_gpus();
 bool show_version();
 void free_colors_struct(struct color** cs);
 int get_gpu_idx();
--- a/src/common/ascii.hpp
+++ b/src/common/ascii.hpp
@@ -1,32 +1,32 @@
 #ifndef __ASCII__
 #define __ASCII__

-#define COLOR_NONE         ""
-#define COLOR_FG_BLACK     "\x1b[30;1m"
-#define COLOR_FG_RED       "\x1b[31;1m"
-#define COLOR_FG_GREEN     "\x1b[32;1m"
-#define COLOR_FG_YELLOW    "\x1b[33;1m"
-#define COLOR_FG_BLUE      "\x1b[34;1m"
-#define COLOR_FG_MAGENTA   "\x1b[35;1m"
-#define COLOR_FG_CYAN      "\x1b[36;1m"
-#define COLOR_FG_WHITE     "\x1b[37;1m"
-#define COLOR_BG_BLACK     "\x1b[40;1m"
-#define COLOR_BG_RED       "\x1b[41;1m"
-#define COLOR_BG_GREEN     "\x1b[42;1m"
-#define COLOR_BG_YELLOW    "\x1b[43;1m"
-#define COLOR_BG_BLUE      "\x1b[44;1m"
-#define COLOR_BG_MAGENTA   "\x1b[45;1m"
-#define COLOR_BG_CYAN      "\x1b[46;1m"
-#define COLOR_BG_WHITE     "\x1b[47;1m"
-#define COLOR_FG_B_BLACK   "\x1b[90;1m"
-#define COLOR_FG_B_RED     "\x1b[91;1m"
-#define COLOR_FG_B_GREEN   "\x1b[92;1m"
-#define COLOR_FG_B_YELLOW  "\x1b[93;1m"
-#define COLOR_FG_B_BLUE    "\x1b[94;1m"
-#define COLOR_FG_B_MAGENTA "\x1b[95;1m"
-#define COLOR_FG_B_CYAN    "\x1b[96;1m"
-#define COLOR_FG_B_WHITE   "\x1b[97;1m"
-#define COLOR_RESET        "\x1b[m"
+#define C_NONE         ""
+#define C_FG_BLACK     "\x1b[30;1m"
+#define C_FG_RED       "\x1b[31;1m"
+#define C_FG_GREEN     "\x1b[32;1m"
+#define C_FG_YELLOW    "\x1b[33;1m"
+#define C_FG_BLUE      "\x1b[34;1m"
+#define C_FG_MAGENTA   "\x1b[35;1m"
+#define C_FG_CYAN      "\x1b[36;1m"
+#define C_FG_WHITE     "\x1b[37;1m"
+#define C_BG_BLACK     "\x1b[40;1m"
+#define C_BG_RED       "\x1b[41;1m"
+#define C_BG_GREEN     "\x1b[42;1m"
+#define C_BG_YELLOW    "\x1b[43;1m"
+#define C_BG_BLUE      "\x1b[44;1m"
+#define C_BG_MAGENTA   "\x1b[45;1m"
+#define C_BG_CYAN      "\x1b[46;1m"
+#define C_BG_WHITE     "\x1b[47;1m"
+#define C_FG_B_BLACK   "\x1b[90;1m"
+#define C_FG_B_RED     "\x1b[91;1m"
+#define C_FG_B_GREEN   "\x1b[92;1m"
+#define C_FG_B_YELLOW  "\x1b[93;1m"
+#define C_FG_B_BLUE    "\x1b[94;1m"
+#define C_FG_B_MAGENTA "\x1b[95;1m"
+#define C_FG_B_CYAN    "\x1b[96;1m"
+#define C_FG_B_WHITE   "\x1b[97;1m"
+#define C_RESET        "\x1b[m"

 struct ascii_logo {
  const char* art;
@@ -59,6 +59,23 @@ $C2##   ##  ##   ##  ##  ##   ##  ##   #: :#    \
 $C2##   ##   ## ##   ##  ##   ##  ##  #######   \
 $C2##   ##    ###    ##  ######   ## ##     ##  "

+#define ASCII_INTEL \
+"$C1                   .#################.          \
+$C1              .####                   ####.     \
+$C1          .##                             ###   \
+$C1       ##                          :##     ###  \
+$C1    #                ##            :##      ##  \
+$C1  ##   ##  ######.   ####  ######  :##      ##  \
+$C1 ##    ##  ##:  ##:  ##   ##   ### :##     ###  \
+$C1##     ##  ##:  ##:  ##  :######## :##    ##    \
+$C1##     ##  ##:  ##:  ##   ##.   .  :## ####     \
+$C1##      #  ##:  ##:  ####  #####:   ##          \
+$C1 ##                                             \
+$C1  ###.                         ..o####.         \
+$C1   ######oo...         ..oo#######              \
+$C1          o###############o                     "
+
+// LONG LOGOS
 #define ASCII_NVIDIA_L \
 "$C1                  MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM  \
 $C1                  MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM  \
@@ -76,14 +93,37 @@ $C1            olcc::;              ,:ccloMMMMMMMMM  \
 $C1                  :......oMMMMMMMMMMMMMMMMMMMMMM  \
 $C1                  :lllMMMMMMMMMMMMMMMMMMMMMMMMMM  "

+#define ASCII_INTEL_L \
+"$C1                               ###############@               \
+$C1                       ######@                ######@         \
+$C1                  ###@                              ###@      \
+$C1              ##@                                     ###@    \
+$C1         ##@                                             ##@  \
+$C1         ##@                                             ##@  \
+$C1      @                    ##@                ##@        ##@  \
+$C1    #@   ##@   ########@   #####@   #####@    ##@        ##@  \
+$C1   #@    ##@   ##@    ##@  ##@    ###@  ###@  ##@        ##@  \
+$C1  #@     ##@   ##@    ##@  ##@    ##@    ##@  ##@       ##@   \
+$C1 #@      ##@   ##@    ##@  ##@    #########@  ##@     ###@    \
+$C1 #@      ##@   ##@    ##@  ##@    ##@         ##@   ####@     \
+$C1 #@       #@   ##@    ##@   ####@  ########@   #@  ##@        \
+$C1 ##@                                                          \
+$C1  ##@                                                         \
+$C1  ###@                                        ###@            \
+$C1    ####@                               #########@            \
+$C1      #########@               ###############@               \
+$C1          ##############################@                     "
+
 typedef struct ascii_logo asciiL;

-//                      ------------------------------------------------------------------------------------------------------
-//                      | LOGO          | W | H | REPLACE | COLORS LOGO (>0 && <10)        | COLORS TEXT (=2)                |
-//                      ------------------------------------------------------------------------------------------------------
-asciiL logo_nvidia    = { ASCII_NVIDIA,    45, 19, false, {COLOR_FG_GREEN, COLOR_FG_WHITE}, {COLOR_FG_WHITE, COLOR_FG_GREEN} };
-// Long variants        | ---------------------------------------------------------------------------------------------------|
-asciiL logo_nvidia_l  = { ASCII_NVIDIA_L,  50, 15, false, {COLOR_FG_GREEN, COLOR_FG_WHITE}, {COLOR_FG_WHITE, COLOR_FG_GREEN} };
-asciiL logo_unknown   = { NULL,            0,  0,  false, {COLOR_NONE},                     {COLOR_NONE,    COLOR_NONE}      };
+//                      ------------------------------------------------------------------------------------------
+//                      | LOGO            | W | H | REPLACE | COLORS LOGO           | COLORS TEXT                |
+//                      ------------------------------------------------------------------------------------------
+asciiL logo_nvidia    = { ASCII_NVIDIA,    45, 19, false, {C_FG_GREEN, C_FG_WHITE}, {C_FG_WHITE, C_FG_GREEN} };
+asciiL logo_intel     = { ASCII_INTEL,     48, 14, false, {C_FG_CYAN},              {C_FG_CYAN,  C_FG_WHITE} };
+// Long variants        | ---------------------------------------------------------------------------------------|
+asciiL logo_nvidia_l  = { ASCII_NVIDIA_L,  50, 15, false, {C_FG_GREEN, C_FG_WHITE}, {C_FG_WHITE, C_FG_GREEN} };
+asciiL logo_intel_l   = { ASCII_INTEL_L,   62, 19, true,  {C_BG_CYAN, C_BG_WHITE},  {C_FG_CYAN,  C_FG_WHITE} };
+asciiL logo_unknown   = { NULL,            0,  0,  false, {C_NONE},                 {C_NONE,     C_NONE}     };

 #endif
--- a/src/common/global.hpp
+++ b/src/common/global.hpp
@@ -2,7 +2,6 @@
 #define __GLOBAL__

 #include <stdbool.h>
-#include <stddef.h>
 #include <cstddef>

 #define STRING_UNKNOWN "Unknown"
--- a/src/common/gpu.cpp
+++ b/src/common/gpu.cpp
@@ -32,8 +32,6 @@ VENDOR get_gpu_vendor(struct gpu_info* gpu) {
  return gpu->vendor;
 }

-double trunc(double val) { return ((int)(100 * val)) / 100.0; }
-
 int32_t get_value_as_smallest_unit(char ** str, uint64_t value) {
  int32_t ret;
  int max_len = 10; // Max is 8 for digits, 2 for units
@@ -116,17 +114,17 @@ char* get_str_l2(struct gpu_info* gpu) {
  return string;
 }

-char* get_str_peak_performance(struct gpu_info* gpu) {
+char* get_str_peak_performance_generic(int64_t pp) {
  char* str;

-  if(gpu->peak_performance == -1) {
+  if(pp == -1) {
    str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
    strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
    return str;
  }

  // 7 for digits (e.g, XXXX.XX), 7 for XFLOP/s
-  double flopsd = (double) gpu->peak_performance;
+  double flopsd = (double) pp;
  uint32_t max_size = 7+1+7+1;
  str = (char *) ecalloc(max_size, sizeof(char));

@@ -139,3 +137,19 @@ char* get_str_peak_performance(struct gpu_info* gpu) {

  return str;
 }
+
+// Formats gpu->peak_performance with get_str_peak_performance_generic()
+char* get_str_peak_performance(struct gpu_info* gpu) {
+  return get_str_peak_performance_generic(gpu->peak_performance);
+}
+
+// Formats gpu->peak_performance_tcu with get_str_peak_performance_generic()
+char* get_str_peak_performance_tensor(struct gpu_info* gpu) {
+  return get_str_peak_performance_generic(gpu->peak_performance_tcu);
+}
+
+// Returns a newly allocated string holding the decimal representation of data
+char* get_str_generic(int32_t data) {
+  // An int32_t needs at most 10 digits, plus an optional '-' sign and the
+  // terminating NUL character
+  const uint32_t buf_len = 12;
+  char* text = (char *) ecalloc(buf_len, sizeof(char));
+
+  snprintf(text, buf_len, "%d", data);
+
+  return text;
+}
--- a/src/common/gpu.hpp
+++ b/src/common/gpu.hpp
@@ -4,13 +4,13 @@
 #include <stdint.h>
 #include <stdbool.h>

-#include "../cuda/nvmlb.hpp"
 #include "../cuda/pci.hpp"

 #define UNKNOWN_FREQ -1

 enum {
-  GPU_VENDOR_NVIDIA
+  GPU_VENDOR_NVIDIA,
+  GPU_VENDOR_INTEL
 };

 enum {
@@ -41,6 +41,13 @@ struct topology {
  int32_t streaming_mp;
  int32_t cores_per_mp;
  int32_t cuda_cores;
+  int32_t tensor_cores;
+};
+
+struct topology_i {
+  int32_t slices;
+  int32_t subslices;
+  int32_t eu_subslice;
 };

 struct memory {
@@ -57,11 +64,12 @@ struct gpu_info {
  char* name;
  int64_t freq;
  struct pci* pci;
-  struct nvml_data* nvmld;
  struct topology* topo;
+  struct topology_i* topo_i;
  struct memory* mem;
  struct cache* cach;
  int64_t peak_performance;
+  int64_t peak_performance_tcu;
  int32_t idx;
 };

@@ -74,5 +82,7 @@ char* get_str_bus_width(struct gpu_info* gpu);
 char* get_str_memory_clock(struct gpu_info* gpu);
 char* get_str_l2(struct gpu_info* gpu);
 char* get_str_peak_performance(struct gpu_info* gpu);
+char* get_str_peak_performance_tensor(struct gpu_info* gpu);
+char* get_str_generic(int32_t data);

 #endif
--- a/src/common/main.cpp
+++ b/src/common/main.cpp
@@ -4,10 +4,11 @@

 #include "args.hpp"
 #include "global.hpp"
+#include "master.hpp"
 #include "../cuda/cuda.hpp"
 #include "../cuda/uarch.hpp"

-static const char* VERSION = "0.10";
+static const char* VERSION = "0.11";

 void print_help(char *argv[]) {
  const char **t = args_str;
@@ -18,10 +19,11 @@ void print_help(char *argv[]) {
  printf("Simple yet fancy GPU architecture fetching tool\n\n");

  printf("Options: \n");
-  printf("  -%c, --%s %*s Sets the color scheme (by default, gpufetch uses the system color scheme) See COLORS section for a more detailed explanation\n", c[ARG_COLOR], t[ARG_COLOR], (int) (max_len-strlen(t[ARG_COLOR])), "");
-  printf("  -%c, --%s %*s Selects the GPU to use (default: 0)\n", c[ARG_GPU], t[ARG_GPU], (int) (max_len-strlen(t[ARG_GPU])), "");
-  printf("  -%c, --%s %*s Prints this help and exit\n", c[ARG_HELP], t[ARG_HELP], (int) (max_len-strlen(t[ARG_HELP])), "");
-  printf("  -%c, --%s %*s Prints gpufetch version and exit\n", c[ARG_VERSION], t[ARG_VERSION], (int) (max_len-strlen(t[ARG_VERSION])), "");
+  printf("  -%c, --%s %*s Set the color scheme (by default, gpufetch uses the system color scheme) See COLORS section for a more detailed explanation\n", c[ARG_COLOR], t[ARG_COLOR], (int) (max_len-strlen(t[ARG_COLOR])), "");
+  printf("  -%c, --%s %*s List the available GPUs in the system\n", c[ARG_LIST], t[ARG_LIST], (int) (max_len-strlen(t[ARG_LIST])), "");
+  printf("  -%c, --%s %*s Select the GPU to use (default: 0)\n", c[ARG_GPU], t[ARG_GPU], (int) (max_len-strlen(t[ARG_GPU])), "");
+  printf("  -%c, --%s %*s Print this help and exit\n", c[ARG_HELP], t[ARG_HELP], (int) (max_len-strlen(t[ARG_HELP])), "");
+  printf("  -%c, --%s %*s Print gpufetch version and exit\n", c[ARG_VERSION], t[ARG_VERSION], (int) (max_len-strlen(t[ARG_VERSION])), "");

  printf("\nCOLORS: \n");
  printf("  Color scheme can be set using a predefined color scheme or a custom one:\n");
@@ -64,14 +66,19 @@ int main(int argc, char* argv[]) {
    return EXIT_SUCCESS;
  }

+  struct gpu_list* list = get_gpu_list();
+  if(list_gpus()) {
+    return print_gpus_list(list);
+  }
+
  set_log_level(true);

-  printWarn("gpufetch is in beta. The provided information may be incomplete or wrong.\n\
+  printf("[WARNING]: gpufetch is in beta. The provided information may be incomplete or wrong.\n\
 If you want to help to improve gpufetch, please compare the output of the program\n\
 with a reliable source which you know is right (e.g, techpowerup.com) and report\n\
-any inconsistencies to https://github.com/Dr-Noob/gpufetch/issues");
+any inconsistencies to https://github.com/Dr-Noob/gpufetch/issues\n");

-  struct gpu_info* gpu = get_gpu_info(get_gpu_idx());
+  struct gpu_info* gpu = get_gpu_info(list, get_gpu_idx());
  if(gpu == NULL)
    return EXIT_FAILURE;

--- a/src/common/master.cpp
+++ b/src/common/master.cpp
@@ -0,0 +1,62 @@
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "master.hpp"
+#include "../cuda/cuda.hpp"
+#include "../intel/intel.hpp"
+
+#define MAX_GPUS 1000
+
+// List of the GPUs found by the enabled backends
+struct gpu_list {
+  // Array of GPU descriptors; get_gpu_list() allocates room for MAX_GPUS pointers
+  struct gpu_info ** gpus;
+  // Number of entries of gpus that have been filled in
+  int num_gpus;
+};
+
+// Builds the list of GPUs reported by every backend that was compiled in.
+// With BACKEND_CUDA, the GPUs returned by get_gpu_info_cuda() come first, in
+// index order; with BACKEND_INTEL, the GPU returned by get_gpu_info_intel()
+// (if any) is appended after them.
+// The returned list is heap-allocated and holds room for MAX_GPUS entries.
+struct gpu_list* get_gpu_list() {
+  int idx = 0;
+  struct gpu_list* list = (struct gpu_list*) malloc(sizeof(struct gpu_list));
+  list->num_gpus = 0;
+  // Zero-initialized so that every slot which is not filled below is NULL
+  list->gpus = (struct gpu_info**) calloc(MAX_GPUS, sizeof(struct gpu_info*));
+
+#ifdef BACKEND_CUDA
+  // Query CUDA GPUs by increasing index until get_gpu_info_cuda() returns
+  // NULL. The last slot is never used here so that the Intel GPU below
+  // always has room.
+  while(idx < MAX_GPUS - 1) {
+    struct gpu_info* gpu = get_gpu_info_cuda(idx);
+    if(gpu == NULL) break;
+    list->gpus[idx] = gpu;
+    idx++;
+  }
+
+  list->num_gpus += idx;
+#endif
+
+#ifdef BACKEND_INTEL
+  list->gpus[idx] = get_gpu_info_intel();
+  if(list->gpus[idx] != NULL) list->num_gpus++;
+#endif
+
+  return list;
+}
+
+// Prints "GPU <index>: " for every GPU of the list and then hands the entry
+// to the print function of the backend matching its vendor (only when that
+// backend was compiled in). Always returns true.
+bool print_gpus_list(struct gpu_list* list) {
+  int i = 0;
+
+  while(i < list->num_gpus) {
+    struct gpu_info* gpu = list->gpus[i];
+
+    printf("GPU %d: ", i);
+    if(gpu->vendor == GPU_VENDOR_NVIDIA) {
+      #ifdef BACKEND_CUDA
+        print_gpu_cuda(gpu);
+      #endif
+    }
+    else if(gpu->vendor == GPU_VENDOR_INTEL) {
+      #ifdef BACKEND_INTEL
+        print_gpu_intel(gpu);
+      #endif
+    }
+
+    i++;
+  }
+
+  return true;
+}
+
+// Returns the GPU stored at position idx of the list.
+// idx comes from the command line, so it is validated here: when it does not
+// refer to a filled entry, the problem is reported on stderr and NULL is
+// returned instead of reading outside the filled part of the array.
+struct gpu_info* get_gpu_info(struct gpu_list* list, int idx) {
+  if(list == NULL || idx < 0 || idx >= list->num_gpus) {
+    fprintf(stderr, "[ERROR]: GPU index %d is out of range (%d GPU(s) found)\n", idx, list == NULL ? 0 : list->num_gpus);
+    return NULL;
+  }
+
+  return list->gpus[idx];
+}
--- a/src/common/master.hpp
+++ b/src/common/master.hpp
@@ -0,0 +1,12 @@
+#ifndef __GPU_LIST__
+#define __GPU_LIST__
+
+#include "gpu.hpp"
+
+// Opaque list of GPUs; the struct is defined in master.cpp
+struct gpu_list;
+
+// Builds the list of GPUs reported by the backends that were compiled in
+struct gpu_list* get_gpu_list();
+// Prints one "GPU <index>: " entry per GPU of the list
+bool print_gpus_list(struct gpu_list* list);
+// Returns the GPU stored at position idx of the list
+struct gpu_info* get_gpu_info(struct gpu_list* list, int idx);
+
+#endif
--- a/src/common/pci.cpp
+++ b/src/common/pci.cpp
@@ -0,0 +1,73 @@
+#include "global.hpp"
+#include "pci.hpp"
+#include <cstddef>
+
+#define CLASS_VGA_CONTROLLER 0x0300
+
+/*
+ * Tells whether the device list contains a VGA controller whose vendor id
+ * is 'id'. A warning is printed when there is none.
+ */
+bool pciutils_is_vendor_id_present(struct pci_dev *devices, int id) {
+  struct pci_dev *cur = devices;
+
+  while(cur != NULL) {
+    if(cur->vendor_id == id && cur->device_class == CLASS_VGA_CONTROLLER) {
+      return true;
+    }
+    cur = cur->next;
+  }
+
+  printWarn("Unable to find a valid device for id %d using pciutils", id);
+  return false;
+}
+
+/*
+ * Returns the device id of the first VGA controller in the list whose
+ * vendor id is 'id'. When there is none, an error is printed and 0 is
+ * returned.
+ */
+uint16_t pciutils_get_pci_device_id(struct pci_dev *devices, int id) {
+  struct pci_dev *cur = devices;
+
+  while(cur != NULL) {
+    if(cur->vendor_id == id && cur->device_class == CLASS_VGA_CONTROLLER) {
+      return cur->device_id;
+    }
+    cur = cur->next;
+  }
+
+  printErr("Unable to find a valid device for id %d using pciutils", id);
+  return 0;
+}
+
+/*
+ * Copies the PCI location (domain, bus, device and function) of the first
+ * VGA controller whose vendor id is 'id' into 'pci'. When no such device
+ * exists, 'pci' is left untouched and an error is printed.
+ */
+void pciutils_set_pci_bus(struct pci* pci, struct pci_dev *devices, int id) {
+  bool found = false;
+
+  for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
+    if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
+      pci->domain = dev->domain;
+      pci->bus = dev->bus;
+      pci->dev = dev->dev;
+      pci->func = dev->func;
+      found = true;
+      // Stop at the first match: pciutils_get_pci_device_id() also reports
+      // the first matching device, so both describe the same GPU when
+      // several devices of the same vendor are installed
+      break;
+    }
+  }
+
+  if(!found) printErr("Unable to find a valid device for id %d using pciutils", id);
+}
+
+/*
+ * Builds a 'struct pci' describing the VGA controller of vendor 'id' found
+ * in the device list.
+ *
+ * Returns a newly allocated struct (owned by the caller), or NULL when the
+ * list contains no VGA controller of that vendor.
+ */
+struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id) {
+  // Look for the device before allocating, so that nothing is leaked when
+  // the vendor is not present
+  if(!pciutils_is_vendor_id_present(devices, id)) {
+    return NULL;
+  }
+
+  struct pci* pci = (struct pci*) emalloc(sizeof(struct pci));
+
+  // TODO: Refactor this; instead of 2xGet + 1xSet, do it better
+  pci->vendor_id = id;
+  pci->device_id = pciutils_get_pci_device_id(devices, id);
+  pciutils_set_pci_bus(pci, devices, id);
+
+  return pci;
+}
+
+/*
+ * Scans the PCI bus with libpci and returns the head of the resulting
+ * device list, with identification, base address and class information
+ * requested for every device.
+ *
+ * NOTE(review): the pci_access handle created here is never released and
+ * a new one is created on every call. Releasing it would presumably
+ * invalidate the returned list as well - confirm against libpci before
+ * changing this.
+ */
+struct pci_dev *get_pci_devices_from_pciutils() {
+  struct pci_access *pacc;
+  struct pci_dev *dev;
+
+  pacc = pci_alloc();
+  pci_init(pacc);
+  pci_scan_bus(pacc);
+
+  // Request the fields that the lookups in this file read
+  // (vendor_id, device_id and device_class)
+  for (dev=pacc->devices; dev; dev=dev->next) {
+    pci_fill_info(dev, PCI_FILL_IDENT | PCI_FILL_BASES | PCI_FILL_CLASS);
+  }
+
+  return pacc->devices;
+}
--- a/src/common/pci.hpp
+++ b/src/common/pci.hpp
@@ -0,0 +1,21 @@
+#ifndef __GPUFETCH_PCI__
+#define __GPUFETCH_PCI__
+
+#include <cstdint>
+extern "C" {
+  #include <pci/pci.h>
+}
+
+// Identification and location of a single PCI device.
+// NOTE(review): every field is 16 bits wide; confirm that the libpci
+// fields they are copied from (e.g. the domain) always fit.
+struct pci {
+  // PCI vendor id
+  uint16_t vendor_id;
+  // PCI device id
+  uint16_t device_id;
+  // Location of the device: domain, bus, device and function numbers
+  uint16_t domain;
+  uint16_t bus;
+  uint16_t dev;
+  uint16_t func;
+};
+
+struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id);
+struct pci_dev *get_pci_devices_from_pciutils();
+
+#endif
--- a/src/common/printer.cpp
+++ b/src/common/printer.cpp
@@ -9,6 +9,8 @@
 #include "../common/global.hpp"
 #include "../common/gpu.hpp"

+#include "../intel/uarch.hpp"
+#include "../intel/intel.hpp"
 #include "../cuda/cuda.hpp"
 #include "../cuda/uarch.hpp"

@@ -34,15 +36,19 @@ enum {
  ATTRIBUTE_CHIP,
  ATTRIBUTE_UARCH,
  ATTRIBUTE_TECHNOLOGY,
+  ATTRIBUTE_GT,
  ATTRIBUTE_FREQUENCY,
  ATTRIBUTE_STREAMINGMP,
  ATTRIBUTE_CORESPERMP,
  ATTRIBUTE_CUDA_CORES,
+  ATTRIBUTE_TENSOR_CORES,
+  ATTRIBUTE_EUS,
  ATTRIBUTE_L2,
  ATTRIBUTE_MEMORY,
  ATTRIBUTE_MEMORY_FREQ,
  ATTRIBUTE_BUS_WIDTH,
-  ATTRIBUTE_PEAK
+  ATTRIBUTE_PEAK,
+  ATTRIBUTE_PEAK_TENSOR,
 };

 static const char* ATTRIBUTE_FIELDS [] = {
@@ -50,15 +56,19 @@ static const char* ATTRIBUTE_FIELDS [] = {
  "GPU processor:",
  "Microarchitecture:",
  "Technology:",
+  "Graphics Tier:",
  "Max Frequency:",
  "SMs:",
  "Cores/SM:",
-  "CUDA cores:",
+  "CUDA Cores:",
+  "Tensor Cores:",
+  "Execution Units:",
  "L2 Size:",
  "Memory:",
  "Memory frequency:",
  "Bus width:",
  "Peak Performance:",
+  "Peak Performance (MMA):",
 };

 static const char* ATTRIBUTE_FIELDS_SHORT [] = {
@@ -66,15 +76,19 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
  "Processor:",
  "uArch:",
  "Technology:",
+  "GT:",
  "Max Freq.:",
  "SMs:",
  "Cores/SM:",
-  "CUDA cores:",
+  "CUDA Cores:",
+  "Tensor Cores:",
+  "EUs:",
  "L2 Size:",
  "Memory:",
  "Memory freq.:",
  "Bus width:",
  "Peak Perf.:",
+  "Peak Perf.(MMA):",
 };

 struct terminal {
@@ -194,23 +208,32 @@ void replace_bgbyfg_color(struct ascii_logo* logo) {
  for(int i=0; i < 2; i++) {
    if(logo->color_ascii[i] == NULL) break;

-    if(strcmp(logo->color_ascii[i], COLOR_BG_BLACK) == 0) strcpy(logo->color_ascii[i], COLOR_FG_BLACK);
-    else if(strcmp(logo->color_ascii[i], COLOR_BG_RED) == 0) strcpy(logo->color_ascii[i], COLOR_FG_RED);
-    else if(strcmp(logo->color_ascii[i], COLOR_BG_GREEN) == 0) strcpy(logo->color_ascii[i], COLOR_FG_GREEN);
-    else if(strcmp(logo->color_ascii[i], COLOR_BG_YELLOW) == 0) strcpy(logo->color_ascii[i], COLOR_FG_YELLOW);
-    else if(strcmp(logo->color_ascii[i], COLOR_BG_BLUE) == 0) strcpy(logo->color_ascii[i], COLOR_FG_BLUE);
-    else if(strcmp(logo->color_ascii[i], COLOR_BG_MAGENTA) == 0) strcpy(logo->color_ascii[i], COLOR_FG_MAGENTA);
-    else if(strcmp(logo->color_ascii[i], COLOR_BG_CYAN) == 0) strcpy(logo->color_ascii[i], COLOR_FG_CYAN);
-    else if(strcmp(logo->color_ascii[i], COLOR_BG_WHITE) == 0) strcpy(logo->color_ascii[i], COLOR_FG_WHITE);
+    if(strcmp(logo->color_ascii[i], C_BG_BLACK) == 0) strcpy(logo->color_ascii[i], C_FG_BLACK);
+    else if(strcmp(logo->color_ascii[i], C_BG_RED) == 0) strcpy(logo->color_ascii[i], C_FG_RED);
+    else if(strcmp(logo->color_ascii[i], C_BG_GREEN) == 0) strcpy(logo->color_ascii[i], C_FG_GREEN);
+    else if(strcmp(logo->color_ascii[i], C_BG_YELLOW) == 0) strcpy(logo->color_ascii[i], C_FG_YELLOW);
+    else if(strcmp(logo->color_ascii[i], C_BG_BLUE) == 0) strcpy(logo->color_ascii[i], C_FG_BLUE);
+    else if(strcmp(logo->color_ascii[i], C_BG_MAGENTA) == 0) strcpy(logo->color_ascii[i], C_FG_MAGENTA);
+    else if(strcmp(logo->color_ascii[i], C_BG_CYAN) == 0) strcpy(logo->color_ascii[i], C_FG_CYAN);
+    else if(strcmp(logo->color_ascii[i], C_BG_WHITE) == 0) strcpy(logo->color_ascii[i], C_FG_WHITE);
+  }
+}
+
+// Picks the long logo when the terminal size is known and the logo fits in
+// it next to a text column of width 'lf'; otherwise falls back to the short
+// logo. 'term' may be NULL, in which case the short logo is chosen (this is
+// what the code did before the selection was factored out).
+struct ascii_logo* choose_ascii_art_aux(struct ascii_logo* logo_long, struct ascii_logo* logo_short, struct terminal* term, int lf) {
+  if(term != NULL && ascii_fits_screen(term->w, *logo_long, lf)) {
+    return logo_long;
+  }
+  else {
+    return logo_short;
  }
 }

 void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* term, int lf) {
  if(art->vendor == GPU_VENDOR_NVIDIA) {
-    if(term != NULL && ascii_fits_screen(term->w, logo_nvidia_l, lf))
-      art->art = &logo_nvidia_l;
-    else
-      art->art = &logo_nvidia;
+    art->art = choose_ascii_art_aux(&logo_nvidia_l, &logo_nvidia, term, lf);
+  }
+  else if(art->vendor == GPU_VENDOR_INTEL) {
+    art->art = choose_ascii_art_aux(&logo_intel_l, &logo_intel, term, lf);
  }
  else {
    art->art = &logo_unknown;
@@ -222,10 +245,10 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
  switch(art->style) {
    case STYLE_LEGACY:
      logo->replace_blocks = false;
-      strcpy(logo->color_text[0], COLOR_NONE);
-      strcpy(logo->color_text[1], COLOR_NONE);
-      strcpy(logo->color_ascii[0], COLOR_NONE);
-      strcpy(logo->color_ascii[1], COLOR_NONE);
+      strcpy(logo->color_text[0], C_NONE);
+      strcpy(logo->color_text[1], C_NONE);
+      strcpy(logo->color_ascii[0], C_NONE);
+      strcpy(logo->color_ascii[1], C_NONE);
      art->reset[0] = '\0';
      break;
    case STYLE_RETRO:
@@ -239,7 +262,7 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
        strcpy(logo->color_ascii[0], rgb_to_ansi(cs[0], logo->replace_blocks, true));
        strcpy(logo->color_ascii[1], rgb_to_ansi(cs[1], logo->replace_blocks, true));
      }
-      strcpy(art->reset, COLOR_RESET);
+      strcpy(art->reset, C_RESET);
      break;
    case STYLE_INVALID:
    default:
@@ -336,6 +359,48 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t text_space, con
  printf("\n");
 }

+#ifdef BACKEND_INTEL
+// Prints the logo and the attribute table for an Intel GPU.
+// Returns false when the ascii art cannot be set up, true otherwise.
+// NOTE(review): 'term' is dereferenced without a NULL check below; confirm
+// that get_terminal_size() never returns NULL.
+bool print_gpufetch_intel(struct gpu_info* gpu, STYLE s, struct color** cs, struct terminal* term) {
+  struct ascii* art = set_ascii(get_gpu_vendor(gpu), s);
+
+  if(art == NULL)
+    return false;
+
+  // Build the strings to be shown for every attribute
+  char* gpu_name = get_str_gpu_name(gpu);
+  char* uarch = get_str_uarch_intel(gpu->arch);
+  char* gt = get_str_gt(gpu->arch);
+  char* manufacturing_process = get_str_process(gpu->arch);
+  char* eus = get_str_eu(gpu);
+  char* max_frequency = get_str_freq(gpu);
+  char* pp = get_str_peak_performance(gpu);
+
+  setAttribute(art, ATTRIBUTE_NAME, gpu_name);
+  setAttribute(art, ATTRIBUTE_UARCH, uarch);
+  setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
+  setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
+  setAttribute(art, ATTRIBUTE_GT, gt);
+  setAttribute(art, ATTRIBUTE_EUS, eus);
+  setAttribute(art, ATTRIBUTE_PEAK, pp);
+
+  // Start with the long field names and choose the logo according to the
+  // width they need
+  const char** attribute_fields = ATTRIBUTE_FIELDS;
+  uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
+  uint32_t longest_field = longest_field_length(art, longest_attribute);
+  choose_ascii_art(art, cs, term, longest_field);
+
+  if(!ascii_fits_screen(term->w, *art->art, longest_field)) {
+    // Despite of choosing the smallest logo, the output does not fit
+    // Choose the shorter field names and recalculate the longest attr
+    attribute_fields = ATTRIBUTE_FIELDS_SHORT;
+    longest_attribute = longest_attribute_length(art, attribute_fields);
+  }
+
+  print_ascii_generic(art, longest_attribute, term->w - art->art->width, attribute_fields);
+
+  return true;
+}
+#endif
+
+#ifdef BACKEND_CUDA
 bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struct terminal* term) {
  struct ascii* art = set_ascii(get_gpu_vendor(gpu), s);

@@ -344,12 +409,13 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc

  char* gpu_name = get_str_gpu_name(gpu);
  char* gpu_chip = get_str_chip(gpu->arch);
-  char* uarch = get_str_uarch(gpu->arch);
+  char* uarch = get_str_uarch_cuda(gpu->arch);
  char* comp_cap = get_str_cc(gpu->arch);
  char* manufacturing_process = get_str_process(gpu->arch);
  char* sms = get_str_sm(gpu);
  char* corespersm = get_str_cores_sm(gpu);
  char* cores = get_str_cuda_cores(gpu);
+  char* tensorc = get_str_tensor_cores(gpu);
  char* max_frequency = get_str_freq(gpu);
  char* l2 = get_str_l2(gpu);
  char* mem_size = get_str_memory_size(gpu);
@@ -357,6 +423,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
  char* mem_freq = get_str_memory_clock(gpu);
  char* bus_width = get_str_bus_width(gpu);
  char* pp = get_str_peak_performance(gpu);
+  char* pp_tensor = get_str_peak_performance_tensor(gpu);

  char* mem = (char *) emalloc(sizeof(char) * (strlen(mem_size) + strlen(mem_type) + 2));
  sprintf(mem, "%s %s", mem_size, mem_type);
@@ -372,11 +439,17 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc
  setAttribute(art, ATTRIBUTE_STREAMINGMP, sms);
  setAttribute(art, ATTRIBUTE_CORESPERMP, corespersm);
  setAttribute(art, ATTRIBUTE_CUDA_CORES, cores);
+  if(gpu->topo->tensor_cores > 0) {
+    setAttribute(art, ATTRIBUTE_TENSOR_CORES, tensorc);
+  }
  setAttribute(art, ATTRIBUTE_MEMORY, mem);
  setAttribute(art, ATTRIBUTE_MEMORY_FREQ, mem_freq);
  setAttribute(art, ATTRIBUTE_BUS_WIDTH, bus_width);
  setAttribute(art, ATTRIBUTE_L2, l2);
  setAttribute(art, ATTRIBUTE_PEAK, pp);
+  if(gpu->topo->tensor_cores > 0) {
+    setAttribute(art, ATTRIBUTE_PEAK_TENSOR, pp_tensor);
+  }

  const char** attribute_fields = ATTRIBUTE_FIELDS;
  uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
@@ -402,6 +475,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc

  return true;
 }
+#endif

 struct terminal* get_terminal_size() {
  struct terminal* term = (struct terminal*) emalloc(sizeof(struct terminal));
@@ -434,5 +508,17 @@ struct terminal* get_terminal_size() {
 bool print_gpufetch(struct gpu_info* gpu, STYLE s, struct color** cs) {
  struct terminal* term = get_terminal_size();

+  if(gpu->vendor == GPU_VENDOR_NVIDIA)
+    #ifdef BACKEND_CUDA
      return print_gpufetch_cuda(gpu, s, cs, term);
+    #else
+      return false;
+    #endif
+  else {
+    #ifdef BACKEND_INTEL
+      return print_gpufetch_intel(gpu, s, cs, term);
+    #else
+      return false;
+    #endif
+  }
 }
--- a/src/common/uarch.cpp
+++ b/src/common/uarch.cpp
@@ -0,0 +1,28 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "global.hpp"
+#include "uarch.hpp"
+
+/*
+ * Returns a newly allocated string describing the manufacturing process:
+ *  - STRING_UNKNOWN when the process is UNK,
+ *  - "<x.xx>um" (process / 100) when the value is greater than 100,
+ *  - "<n>nm" when the value is between 1 and 100,
+ *  - STRING_UNKNOWN (plus a bug report) for any other value.
+ * The caller owns the returned string.
+ */
+char* get_str_process(struct uarch* arch) {
+  // The buffer must hold either STRING_UNKNOWN or the formatted number, so
+  // its size cannot depend on the length of STRING_UNKNOWN alone. 32 bytes
+  // are enough for any int32_t value followed by its unit.
+  size_t size = strlen(STRING_UNKNOWN) + 1;
+  if(size < 32) size = 32;
+
+  char* str = (char *) emalloc(sizeof(char) * size);
+  int32_t process = arch->process;
+
+  if(process == UNK) {
+    // "%s" so that STRING_UNKNOWN is never interpreted as a format string
+    snprintf(str, size, "%s", STRING_UNKNOWN);
+  }
+  else if(process > 100) {
+    snprintf(str, size, "%.2fum", (double)process/100);
+  }
+  else if(process > 0){
+    snprintf(str, size, "%dnm", process);
+  }
+  else {
+    snprintf(str, size, "%s", STRING_UNKNOWN);
+    printBug("Found invalid process: '%d'", process);
+  }
+
+  return str;
+}
+
--- a/src/common/uarch.hpp
+++ b/src/common/uarch.hpp
@@ -0,0 +1,31 @@
+#ifndef __COMMON_UARCH__
+#define __COMMON_UARCH__
+
+// Data not available
+#define NA                   -1
+
+// Unknown manufacturing process
+#define UNK                  -1
+
+typedef uint32_t GPUCHIP;
+typedef uint32_t MICROARCH;
+
+// Microarchitecture description shared by the CUDA and Intel backends.
+// NOTE(review): this header uses fixed-width integer types but does not
+// include <stdint.h> itself; every includer has to do it first.
+struct uarch {
+  // NVIDIA specific
+  int32_t cc_major;
+  int32_t cc_minor;
+  int32_t compute_capability;
+
+  // Intel specific
+  int32_t gt;
+  int32_t eu;
+
+  // Backend specific microarchitecture and chip identifiers
+  MICROARCH uarch;
+  GPUCHIP chip;
+
+  // Manufacturing process (UNK when it is not known)
+  int32_t process;
+  char* uarch_str;
+  char* chip_str;
+};
+
+#endif
--- a/src/cuda/chips.hpp
+++ b/src/cuda/chips.hpp
@@ -1,10 +1,10 @@
-#ifndef __GPUCHIPS__
-#define __GPUCHIPS__
+#ifndef __CUDA_GPUCHIPS__
+#define __CUDA_GPUCHIPS__

 typedef uint32_t GPUCHIP;

 enum {
-  CHIP_UNKNOWN,
+  CHIP_UNKNOWN_CUDA,
  CHIP_G80,
  CHIP_G80GL,
  CHIP_G84,
--- a/src/cuda/cuda.cpp
+++ b/src/cuda/cuda.cpp
@@ -2,10 +2,18 @@
 #include <cuda_runtime.h>

 #include "cuda.hpp"
-#include "nvmlb.hpp"
 #include "uarch.hpp"
+#include "../common/pci.hpp"
 #include "../common/global.hpp"

+// Prints "<name> (Compute Capability <cc>)" followed by a newline for the
+// given GPU. Always returns true.
+bool print_gpu_cuda(struct gpu_info* gpu) {
+  char* compute_cap = get_str_cc(gpu->arch);
+
+  printf("%s (Compute Capability %s)\n", gpu->name, compute_cap);
+
+  // The string is only needed for printing
+  free(compute_cap);
+  return true;
+}
+
 struct cache* get_cache_info(cudaDeviceProp prop) {
  struct cache* cach = (struct cache*) emalloc(sizeof(struct cache));

@@ -17,12 +25,19 @@ struct cache* get_cache_info(cudaDeviceProp prop) {
  return cach;
 }

+// Number of tensor cores for a GPU with 'sm' streaming multiprocessors and
+// compute capability major version 'major': 8 per SM for major 7, 4 per SM
+// for major 8 and none for any other version.
+int get_tensor_cores(int sm, int major) {
+  switch(major) {
+    case 7:
+      return sm * 8;
+    case 8:
+      return sm * 4;
+    default:
+      return 0;
+  }
+}
+
 struct topology* get_topology_info(cudaDeviceProp prop) {
  struct topology* topo = (struct topology*) emalloc(sizeof(struct topology));

  topo->streaming_mp = prop.multiProcessorCount;
  topo->cores_per_mp = _ConvertSMVer2Cores(prop.major, prop.minor);
  topo->cuda_cores = topo->streaming_mp * topo->cores_per_mp;
+  topo->tensor_cores = get_tensor_cores(topo->streaming_mp, prop.major);

  return topo;
 }
@@ -60,11 +75,17 @@ struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) {
  return mem;
 }

-int64_t get_peak_performance(struct gpu_info* gpu) {
+// Compute peak performance when using CUDA cores
+int64_t get_peak_performance_cuda(struct gpu_info* gpu) {
  return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2;
 }

-struct gpu_info* get_gpu_info(int gpu_idx) {
+// Compute peak performance when using tensor cores
+// The result is freq * 1000000 * 4 * 4 * 8 * tensor_cores.
+// NOTE(review): presumably freq is in MHz and 4 * 4 * 8 is the work done
+// by one tensor core per cycle - confirm. Also confirm that gpu->freq is a
+// 64-bit value; otherwise the product may overflow before it is widened.
+int64_t get_peak_performance_tcu(struct gpu_info* gpu) {
+  return gpu->freq * 1000000 * 4 * 4 * 8 * gpu->topo->tensor_cores;
+}
+
+struct gpu_info* get_gpu_info_cuda(int gpu_idx) {
  struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
  gpu->pci = NULL;
  gpu->idx = gpu_idx;
@@ -74,8 +95,10 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
    return NULL;
  }

+  if(gpu_idx == 0) {
    printf("Waiting for CUDA driver to start...");
    fflush(stdout);
+  }

  int num_gpus = -1;
  cudaError_t err = cudaSuccess;
@@ -83,7 +106,10 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
    printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
    return NULL;
  }
+
+  if(gpu_idx == 0) {
    printf("\r");
+  }

  if(num_gpus <= 0) {
    printErr("No CUDA capable devices found!");
@@ -91,7 +117,7 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
  }

  if(gpu->idx+1 > num_gpus) {
-    printErr("Requested GPU index %d in a system with %d GPUs", gpu->idx, num_gpus);
+    // Master is trying to query an invalid GPU
    return NULL;
  }

@@ -106,38 +132,31 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
  gpu->name = (char *) emalloc(sizeof(char) * (strlen(deviceProp.name) + 1));
  strcpy(gpu->name, deviceProp.name);

-  gpu->nvmld = nvml_init();
-  if(nvml_get_pci_info(gpu->idx, gpu->nvmld)) {
-    gpu->pci = get_pci_from_nvml(gpu->nvmld);
-  }
-
+  struct pci_dev *devices = get_pci_devices_from_pciutils();
+  gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_NVIDIA);
  gpu->arch = get_uarch_from_cuda(gpu);
  gpu->cach = get_cache_info(deviceProp);
  gpu->mem = get_memory_info(gpu, deviceProp);
  gpu->topo = get_topology_info(deviceProp);
-  gpu->peak_performance = get_peak_performance(gpu);
+  gpu->peak_performance = get_peak_performance_cuda(gpu);
+  gpu->peak_performance_tcu = get_peak_performance_tcu(gpu);

  return gpu;
 }

 char* get_str_sm(struct gpu_info* gpu) {
-  uint32_t max_size = 10;
-  char* dummy = (char *) ecalloc(max_size, sizeof(char));
-  snprintf(dummy, max_size, "%d", gpu->topo->streaming_mp);
-  return dummy;
+  return get_str_generic(gpu->topo->streaming_mp);
 }

 char* get_str_cores_sm(struct gpu_info* gpu) {
-  uint32_t max_size = 10;
-  char* dummy = (char *) ecalloc(max_size, sizeof(char));
-  snprintf(dummy, max_size, "%d", gpu->topo->cores_per_mp);
-  return dummy;
+  return get_str_generic(gpu->topo->cores_per_mp);
 }

 char* get_str_cuda_cores(struct gpu_info* gpu) {
-  uint32_t max_size = 10;
-  char* dummy = (char *) ecalloc(max_size, sizeof(char));
-  snprintf(dummy, max_size, "%d", gpu->topo->cuda_cores);
-  return dummy;
+  return get_str_generic(gpu->topo->cuda_cores);
+}
+
+char* get_str_tensor_cores(struct gpu_info* gpu) {
+  return get_str_generic(gpu->topo->tensor_cores);
 }

--- a/src/cuda/cuda.hpp
+++ b/src/cuda/cuda.hpp
@@ -1,11 +1,13 @@
-#ifndef __CUDA__
-#define __CUDA__
+#ifndef __CUDA_GPU__
+#define __CUDA_GPU__

 #include "../common/gpu.hpp"

-struct gpu_info* get_gpu_info(int gpu_idx);
+struct gpu_info* get_gpu_info_cuda(int gpu_idx);
+bool print_gpu_cuda(struct gpu_info* gpu);
 char* get_str_sm(struct gpu_info* gpu);
 char* get_str_cores_sm(struct gpu_info* gpu);
 char* get_str_cuda_cores(struct gpu_info* gpu);
+char* get_str_tensor_cores(struct gpu_info* gpu);

 #endif
--- a/src/cuda/nvmlb.cpp
+++ b/src/cuda/nvmlb.cpp
@@ -1,70 +0,0 @@
-#include <nvml.h>
-
-#include "nvmlb.hpp"
-#include "../common/global.hpp"
-
-struct nvml_data {
-  bool nvml_started;
-  nvmlPciInfo_t pci;
-};
-
-struct nvml_data* nvml_init() {
-  struct nvml_data* data = (struct nvml_data*) emalloc(sizeof(struct nvml_data));
-  data->nvml_started = false;
-
-  nvmlReturn_t result;
-
-  if ((result = nvmlInit()) != NVML_SUCCESS) {
-    printErr("nvmlInit: %s\n", nvmlErrorString(result));
-    return NULL;
-  }
-
-  data->nvml_started = true;
-  return data;
-}
-
-bool nvml_get_pci_info(int gpu_idx, struct nvml_data* data) {
-  nvmlReturn_t result;
-  nvmlDevice_t device;
-
-  if(!data->nvml_started) {
-    printErr("nvml_get_pci_info: nvml was not started");
-    return false;
-  }
-
-  if ((result = nvmlDeviceGetHandleByIndex(gpu_idx, &device)) != NVML_SUCCESS) {
-    printErr("nvmlDeviceGetHandleByIndex: %s\n", nvmlErrorString(result));
-    return false;
-  }
-
-  if ((result = nvmlDeviceGetPciInfo(device, &data->pci)) != NVML_SUCCESS) {
-    printErr("nvmlDeviceGetPciInfo: %s\n", nvmlErrorString(result));
-    return false;
-  }
-
-  return true;
-}
-
-uint16_t nvml_get_pci_vendor_id(struct nvml_data* data) {
-  return data->pci.pciDeviceId & 0x0000FFFF;
-}
-
-uint16_t nvml_get_pci_device_id(struct nvml_data* data) {
-  return (data->pci.pciDeviceId & 0xFFFF0000) >> 16;
-}
-
-bool nvml_shutdown(struct nvml_data* data) {
-  nvmlReturn_t result;
-
-  if(!data->nvml_started) {
-    printWarn("nvml_get_pci_info: nvml was not started");
-    return true;
-  }
-
-  if ((result = nvmlShutdown()) != NVML_SUCCESS) {
-    printErr("nvmlShutdown: %s\n", nvmlErrorString(result));
-    return false;
-  }
-
-  return true;
-}
--- a/src/cuda/nvmlb.hpp
+++ b/src/cuda/nvmlb.hpp
@@ -1,16 +0,0 @@
-// NVML Backend
-#ifndef __NVMLB__
-#define __NVMLB__
-
-#include <stdbool.h>
-#include <stdint.h>
-
-struct nvml_data;
-
-struct nvml_data* nvml_init();
-bool nvml_get_pci_info(int dev, struct nvml_data* data);
-uint16_t nvml_get_pci_vendor_id(struct nvml_data* data);
-uint16_t nvml_get_pci_device_id(struct nvml_data* data);
-bool nvml_shutdown(struct nvml_data* data);
-
-#endif
--- a/src/cuda/pci.cpp
+++ b/src/cuda/pci.cpp
@@ -1,28 +1,14 @@
 #include <stdio.h>

 #include "pci.hpp"
-#include "nvmlb.hpp"
 #include "chips.hpp"
 #include "../common/global.hpp"
+#include "../common/pci.hpp"

 #define CHECK_PCI_START if (false) {}
 #define CHECK_PCI(pci, id, chip) \
   else if (pci->device_id == id) return chip;
-#define CHECK_PCI_END else { printBug("TODOO"); return CHIP_UNKNOWN; }
-
-struct pci {
-  uint16_t vendor_id;
-  uint16_t device_id;
-};
-
-struct pci* get_pci_from_nvml(struct nvml_data* data) {
-  struct pci* pci = (struct pci*) emalloc(sizeof(struct pci));
-
-  pci->vendor_id = nvml_get_pci_vendor_id(data);
-  pci->device_id = nvml_get_pci_device_id(data);
-
-  return pci;
-}
+#define CHECK_PCI_END else { printBug("Unkown CUDA device id: 0x%.4X", pci->device_id); return CHIP_UNKNOWN_CUDA; }

 /*
 * pci ids were retrieved using https://github.com/pciutils/pciids
@@ -33,7 +19,7 @@ struct pci* get_pci_from_nvml(struct nvml_data* data) {
 * or in pci.ids itself)
 */

-GPUCHIP get_chip_from_pci(struct pci* pci) {
+GPUCHIP get_chip_from_pci_cuda(struct pci* pci) {
  CHECK_PCI_START
  CHECK_PCI(pci, 0x25e5, CHIP_GA107BM)
  CHECK_PCI(pci, 0x25e2, CHIP_GA107BM)
--- a/src/cuda/pci.hpp
+++ b/src/cuda/pci.hpp
@@ -1,13 +1,19 @@
-#ifndef __PCI__
-#define __PCI__
+#ifndef __PCI_CUDA__
+#define __PCI_CUDA__

 #include <stdint.h>
-#include "nvmlb.hpp"
+
+#include "../common/pci.hpp"
 #include "chips.hpp"

+/*
+ * doc: https://wiki.osdev.org/PCI#Class_Codes
+ *      https://pci-ids.ucw.cz/read/PC
+ */
+#define PCI_VENDOR_ID_NVIDIA 0x10de
+
 struct pci;

-struct pci* get_pci_from_nvml(struct nvml_data* data);
-GPUCHIP get_chip_from_pci(struct pci* pci);
+GPUCHIP get_chip_from_pci_cuda(struct pci* pci);

 #endif
--- a/src/cuda/uarch.cpp
+++ b/src/cuda/uarch.cpp
@@ -3,21 +3,14 @@
 #include <stdint.h>
 #include <cstddef>

+#include "../common/uarch.hpp"
 #include "../common/global.hpp"
 #include "../common/gpu.hpp"
 #include "chips.hpp"

-typedef uint32_t MICROARCH;
-
 // Any clock multiplier
 #define CM_ANY               -1

-// Data not available
-#define NA                   -1
-
-// Unknown manufacturing process
-#define UNK                  -1
-
 // MICROARCH values
 enum {
  UARCH_UNKNOWN,
@@ -43,23 +36,10 @@ static const char *uarch_str[] = {
  /*[ARCH_AMPERE]     = */ "Ampere",
 };

-struct uarch {
-  int32_t cc_major;
-  int32_t cc_minor;
-  int32_t compute_capability;
-
-  MICROARCH uarch;
-  GPUCHIP chip;
-
-  int32_t process;
-  char* uarch_str;
-  char* chip_str;
-};
-
 #define CHECK_UARCH_START if (false) {}
 #define CHECK_UARCH(arch, chip_, str, uarch, process) \
   else if (arch->chip == chip_) fill_uarch(arch, str, uarch, process);
-#define CHECK_UARCH_END else { printBug("map_chip_to_uarch: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, 0); }
+#define CHECK_UARCH_END else { if(arch->chip != CHIP_UNKNOWN_CUDA) printBug("map_chip_to_uarch_cuda: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, 0); }

 void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t process) {
  arch->chip_str = (char *) emalloc(sizeof(char) * (strlen(str)+1));
@@ -74,7 +54,7 @@ void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t proce
 * o CHIP_XXXGL: indicates a professional-class (Quadro/Tesla) chip
 * o CHIP_XXXM:  indicates a mobile chip
 */
-void map_chip_to_uarch(struct uarch* arch) {
+void map_chip_to_uarch_cuda(struct uarch* arch) {
  CHECK_UARCH_START
  // TESLA (1.0, 1.1, 1.2, 1.3)                                //
  CHECK_UARCH(arch, CHIP_G80,      "G80",      UARCH_TESLA,   90)
@@ -263,9 +243,8 @@ struct uarch* get_uarch_from_cuda(struct gpu_info* gpu) {
  arch->cc_major = deviceProp.major;
  arch->cc_minor = deviceProp.minor;
  arch->compute_capability = deviceProp.major * 10 + deviceProp.minor;
-  arch->chip = get_chip_from_pci(gpu->pci);
-
-  map_chip_to_uarch(arch);
+  arch->chip = get_chip_from_pci_cuda(gpu->pci);
+  map_chip_to_uarch_cuda(arch);

  return arch;
 }
@@ -335,10 +314,6 @@ MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch) {
  CHECK_MEMTYPE_END
 }

-const char* get_str_uarch(struct uarch* arch) {
-  return uarch_str[arch->uarch];
-}
-
 char* get_str_cc(struct uarch* arch) {
  uint32_t max_size = 4;
  char* cc = (char *) ecalloc(max_size, sizeof(char));
@@ -346,31 +321,14 @@ char* get_str_cc(struct uarch* arch) {
  return cc;
 }

-char* get_str_process(struct uarch* arch) {
-  char* str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN)+1));
-  int32_t process = arch->process;
-
-  if(process == UNK) {
-    snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
-  }
-  else if(process > 100) {
-    sprintf(str, "%.2fum", (double)process/100);
-  }
-  else if(process > 0){
-    sprintf(str, "%dnm", process);
-  }
-  else {
-    snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
-    printBug("Found invalid process: '%d'", process);
-  }
-
-  return str;
-}
-
 char* get_str_chip(struct uarch* arch) {
  return arch->chip_str;
 }

+const char* get_str_uarch_cuda(struct uarch* arch) {
+  return uarch_str[arch->uarch];
+}
+
 void free_uarch_struct(struct uarch* arch) {
  free(arch->uarch_str);
  free(arch->chip_str);
--- a/src/cuda/uarch.hpp
+++ b/src/cuda/uarch.hpp
@@ -1,5 +1,5 @@
-#ifndef __UARCH__
-#define __UARCH__
+#ifndef __CUDA_UARCH__
+#define __CUDA_UARCH__

 #include "../common/gpu.hpp"

@@ -8,7 +8,7 @@ struct uarch;
 struct uarch* get_uarch_from_cuda(struct gpu_info* gpu);
 bool clkm_possible_for_uarch(int clkm, struct uarch* arch);
 MEMTYPE guess_memtype_from_cmul_and_uarch(int ddr, struct uarch* arch);
-char* get_str_uarch(struct uarch* arch);
+char* get_str_uarch_cuda(struct uarch* arch);
 char* get_str_cc(struct uarch* arch);
 char* get_str_chip(struct uarch* arch);
 char* get_str_process(struct uarch* arch);
--- a/src/intel/check.sh
+++ b/src/intel/check.sh
@@ -0,0 +1,12 @@
+#!/bin/bash -u
+# Checks the difference between supported uarchs
+# and uarchs that have their topology available
+# in file uarch.cpp
+#
+# Must be run from the directory that contains uarch.cpp.
+# Requires 'meld' to show the differences.
+
+uarchs="$(grep 'CHECK_UARCH' uarch.cpp | cut -d',' -f4-5 | grep 'UARCH_GEN' | tr -d ' ' | sort | uniq)"
+topos="$(grep 'CHECK_TOPO' uarch.cpp | cut -d',' -f3,4 | grep 'UARCH_' | tr -d ' ' | sort | uniq)"
+
+# Use private temporary files instead of fixed, predictable names in /tmp,
+# and remove them even if the script is interrupted
+uarchs_file="$(mktemp)" || exit 1
+trap 'rm -f "$uarchs_file" "${topos_file:-}"' EXIT
+topos_file="$(mktemp)" || exit 1
+
+echo "$uarchs" > "$uarchs_file"
+echo "$topos" > "$topos_file"
+meld "$uarchs_file" "$topos_file"
--- a/src/intel/chips.hpp
+++ b/src/intel/chips.hpp
@@ -0,0 +1,59 @@
+#ifndef __INTEL_GPUCHIPS__
+#define __INTEL_GPUCHIPS__
+
+#include <stdint.h>
+
+typedef uint32_t GPUCHIP;
+
+enum {
+  CHIP_UNKNOWN_INTEL,
+  // Gen6
+  CHIP_HD_2000,
+  CHIP_HD_3000,
+  // Gen7
+  CHIP_HD_2500,
+  CHIP_HD_4000,
+  CHIP_HD_P4000,
+  // Gen7.5
+  CHIP_HD_4200,
+  CHIP_HD_4400,
+  CHIP_HD_4600,
+  CHIP_HD_P4600,
+  CHIP_IRIS_5100,
+  CHIP_IRISP_5200,
+  CHIP_IRISP_P5200,
+  // Gen8
+  CHIP_HD_5300,
+  CHIP_HD_5500,
+  CHIP_HD_5600,
+  CHIP_HD_P5700,
+  CHIP_HD_6000,
+  CHIP_IRIS_6100,
+  CHIP_IRISP_6200,
+  CHIP_IRISP_P6300,
+  // Gen9
+  CHIP_HD_510,
+  CHIP_HD_515,
+  CHIP_HD_520,
+  CHIP_HD_530,
+  CHIP_HD_P530,
+  CHIP_HD_540,
+  CHIP_HD_550,
+  CHIP_IRIS_P555,
+  CHIP_IRIS_580,
+  CHIP_IRIS_P580,
+  // Gen9.5
+  CHIP_UHD_600,
+  CHIP_UHD_605,
+  CHIP_UHD_620,
+  CHIP_UHD_630,
+  CHIP_HD_610,
+  CHIP_HD_615,
+  CHIP_HD_620,
+  CHIP_HD_630,
+  CHIP_HD_P630,
+  CHIP_IRISP_640,
+  CHIP_IRISP_650,
+};
+
+#endif
--- a/src/intel/intel.cpp
+++ b/src/intel/intel.cpp
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "intel.hpp"
+#include "uarch.hpp"
+#include "chips.hpp"
+#include "udev.hpp"
+#include "../common/pci.hpp"
+#include "../common/global.hpp"
+
+// Peak performance of an Intel iGPU, computed as
+// freq * 1000000 * (EUs per subslice * subslices) * 8 * 2.
+// NOTE(review): presumably freq is in MHz and 8 * 2 is the number of
+// operations per EU and cycle - confirm. Also confirm that gpu->freq is a
+// 64-bit value; otherwise the product may overflow before it is widened.
+int64_t get_peak_performance_intel(struct gpu_info* gpu) {
+  return gpu->freq * 1000000 * gpu->topo_i->eu_subslice * gpu->topo_i->subslices * 8 * 2;
+}
+
+/*
+ * Gathers the information of the Intel iGPU: PCI data, microarchitecture,
+ * name, topology, maximum frequency and peak performance.
+ *
+ * Returns a newly allocated gpu_info, or NULL when no Intel VGA controller
+ * is found in the PCI bus.
+ */
+struct gpu_info* get_gpu_info_intel() {
+  // Look for the device before allocating the gpu_info, so that nothing is
+  // leaked when there is no Intel iGPU
+  struct pci_dev *devices = get_pci_devices_from_pciutils();
+  struct pci* pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_INTEL);
+
+  if(pci == NULL) {
+    // No Intel iGPU found in PCI, which means it is not present
+    return NULL;
+  }
+
+  struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
+  gpu->vendor = GPU_VENDOR_INTEL;
+  gpu->pci = pci;
+
+  // Each step below depends on data filled in by the previous ones
+  gpu->arch = get_uarch_from_pci(gpu->pci);
+  gpu->name = get_name_from_uarch(gpu->arch);
+  gpu->topo_i = get_topology_info(gpu->arch);
+  gpu->freq = get_max_freq_from_file(gpu->pci);
+  gpu->peak_performance = get_peak_performance_intel(gpu);
+
+  return gpu;
+}
+
+// Prints "Intel <name>" followed by a newline when the GPU is an Intel one.
+// Returns false (printing nothing) for any other vendor.
+bool print_gpu_intel(struct gpu_info* gpu) {
+  bool is_intel = (gpu->vendor == GPU_VENDOR_INTEL);
+
+  if(is_intel) {
+    printf("Intel %s\n", gpu->name);
+  }
+
+  return is_intel;
+}
+
+// Returns the string built by get_str_generic() for the total number of
+// execution units (subslices * EUs per subslice).
+char* get_str_eu(struct gpu_info* gpu) {
+  return get_str_generic(gpu->topo_i->subslices * gpu->topo_i->eu_subslice);
+}
--- a/src/intel/intel.hpp
+++ b/src/intel/intel.hpp
@@ -0,0 +1,10 @@
+#ifndef __INTEL_GPU__
+#define __INTEL_GPU__
+
+#include "../common/gpu.hpp"
+
+struct gpu_info* get_gpu_info_intel();
+bool print_gpu_intel(struct gpu_info* gpu);
+char* get_str_eu(struct gpu_info* gpu);
+
+#endif
--- a/src/intel/pci.cpp
+++ b/src/intel/pci.cpp
@@ -0,0 +1,88 @@
+#include <stdio.h>
+
+#include "pci.hpp"
+#include "chips.hpp"
+#include "../common/global.hpp"
+#include "../common/pci.hpp"
+
+#define CHECK_PCI_START if (false) {}
+#define CHECK_PCI(pci, id, chip) \
+   else if (pci->device_id == id) return chip;
+#define CHECK_PCI_END else { printBug("Unkown Intel device id: 0x%.4X", pci->device_id); return CHIP_UNKNOWN_INTEL; }
+
+/*
+ * https://github.com/mesa3d/mesa/blob/main/include/pci_ids/i965_pci_ids.h
+ */
+// Maps the PCI device id of an Intel iGPU to its chip identifier.
+// The CHECK_PCI entries expand to an if / else-if chain that returns the
+// chip of the first matching id; when no id matches, CHECK_PCI_END reports
+// a bug and returns CHIP_UNKNOWN_INTEL.
+GPUCHIP get_chip_from_pci_intel(struct pci* pci) {
+  CHECK_PCI_START
+  // Gen6
+  CHECK_PCI(pci, 0x0102, CHIP_HD_2000)
+  CHECK_PCI(pci, 0x0106, CHIP_HD_2000)
+  CHECK_PCI(pci, 0x010A, CHIP_HD_2000)
+  CHECK_PCI(pci, 0x0112, CHIP_HD_3000)
+  CHECK_PCI(pci, 0x0122, CHIP_HD_3000)
+  CHECK_PCI(pci, 0x0116, CHIP_HD_3000)
+  CHECK_PCI(pci, 0x0126, CHIP_HD_3000)
+  // Gen7
+  CHECK_PCI(pci, 0x0152, CHIP_HD_2500)
+  CHECK_PCI(pci, 0x0156, CHIP_HD_2500)
+  CHECK_PCI(pci, 0x0162, CHIP_HD_4000)
+  CHECK_PCI(pci, 0x0166, CHIP_HD_4000)
+  CHECK_PCI(pci, 0x016a, CHIP_HD_P4000)
+  // Gen7.5
+  CHECK_PCI(pci, 0x0A1E, CHIP_HD_4200)
+  CHECK_PCI(pci, 0x041E, CHIP_HD_4400)
+  CHECK_PCI(pci, 0x0A16, CHIP_HD_4400)
+  CHECK_PCI(pci, 0x0412, CHIP_HD_4600)
+  CHECK_PCI(pci, 0x0416, CHIP_HD_4600)
+  CHECK_PCI(pci, 0x0D12, CHIP_HD_4600)
+  CHECK_PCI(pci, 0x041A, CHIP_HD_P4600)
+  CHECK_PCI(pci, 0x0A2E, CHIP_IRIS_5100)
+  CHECK_PCI(pci, 0x0D22, CHIP_IRISP_5200)
+  CHECK_PCI(pci, 0x0D26, CHIP_IRISP_P5200)
+  // Gen8
+  CHECK_PCI(pci, 0x161E, CHIP_HD_5300)
+  CHECK_PCI(pci, 0x1616, CHIP_HD_5500)
+  CHECK_PCI(pci, 0x1612, CHIP_HD_5600)
+  CHECK_PCI(pci, 0x161A, CHIP_HD_P5700)
+  CHECK_PCI(pci, 0x1626, CHIP_HD_6000)
+  CHECK_PCI(pci, 0x162B, CHIP_IRIS_6100)
+  CHECK_PCI(pci, 0x1622, CHIP_IRISP_6200)
+  CHECK_PCI(pci, 0x162A, CHIP_IRISP_P6300)
+  // Gen9
+  CHECK_PCI(pci, 0x1902, CHIP_HD_510)
+  CHECK_PCI(pci, 0x1906, CHIP_HD_510)
+  CHECK_PCI(pci, 0x190B, CHIP_HD_510)
+  CHECK_PCI(pci, 0x191E, CHIP_HD_515)
+  CHECK_PCI(pci, 0x1916, CHIP_HD_520)
+  CHECK_PCI(pci, 0x1921, CHIP_HD_520)
+  CHECK_PCI(pci, 0x1912, CHIP_HD_530)
+  CHECK_PCI(pci, 0x191B, CHIP_HD_530)
+  CHECK_PCI(pci, 0x191D, CHIP_HD_P530)
+  // NOTE(review): the entries below are disabled; they all carry the same
+  // id (0x5917, which is also used for CHIP_UHD_620 further down), so the
+  // ids look like placeholders. No id is mapped to these chips yet.
+  /*CHECK_PCI(pci, 0x5917, CHIP_HD_540)
+  CHECK_PCI(pci, 0x5917, CHIP_HD_550)
+  CHECK_PCI(pci, 0x5917, CHIP_HD_P555)
+  CHECK_PCI(pci, 0x5917, CHIP_HD_580)
+  CHECK_PCI(pci, 0x5917, CHIP_HD_P580)*/
+  // Gen9.5
+  CHECK_PCI(pci, 0x3185, CHIP_UHD_600)
+  CHECK_PCI(pci, 0x3184, CHIP_UHD_605)
+  CHECK_PCI(pci, 0x5917, CHIP_UHD_620)
+  CHECK_PCI(pci, 0x3E91, CHIP_UHD_630)
+  CHECK_PCI(pci, 0x3E92, CHIP_UHD_630)
+  CHECK_PCI(pci, 0x3E98, CHIP_UHD_630)
+  CHECK_PCI(pci, 0x3E9B, CHIP_UHD_630)
+  CHECK_PCI(pci, 0x9BC5, CHIP_UHD_630)
+  CHECK_PCI(pci, 0x9BC8, CHIP_UHD_630)
+  CHECK_PCI(pci, 0x5902, CHIP_HD_610)
+  CHECK_PCI(pci, 0x5906, CHIP_HD_610)
+  CHECK_PCI(pci, 0x590B, CHIP_HD_610)
+  CHECK_PCI(pci, 0x591E, CHIP_HD_615)
+  // NOTE(review): no id in this table maps to CHIP_HD_620
+  CHECK_PCI(pci, 0x5912, CHIP_HD_630)
+  CHECK_PCI(pci, 0x591B, CHIP_HD_630)
+  CHECK_PCI(pci, 0x591A, CHIP_HD_P630)
+  CHECK_PCI(pci, 0x591D, CHIP_HD_P630)
+  CHECK_PCI(pci, 0x5926, CHIP_IRISP_640)
+  CHECK_PCI(pci, 0x5927, CHIP_IRISP_650)
+  CHECK_PCI_END
+}
--- a/src/intel/pci.hpp
+++ b/src/intel/pci.hpp
@@ -0,0 +1,19 @@
+#ifndef __PCI_INTEL__
+#define __PCI_INTEL__
+
+#include <stdint.h>
+
+#include "../common/pci.hpp"
+#include "chips.hpp"
+
+/*
+ * doc: https://wiki.osdev.org/PCI#Class_Codes
+ *      https://pci-ids.ucw.cz/read/PC
+ */
+// PCI vendor id assigned to Intel
+#define PCI_VENDOR_ID_INTEL 0x8086
+
+struct pci;
+
+/*
+ * Looks up the chip identifier that corresponds to the given PCI device.
+ * Callers compare the result against CHIP_UNKNOWN_INTEL to detect devices
+ * that are not present in the lookup table.
+ */
+GPUCHIP get_chip_from_pci_intel(struct pci* pci);
+
+#endif
--- a/src/intel/uarch.cpp
+++ b/src/intel/uarch.cpp
@@ -0,0 +1,212 @@
+#include <stdint.h>
+#include <cstddef>
+#include <string.h>
+#include <stdio.h>
+
+#include "../common/uarch.hpp"
+#include "../common/global.hpp"
+#include "../common/gpu.hpp"
+#include "chips.hpp"
+#include "pci.hpp"
+
+// Data not available
+#define NA                   -1
+
+// Unknown manufacturing process
+#define UNK                  -1
+
+/*
+ * Mapping between iGPU and CPU uarchs
+ * -----------------------------------
+ * Gen6:   Sandy Bridge (2nd Gen)
+ * Gen7:   Ivy Bridge   (3rd Gen)
+ * Gen7.5: Haswell      (4th Gen)
+ * Gen8:   Broadwell    (5th Gen)
+ * Gen9:   Skylake      (6th Gen)
+ * Gen9.5: Kaby Lake
+ */
+// Intel iGPU microarchitecture generations.
+// The enumerator values are used as indices into uarch_str below, so both
+// lists must be kept in the same order.
+enum {
+  UARCH_UNKNOWN,
+  UARCH_GEN6,
+  UARCH_GEN7,
+  UARCH_GEN7_5,
+  UARCH_GEN8,
+  UARCH_GEN9,
+  UARCH_GEN9_5,
+};
+
+// Printable name of each microarchitecture, indexed by the enum above
+static const char *uarch_str[] = {
+  /*[UARCH_UNKNOWN]  = */ STRING_UNKNOWN,
+  /*[UARCH_GEN6]     = */ "Gen6",
+  /*[UARCH_GEN7]     = */ "Gen7",
+  /*[UARCH_GEN7_5]   = */ "Gen7.5",
+  /*[UARCH_GEN8]     = */ "Gen8",
+  /*[UARCH_GEN9]     = */ "Gen9",
+  /*[UARCH_GEN9_5]   = */ "Gen9.5",
+};
+
+// Graphics Tiers (GT).
+// The enumerator values are used as indices into gt_str below, so both
+// lists must be kept in the same order.
+enum {
+  GT_UNKNOWN,
+  GT1,
+  GT1_5,
+  GT2,
+  GT3,
+  GT3e,
+  GT4e
+};
+
+// Printable name of each graphics tier, indexed by the enum above
+static const char *gt_str[] = {
+  /*[GT_UNKNOWN] = */ STRING_UNKNOWN,
+  /*[GT1]        = */ "GT1",
+  /*[GT1_5]      = */ "GT1.5",
+  /*[GT2]        = */ "GT2",
+  /*[GT3]        = */ "GT3",
+  /*[GT3e]       = */ "GT3e",
+  /*[GT4e]       = */ "GT4e",
+};
+
+// The CHECK_* macros build if / else-if lookup chains. Every chain starts
+// with a dummy `if (false) {}` so that each entry can expand to an
+// `else if`, and finishes with an `else` branch that reports the missing
+// entry and fills the output with "unknown" values.
+
+// Chip -> (name, uarch, graphics tier, process) chain.
+// CHECK_UARCH_END expects a variable named `arch` at the expansion site.
+#define CHECK_UARCH_START if (false) {}
+#define CHECK_UARCH(arch, chip_, str, uarch, gt, process) \
+   else if (arch->chip == chip_) fill_uarch(arch, str, uarch, gt, process);
+#define CHECK_UARCH_END else { printBug("map_chip_to_uarch_intel: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, GT_UNKNOWN, 0); }
+
+// (uarch, graphics tier) -> (EUs per subslice, subslices, slices) chain.
+// CHECK_TOPO_END expects variables named `arch` and `topo` at the expansion
+// site and reports which combination has no entry in the table.
+#define CHECK_TOPO_START if (false) {}
+#define CHECK_TOPO(topo, arch, uarch_, gt_, eu_sub, sub, sli) \
+  else if(arch->uarch == uarch_ && arch->gt == gt_) fill_topo(topo, eu_sub, sub, sli);
+#define CHECK_TOPO_END else { printBug("get_topology_info: Invalid uarch and gt combination: '%s' and '%s'", get_str_uarch_intel(arch), get_str_gt(arch)); fill_topo(topo, -1, -1, -1); }
+
+/*
+ * Stores the given topology values in topo_i:
+ *  - eu_sub: execution units per subslice
+ *  - sub:    number of subslices
+ *  - sli:    number of slices
+ */
+void fill_topo(struct topology_i* topo_i, int32_t eu_sub, int32_t sub, int32_t sli) {
+  topo_i->eu_subslice = eu_sub;
+  topo_i->subslices   = sub;
+  topo_i->slices      = sli;
+}
+
+/*
+ * Fills arch with the chip name (a freshly allocated copy of str), the
+ * microarchitecture, the graphics tier and the manufacturing process.
+ */
+void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, int32_t gt, uint32_t process) {
+  // Size of the name including the terminating NUL
+  size_t name_bytes = sizeof(char) * (strlen(str) + 1);
+  char* name_copy = (char *) emalloc(name_bytes);
+  memcpy(name_copy, str, name_bytes);
+
+  arch->chip_str = name_copy;
+  arch->gt = gt;
+  arch->process = process;
+  arch->uarch = u;
+}
+
+/*
+ * Fills arch (name, microarchitecture, graphics tier and process) from
+ * arch->chip, which must have been set by the caller.
+ * Row syntax: (chip id, name, uarch, graphics tier, process)
+ * If the chip has no row, CHECK_UARCH_END reports a bug and fills arch
+ * with unknown values.
+ */
+void map_chip_to_uarch_intel(struct uarch* arch) {
+  CHECK_UARCH_START
+  // Gen6
+  CHECK_UARCH(arch, CHIP_HD_2000,     "HD Graphics 2000",        UARCH_GEN6,   GT1,  32)
+  CHECK_UARCH(arch, CHIP_HD_3000,     "HD Graphics 3000",        UARCH_GEN6,   GT2,  32)
+  // Gen7
+  CHECK_UARCH(arch, CHIP_HD_2500,     "HD Graphics 2500",        UARCH_GEN7,   GT1,  22)
+  CHECK_UARCH(arch, CHIP_HD_4000,     "HD Graphics 4000",        UARCH_GEN7,   GT2,  22)
+  CHECK_UARCH(arch, CHIP_HD_P4000,    "HD Graphics P4000",       UARCH_GEN7,   GT2,  22)
+  // Gen7.5
+  CHECK_UARCH(arch, CHIP_HD_4200,     "HD Graphics 4200",        UARCH_GEN7_5, GT2,  22)
+  CHECK_UARCH(arch, CHIP_HD_4400,     "HD Graphics 4400",        UARCH_GEN7_5, GT2,  22)
+  CHECK_UARCH(arch, CHIP_HD_4600,     "HD Graphics 4600",        UARCH_GEN7_5, GT2,  22)
+  CHECK_UARCH(arch, CHIP_HD_P4600,    "HD Graphics P4600",       UARCH_GEN7_5, GT2,  22)
+  CHECK_UARCH(arch, CHIP_IRIS_5100,   "HD Iris 5100",            UARCH_GEN7_5, GT3,  22)
+  CHECK_UARCH(arch, CHIP_IRISP_5200,  "HD Iris Pro 5200",        UARCH_GEN7_5, GT3,  22)
+  CHECK_UARCH(arch, CHIP_IRISP_P5200, "HD Iris Pro P5200",       UARCH_GEN7_5, GT3,  22)
+  // Gen8
+  CHECK_UARCH(arch, CHIP_HD_5300,     "HD Graphics 5300",        UARCH_GEN8,   GT2,  14)
+  CHECK_UARCH(arch, CHIP_HD_5500,     "HD Graphics 5500",        UARCH_GEN8,   GT2,  14)
+  CHECK_UARCH(arch, CHIP_HD_5600,     "HD Graphics 5600",        UARCH_GEN8,   GT2,  14)
+  CHECK_UARCH(arch, CHIP_HD_P5700,    "HD Graphics P5700",       UARCH_GEN8,   GT2,  14)
+  CHECK_UARCH(arch, CHIP_HD_6000,     "HD Graphics 6000",        UARCH_GEN8,   GT3,  14)
+  CHECK_UARCH(arch, CHIP_IRIS_6100,   "Iris Graphics 6100",      UARCH_GEN8,   GT3,  14)
+  CHECK_UARCH(arch, CHIP_IRISP_6200,  "Iris Pro Graphics 6200",  UARCH_GEN8,   GT3,  14)
+  CHECK_UARCH(arch, CHIP_IRISP_P6300, "Iris Pro Graphics P6300", UARCH_GEN8,   GT3,  14)
+  // Gen9
+  CHECK_UARCH(arch, CHIP_HD_510,      "HD Graphics 510",         UARCH_GEN9,   GT1,   14)
+  CHECK_UARCH(arch, CHIP_HD_515,      "HD Graphics 515",         UARCH_GEN9,   GT2,   14)
+  CHECK_UARCH(arch, CHIP_HD_520,      "HD Graphics 520",         UARCH_GEN9,   GT2,   14)
+  CHECK_UARCH(arch, CHIP_HD_530,      "HD Graphics 530",         UARCH_GEN9,   GT2,   14)
+  CHECK_UARCH(arch, CHIP_HD_P530,     "HD Graphics P530",        UARCH_GEN9,   GT2,   14)
+  // Gen9.5
+  CHECK_UARCH(arch, CHIP_UHD_600,     "UHD Graphics 600",        UARCH_GEN9_5, GT1,   14)
+  CHECK_UARCH(arch, CHIP_UHD_605,     "UHD Graphics 605",        UARCH_GEN9_5, GT1_5, 14)
+  CHECK_UARCH(arch, CHIP_UHD_620,     "UHD Graphics 620",        UARCH_GEN9_5, GT2,   14)
+  CHECK_UARCH(arch, CHIP_UHD_630,     "UHD Graphics 630",        UARCH_GEN9_5, GT2,   14)
+  CHECK_UARCH(arch, CHIP_HD_610,      "HD Graphics 610",         UARCH_GEN9_5, GT1,   14)
+  CHECK_UARCH(arch, CHIP_HD_615,      "HD Graphics 615",         UARCH_GEN9_5, GT2,   14)
+  CHECK_UARCH(arch, CHIP_HD_630,      "HD Graphics 630",         UARCH_GEN9_5, GT2,   14)
+  CHECK_UARCH(arch, CHIP_HD_P630,     "HD Graphics P630",        UARCH_GEN9_5, GT2,   14)
+  CHECK_UARCH(arch, CHIP_IRISP_640,   "Iris Plus Graphics 640",  UARCH_GEN9_5, GT3e,  14)
+  // Must test CHIP_IRISP_650: repeating CHIP_IRISP_640 here would make this
+  // row unreachable and leave the 650 reported as an unknown chip
+  CHECK_UARCH(arch, CHIP_IRISP_650,   "Iris Plus Graphics 650",  UARCH_GEN9_5, GT3e,  14)
+  CHECK_UARCH_END
+}
+
+// Returns the printable microarchitecture name for arch.
+// arch->uarch is used directly as the index into the uarch_str table.
+const char* get_str_uarch_intel(struct uarch* arch) {
+  const char* uarch_name = uarch_str[arch->uarch];
+  return uarch_name;
+}
+
+// Returns the printable graphics tier name for arch.
+// arch->gt is used directly as the index into the gt_str table.
+const char* get_str_gt(struct uarch* arch) {
+  const char* tier_name = gt_str[arch->gt];
+  return tier_name;
+}
+
+/*
+ * Builds the uarch description of the Intel iGPU behind the given PCI
+ * device.
+ * Returns a newly allocated struct uarch, or NULL if the device is not
+ * present in the PCI lookup table.
+ */
+struct uarch* get_uarch_from_pci(struct pci* pci) {
+  // Resolve the chip before allocating anything, so that the unknown-chip
+  // path does not leak the struct
+  GPUCHIP chip = get_chip_from_pci_intel(pci);
+  if(chip == CHIP_UNKNOWN_INTEL) {
+    return NULL;
+  }
+
+  struct uarch* arch = (struct uarch*) emalloc(sizeof(struct uarch));
+  arch->chip_str = NULL;
+  arch->chip = chip;
+  map_chip_to_uarch_intel(arch);
+
+  return arch;
+}
+
+// Returns a newly allocated string with the chip name prefixed by "Intel "
+char* get_name_from_uarch(struct uarch* arch) {
+  // 6 characters for the "Intel " prefix plus 1 for the terminating NUL
+  size_t name_len = strlen(arch->chip_str) + 6 + 1;
+  char* name = (char *) emalloc(sizeof(char) * name_len);
+
+  sprintf(name, "Intel %s", arch->chip_str);
+  return name;
+}
+
+/*
+ * Returns a newly allocated topology (EUs per subslice, subslices and
+ * slices) for the microarchitecture and graphics tier stored in arch.
+ * If the (uarch, gt) pair has no row in the table below, CHECK_TOPO_END
+ * reports a bug and the three fields are set to -1.
+ *
+ * Refs:
+ * Gen6:     https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen6
+ * Gen7/7.5: https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen7
+ *           "The Compute Architecture of Intel Processor Graphics Gen7.5, v1.0"
+ * Gen8:     https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen8
+ *           "The Compute Architecture of Intel Processor Graphics Gen8, v1.1"
+ * Gen9:     https://en.wikichip.org/wiki/intel/microarchitectures/gen9#Configuration
+ *           "The Compute Architecture of Intel Processor Graphics Gen9, v1.0"
+ * Gen9.5:   https://en.wikichip.org/wiki/intel/microarchitectures/gen9.5#Configuration
+ */
+struct topology_i* get_topology_info(struct uarch* arch) {
+  struct topology_i* topo = (struct topology_i*) emalloc(sizeof(struct topology_i));
+
+  // Syntax: (EU per subslice, Subslices, Slices)
+  CHECK_TOPO_START
+  // Gen6
+  CHECK_TOPO(topo, arch, UARCH_GEN6,   GT1,   6, 1, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN6,   GT2,   6, 2, 1)
+  // Gen7
+  CHECK_TOPO(topo, arch, UARCH_GEN7,   GT1,   6, 1, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN7,   GT2,   8, 2, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN7,   GT3,   6, 1, 1)
+  // Gen7.5
+  CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT1,  10, 1, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT2,  10, 2, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT3,  10, 4, 1)
+  // Gen8
+  CHECK_TOPO(topo, arch, UARCH_GEN8,   GT1,   6, 2, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN8,   GT2,   8, 3, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN8,   GT3,   8, 6, 2)
+  // Gen9
+  CHECK_TOPO(topo, arch, UARCH_GEN9,   GT1,   6, 2, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN9,   GT2,   8, 3, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN9,   GT3,   8, 6, 2)
+  CHECK_TOPO(topo, arch, UARCH_GEN9,   GT4e,  8, 9, 3)
+  // Gen9.5
+  CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT1,   6, 2, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT1_5, 6, 3, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT2,   8, 3, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT3,   8, 6, 2)
+  CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT3e,  8, 6, 2) // Same as GT3, but has eDRAM cache
+  CHECK_TOPO_END
+
+  return topo;
+}
--- a/src/intel/uarch.hpp
+++ b/src/intel/uarch.hpp
@@ -0,0 +1,14 @@
+#ifndef __INTEL_UARCH__
+#define __INTEL_UARCH__
+
+#include "../common/gpu.hpp"
+
+struct uarch;
+
+// Returns a newly allocated uarch for the given PCI device, or NULL if the
+// device is not a known Intel iGPU
+struct uarch* get_uarch_from_pci(struct pci* pci);
+// Returns a newly allocated string of the form "Intel <chip name>"
+char* get_name_from_uarch(struct uarch* arch);
+// NOTE(review): the definitions of the next two functions in uarch.cpp
+// return `const char*` (pointers into static string tables), while they are
+// declared here as returning `char*` - confirm and align the declarations.
+// Returns the printable graphics tier name (e.g. "GT2")
+char* get_str_gt(struct uarch* arch);
+// Returns the printable microarchitecture name (e.g. "Gen9.5")
+char* get_str_uarch_intel(struct uarch* arch);
+// Returns a newly allocated topology (EUs per subslice, subslices, slices);
+// fields are -1 when the uarch / graphics tier combination is not known
+struct topology_i* get_topology_info(struct uarch* arch);
+
+#endif
--- a/src/intel/udev.cpp
+++ b/src/intel/udev.cpp
@@ -0,0 +1,89 @@
+#include <cstddef>
+#include <cstring>
+#include <cstdlib>
+#include <cstdint>
+#include <cerrno>
+#include <cstdio>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "../common/global.hpp"
+#include "../common/pci.hpp"
+
+// Components of the sysfs path holding the iGPU frequency limits. The full
+// path is built as:
+//   _PATH_SYS_SYSTEM/<domain>:<bus>:<dev>.<func>/drm/card0/<frequency file>
+#define _PATH_SYS_SYSTEM        "/sys/devices/pci0000:00"
+#define _PATH_SYS_DRM           "/drm"
+#define _PATH_CARD              "/card0"
+#define _PATH_FREQUENCY_MAX     "/gt_max_freq_mhz"
+#define _PATH_FREQUENCY_MIN     "/gt_min_freq_mhz"
+
+// Size in bytes of the buffer used to build the sysfs path
+#define _PATH_FREQUENCY_MAX_LEN 100
+// Size in bytes of the buffer allocated by read_file
+#define DEFAULT_FILE_SIZE       4096
+// Value returned when the frequency can not be obtained
+#define UNKNOWN_DATA            -1
+
+/*
+ * Reads the contents of the file at path into a newly allocated,
+ * NUL-terminated buffer of DEFAULT_FILE_SIZE bytes.
+ *  - path: file to read
+ *  - len:  set to the number of bytes read (only on success)
+ * Returns the buffer (to be released with free), or NULL if the file can
+ * not be opened, read or closed. Files larger than DEFAULT_FILE_SIZE - 1
+ * bytes are truncated to that size.
+ */
+char* read_file(char* path, int* len) {
+  int fd = open(path, O_RDONLY);
+
+  if(fd == -1) {
+    return NULL;
+  }
+
+  //File exists, read it
+  int bytes_read = 0;
+  int offset = 0;
+  int block = 128;
+  // Keep the last byte untouched so the buffer is always NUL-terminated
+  int capacity = DEFAULT_FILE_SIZE - 1;
+  char* buf = (char *) emalloc(sizeof(char)*DEFAULT_FILE_SIZE);
+  memset(buf, 0, sizeof(char)*DEFAULT_FILE_SIZE);
+
+  while (offset < capacity) {
+    // Never request more bytes than the space left in the buffer
+    int remaining = capacity - offset;
+    int chunk = remaining < block ? remaining : block;
+
+    bytes_read = read(fd, buf+offset, chunk);
+    if (bytes_read <= 0) {
+      break;
+    }
+    offset += bytes_read;
+  }
+
+  if (bytes_read == -1) {
+    // Read error: do not hand back partial data
+    close(fd);
+    free(buf);
+    return NULL;
+  }
+
+  if (close(fd) == -1) {
+    free(buf);
+    return NULL;
+  }
+
+  *len = offset;
+  return buf;
+}
+
+/*
+ * Reads a frequency in MHz, stored as a decimal number, from the file at
+ * path.
+ * Returns the frequency, or UNKNOWN_DATA if the file can not be read, the
+ * number can not be parsed or the value is outside [100, 10000] MHz.
+ */
+long get_freq_from_file(char* path) {
+  int filelen;
+  char* buf;
+  if((buf = read_file(path, &filelen)) == NULL) {
+    printWarn("Could not open '%s'", path);
+    return UNKNOWN_DATA;
+  }
+
+  errno = 0;
+  long ret = strtol(buf, NULL, 10);
+  // Save errno before free, which is allowed to modify it
+  int parse_errno = errno;
+  // The buffer is not needed after parsing; releasing it here guarantees
+  // that no return path below can leak it
+  free(buf);
+
+  if(parse_errno != 0) {
+    printBug("strtol: %s", strerror(parse_errno));
+    return UNKNOWN_DATA;
+  }
+
+  // We will be getting the frequency in MHz
+  // We consider it is an error if frequency is
+  // greater than 10 GHz or less than 100 MHz
+  if(ret > 10000 || ret <  100) {
+    printBug("Invalid data was read from file '%s': %ld\n", path, ret);
+    return UNKNOWN_DATA;
+  }
+
+  return ret;
+}
+
+/*
+ * Builds the sysfs path of the given frequency file (_PATH_FREQUENCY_MAX or
+ * _PATH_FREQUENCY_MIN) for the PCI device and reads the frequency from it.
+ * Returns the frequency in MHz, or UNKNOWN_DATA on failure.
+ */
+static long get_freq_from_sysfs(struct pci* pci, const char* freq_file) {
+  char path[_PATH_FREQUENCY_MAX_LEN];
+  int written = snprintf(path, sizeof(path), "%s/%04x:%02x:%02x.%d%s%s%s", _PATH_SYS_SYSTEM, pci->domain, pci->bus, pci->dev, pci->func, _PATH_SYS_DRM, _PATH_CARD, freq_file);
+
+  // snprintf never writes past the buffer; a result >= its size means the
+  // path was truncated and must not be used
+  if(written < 0 || (size_t) written >= sizeof(path)) {
+    printBug("get_freq_from_sysfs: sysfs path does not fit in %d bytes", _PATH_FREQUENCY_MAX_LEN);
+    return UNKNOWN_DATA;
+  }
+
+  return get_freq_from_file(path);
+}
+
+// Returns the maximum frequency (MHz) of the iGPU, or UNKNOWN_DATA on failure
+long get_max_freq_from_file(struct pci* pci) {
+  return get_freq_from_sysfs(pci, _PATH_FREQUENCY_MAX);
+}
+
+// Returns the minimum frequency (MHz) of the iGPU, or UNKNOWN_DATA on failure
+long get_min_freq_from_file(struct pci* pci) {
+  return get_freq_from_sysfs(pci, _PATH_FREQUENCY_MIN);
+}
--- a/src/intel/udev.hpp
+++ b/src/intel/udev.hpp
@@ -0,0 +1,7 @@
+#ifndef __UDEV__
+#define __UDEV__
+
+// Both functions read the frequency from sysfs for the given PCI device and
+// return it in MHz, or -1 if it can not be obtained
+long get_max_freq_from_file(struct pci* pci);
+long get_min_freq_from_file(struct pci* pci);
+
+#endif
Author	SHA1	Message	Date
Dr-Noob	a397eb398e	[v0.11] Handle the case where the GPU is not found in the pci LUT	2021-12-18 20:12:41 +01:00
Dr-Noob	bfb9738132	[v0.11] Do not show error message when there is no Intel iGPU	2021-12-18 10:35:51 +01:00
Dr-Noob	6d4d8b621b	[v0.11] Fix compilation error and ambiguity with CUDA and Intel backend when enabled at the same time due to functions with the same name	2021-12-18 10:14:14 +01:00
Dr-Noob	93889b2b18	[v0.11] Small adjustments to fix compilation on older compilers	2021-12-10 16:18:39 +01:00
Dr-Noob	b6ce96e746	[v0.11] Add missing Intel iGPU topologies. Add script to check for missing topo/uarchs	2021-12-10 15:55:59 +01:00
Dr-Noob	5f52f73fe0	[v0.11] Completed most of Intel iGPU topologies	2021-12-10 15:32:29 +01:00
Dr-Noob	e5deeb1309	[v0.11] Adding more Intel iGPU topologies	2021-12-10 15:16:29 +01:00
Dr-Noob	44a884fd07	[v0.11] Print peak performance in Intel iGPU	2021-12-09 20:28:07 +01:00
Dr-Noob	1663a36135	[v0.11] Fetch and print max Intel iGPU frequency using sysfs	2021-12-09 20:18:39 +01:00
Dr-Noob	844377f17a	[v0.11] Add support for printing EUs (currently only in Gen9/Gen9.5)	2021-12-08 11:15:59 +01:00
Dr-Noob	2034bac006	[v0.11] Displaying Graphics Tier in Intel iGPUs	2021-11-27 14:02:02 +01:00
Dr-Noob	e7c4d5bf91	[v0.11] Adding Gen6, 7, 7.5 and 8 to database	2021-11-27 12:23:41 +01:00
Dr-Noob	b00050e739	[v0.11] Print available more information for iGPU	2021-11-27 11:22:16 +01:00
Dr-Noob	8db60b614d	[v0.11] Adding most of Gen9/9.5 iGPUs to database	2021-11-27 11:10:01 +01:00
Dr-Noob	8740337145	[v0.11] Adding uarch backend for intel iGPUs	2021-11-26 12:52:45 +01:00
Dr-Noob	ce004725ad	[v0.11] Working in printer backend to show logo and text for intel iGPU	2021-11-26 09:58:45 +01:00
Dr-Noob	310486a6a2	[v0.11] Fixes to recover CUDA functionality, ready for implementing Intel iGPU code	2021-11-26 09:33:57 +01:00
Dr-Noob	e5a4f91b20	[v0.11] Hacky way to solve CMake issues without requiring newer CMake versions	2021-11-26 09:19:24 +01:00
Dr-Noob	461e0d2ede	[v0.11] Working in master GPU handler for supporting diverse GPU vendors	2021-11-26 08:22:30 +01:00
Dr-Noob	149e5ad62c	[v0.11] Working for future support of Intel iGPUs	2021-11-25 19:03:52 +01:00
Dr-Noob	3502f48f71	[v0.11] Style adjustments in README	2021-11-25 18:06:00 +01:00
Dr-Noob	5acb4ff7dc	[v0.11] Small style adjustments in README	2021-11-25 18:01:57 +01:00
Dr-Noob	074c159e5f	[v0.11] Update README image	2021-11-25 17:58:57 +01:00
Dr-Noob	cedcfecb80	[v0.11] Dont show tensor cores when there is 0. Use MMA (matrix multiply accumulate) instead of TC (tensor cores)	2021-11-25 17:52:58 +01:00
Dr-Noob	32b2c59b50	[v0.11] Add peak performance with tensor cores to the output	2021-11-23 18:49:34 +01:00
Dr-Noob	8bf0276aae	[v0.10] Simple refactoring	2021-11-23 18:17:12 +01:00
Dr-Noob	821b6e760e	[v0.10] Add support for displaying the number of tensor cores	2021-11-23 18:09:13 +01:00
Dr-Noob	f212fb88d4	[v0.10] Fix pci initialization	2021-09-08 08:17:06 +02:00
Dr-Noob	81607151dc	[v0.10] Update build script and README	2021-09-04 16:02:50 +02:00
Dr-Noob	bdf9eb0079	[v0.10] Use CMake instead of Make, which will take care of pciutils automatically if it is not installed	2021-09-04 14:05:16 +02:00
Dr-Noob	039e7c350d	[v0.10] Replace nvml by pciutils to get pci ids. Needs work to integrate it properly. NVML is enough in the case of NVIDIA GPUs, but because more GPUs will be added in the future, a solution like pciutils is needed	2021-09-04 12:19:42 +02:00
Dr-Noob	4b4d1bc030	[v0.10] Add --list-gpus option	2021-08-23 22:39:31 +02:00
Dr-Noob	d00e3f183d	[v0.10] Add simple man page	2021-08-23 22:02:45 +02:00