[v0.20] Merge Intel iGPU branch for preliminary Intel GPU support

2021-12-19 10:11:23 +01:00
parent 38b8949e1c a397eb398e
commit 981bfabdc8
30 changed files with 1061 additions and 241 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,23 +7,22 @@ project(gpufetch CXX)
 set(SRC_DIR "src")
 set(COMMON_DIR "${SRC_DIR}/common")
 set(CUDA_DIR "${SRC_DIR}/cuda")
+set(INTEL_DIR "${SRC_DIR}/intel")

-if(NOT WIN32)
-  string(ASCII 27 Esc)
-  set(ColorReset "${Esc}[m")
-  set(ColorBold  "${Esc}[1m")
-  set(Red         "${Esc}[31m")
-  set(Green	  "${Esc}[32m")
-  set(BoldRed     "${Esc}[1;31m")
-  set(BoldGreen   "${Esc}[1;32m")
-  set(BoldYellow  "${Esc}[1;33m")
+if(NOT DEFINED ENABLE_INTEL_BACKEND)
+    set(ENABLE_INTEL_BACKEND true)
 endif()

-check_language(CUDA)
-if(CMAKE_CUDA_COMPILER)
+if(NOT DEFINED ENABLE_CUDA_BACKEND OR ENABLE_CUDA_BACKEND)
+  check_language(CUDA)
+  if(CMAKE_CUDA_COMPILER)
    enable_language(CUDA)
-else()
-  message(FATAL_ERROR "${BoldRed}[ERROR]${ColorReset} Unable to find CUDA compiler. You may use -DCMAKE_CUDA_COMPILER and -DCMAKE_CUDA_COMPILER_TOOLKIT_ROOT if CUDA is installed but not detected by CMake")
+    set(ENABLE_CUDA_BACKEND true)
+    # link_directories() only takes directories and only affects targets created later, so call it before add_executable(gpufetch ...)
+    link_directories(${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/lib)
+  else()
+    set(ENABLE_CUDA_BACKEND false)
+  endif()
 endif()

 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")
@@ -48,34 +47,73 @@ else()
  link_libraries(${PCIUTILS_LIBRARIES})
 endif()

+add_executable(gpufetch ${COMMON_DIR}/main.cpp ${COMMON_DIR}/args.cpp ${COMMON_DIR}/gpu.cpp ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/global.cpp ${COMMON_DIR}/printer.cpp ${COMMON_DIR}/master.cpp ${COMMON_DIR}/uarch.cpp)
 set(SANITY_FLAGS "-Wfloat-equal -Wshadow -Wpointer-arith")
-set(CMAKE_CXX_FLAGS "${SANITY_FLAGS} -Wall -Wextra -pedantic -fstack-protector-all -pedantic")
+set(CMAKE_CXX_FLAGS "${SANITY_FLAGS} -Wall -Wextra -pedantic -fstack-protector-all -pedantic -std=c++11")

-# https://en.wikipedia.org/w/index.php?title=CUDA&section=5#GPUs_supported
-# https://raw.githubusercontent.com/PointCloudLibrary/pcl/master/cmake/pcl_find_cuda.cmake
-if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0")
+if(ENABLE_INTEL_BACKEND)
+  target_compile_definitions(gpufetch PUBLIC BACKEND_INTEL)
+
+  add_library(intel_backend STATIC ${INTEL_DIR}/intel.cpp ${INTEL_DIR}/pci.cpp ${INTEL_DIR}/uarch.cpp ${INTEL_DIR}/udev.cpp)
+
+  if(NOT ${PCIUTILS_FOUND})
+    add_dependencies(intel_backend pciutils)
+  endif()
+
+  target_link_libraries(gpufetch intel_backend)
+endif()
+
+if(ENABLE_CUDA_BACKEND)
+  target_compile_definitions(gpufetch PUBLIC BACKEND_CUDA)
+
+  # https://en.wikipedia.org/w/index.php?title=CUDA&section=5#GPUs_supported
+  # https://raw.githubusercontent.com/PointCloudLibrary/pcl/master/cmake/pcl_find_cuda.cmake
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0")
    set(CMAKE_CUDA_ARCHITECTURES 35 37 50 52 53 60 61 62 70 72 75 80 86)
-elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.0")
+  elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.0")
    set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72 75)
-elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "9.0")
+  elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "9.0")
    set(CMAKE_CUDA_ARCHITECTURES 30 32 35 37 50 52 53 60 61 62 70 72)
-elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "8.0")
+  elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "8.0")
    set(CMAKE_CUDA_ARCHITECTURES 20 21 30 32 35 37 50 52 53 60 61 62)
-endif()
+  endif()

-link_directories(${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/lib)
+  add_library(cuda_backend STATIC ${CUDA_DIR}/cuda.cpp ${CUDA_DIR}/uarch.cpp ${CUDA_DIR}/pci.cpp)

-add_library(cuda_backend STATIC ${CUDA_DIR}/cuda.cpp ${CUDA_DIR}/uarch.cpp ${CUDA_DIR}/pci.cpp)
-add_executable(gpufetch ${COMMON_DIR}/main.cpp ${COMMON_DIR}/args.cpp ${COMMON_DIR}/gpu.cpp ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/global.cpp ${COMMON_DIR}/printer.cpp)
-
-if(NOT ${PCIUTILS_FOUND})
+  if(NOT ${PCIUTILS_FOUND})
    add_dependencies(cuda_backend pciutils)
-  add_dependencies(gpufetch pciutils)
+  endif()
+
+  target_include_directories(cuda_backend PUBLIC ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/samples/common/inc ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/include)
+
+  target_link_libraries(cuda_backend PRIVATE cudart)
+  target_link_libraries(gpufetch cuda_backend)
 endif()

-target_include_directories(cuda_backend PUBLIC ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/samples/common/inc ${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/targets/x86_64-linux/include)
-
-target_link_libraries(cuda_backend cudart)
-target_link_libraries(gpufetch cuda_backend pci z)
-
+target_link_libraries(gpufetch pci z)
 install(TARGETS gpufetch DESTINATION bin)
+
+if(NOT WIN32)
+  string(ASCII 27 Esc)
+  set(ColorReset "${Esc}[m")
+  set(ColorBold  "${Esc}[1m")
+  set(Red         "${Esc}[31m")
+  set(Green       "${Esc}[32m")
+  set(BoldRed     "${Esc}[1;31m")
+  set(BoldGreen   "${Esc}[1;32m")
+  set(BoldYellow  "${Esc}[1;33m")
+endif()
+
+message(STATUS "----------------------")
+message(STATUS "gpufetch build report:")
+if(ENABLE_INTEL_BACKEND)
+  message(STATUS "Intel backend: ${BoldGreen}ON${ColorReset}")
+else()
+  message(STATUS "Intel backend: ${BoldRed}OFF${ColorReset}")
+endif()
+if(ENABLE_CUDA_BACKEND)
+  message(STATUS "CUDA backend: ${BoldGreen}ON${ColorReset}")
+else()
+  message(STATUS "CUDA backend: ${BoldRed}OFF${ColorReset}")
+endif()
+message(STATUS "----------------------")
--- a/src/common/args.cpp
+++ b/src/common/args.cpp
@@ -13,8 +13,13 @@
 #define NUM_COLORS         4

 #define COLOR_STR_NVIDIA "nvidia"
+#define COLOR_STR_INTEL  "intel"

-#define COLOR_DEFAULT_NVIDIA "118,185,0:255,255,255:255,255,255:118,185,0"
+//                              +-----------------------+-----------------------+
+//                              | Color logo            | Color text            |
+//                              | Color 1   | Color 2   | Color 1   | Color 2   |
+#define COLOR_DEFAULT_NVIDIA    "118,185,000:255,255,255:255,255,255:118,185,000"
+#define COLOR_DEFAULT_INTEL     "015,125,194:230,230,230:040,150,220:230,230,230"

 struct args_struct {
  bool help_flag;
@@ -145,6 +150,7 @@ bool parse_color(char* optarg_str, struct color*** cs) {
  bool free_ptr = true;

  if(strcmp(optarg_str, COLOR_STR_NVIDIA) == 0) color_to_copy = COLOR_DEFAULT_NVIDIA;
+  else if(strcmp(optarg_str, COLOR_STR_INTEL) == 0) color_to_copy = COLOR_DEFAULT_INTEL;
  else {
    str_to_parse = optarg_str;
    free_ptr = false;
--- a/src/common/ascii.hpp
+++ b/src/common/ascii.hpp
@@ -1,32 +1,32 @@
 #ifndef __ASCII__
 #define __ASCII__

-#define COLOR_NONE         ""
-#define COLOR_FG_BLACK     "\x1b[30;1m"
-#define COLOR_FG_RED       "\x1b[31;1m"
-#define COLOR_FG_GREEN     "\x1b[32;1m"
-#define COLOR_FG_YELLOW    "\x1b[33;1m"
-#define COLOR_FG_BLUE      "\x1b[34;1m"
-#define COLOR_FG_MAGENTA   "\x1b[35;1m"
-#define COLOR_FG_CYAN      "\x1b[36;1m"
-#define COLOR_FG_WHITE     "\x1b[37;1m"
-#define COLOR_BG_BLACK     "\x1b[40;1m"
-#define COLOR_BG_RED       "\x1b[41;1m"
-#define COLOR_BG_GREEN     "\x1b[42;1m"
-#define COLOR_BG_YELLOW    "\x1b[43;1m"
-#define COLOR_BG_BLUE      "\x1b[44;1m"
-#define COLOR_BG_MAGENTA   "\x1b[45;1m"
-#define COLOR_BG_CYAN      "\x1b[46;1m"
-#define COLOR_BG_WHITE     "\x1b[47;1m"
-#define COLOR_FG_B_BLACK   "\x1b[90;1m"
-#define COLOR_FG_B_RED     "\x1b[91;1m"
-#define COLOR_FG_B_GREEN   "\x1b[92;1m"
-#define COLOR_FG_B_YELLOW  "\x1b[93;1m"
-#define COLOR_FG_B_BLUE    "\x1b[94;1m"
-#define COLOR_FG_B_MAGENTA "\x1b[95;1m"
-#define COLOR_FG_B_CYAN    "\x1b[96;1m"
-#define COLOR_FG_B_WHITE   "\x1b[97;1m"
-#define COLOR_RESET        "\x1b[m"
+#define C_NONE         ""
+#define C_FG_BLACK     "\x1b[30;1m"
+#define C_FG_RED       "\x1b[31;1m"
+#define C_FG_GREEN     "\x1b[32;1m"
+#define C_FG_YELLOW    "\x1b[33;1m"
+#define C_FG_BLUE      "\x1b[34;1m"
+#define C_FG_MAGENTA   "\x1b[35;1m"
+#define C_FG_CYAN      "\x1b[36;1m"
+#define C_FG_WHITE     "\x1b[37;1m"
+#define C_BG_BLACK     "\x1b[40;1m"
+#define C_BG_RED       "\x1b[41;1m"
+#define C_BG_GREEN     "\x1b[42;1m"
+#define C_BG_YELLOW    "\x1b[43;1m"
+#define C_BG_BLUE      "\x1b[44;1m"
+#define C_BG_MAGENTA   "\x1b[45;1m"
+#define C_BG_CYAN      "\x1b[46;1m"
+#define C_BG_WHITE     "\x1b[47;1m"
+#define C_FG_B_BLACK   "\x1b[90;1m"
+#define C_FG_B_RED     "\x1b[91;1m"
+#define C_FG_B_GREEN   "\x1b[92;1m"
+#define C_FG_B_YELLOW  "\x1b[93;1m"
+#define C_FG_B_BLUE    "\x1b[94;1m"
+#define C_FG_B_MAGENTA "\x1b[95;1m"
+#define C_FG_B_CYAN    "\x1b[96;1m"
+#define C_FG_B_WHITE   "\x1b[97;1m"
+#define C_RESET        "\x1b[m"

 struct ascii_logo {
  const char* art;
@@ -59,6 +59,23 @@ $C2##   ##  ##   ##  ##  ##   ##  ##   #: :#    \
 $C2##   ##   ## ##   ##  ##   ##  ##  #######   \
 $C2##   ##    ###    ##  ######   ## ##     ##  "

+#define ASCII_INTEL \
+"$C1                   .#################.          \
+$C1              .####                   ####.     \
+$C1          .##                             ###   \
+$C1       ##                          :##     ###  \
+$C1    #                ##            :##      ##  \
+$C1  ##   ##  ######.   ####  ######  :##      ##  \
+$C1 ##    ##  ##:  ##:  ##   ##   ### :##     ###  \
+$C1##     ##  ##:  ##:  ##  :######## :##    ##    \
+$C1##     ##  ##:  ##:  ##   ##.   .  :## ####     \
+$C1##      #  ##:  ##:  ####  #####:   ##          \
+$C1 ##                                             \
+$C1  ###.                         ..o####.         \
+$C1   ######oo...         ..oo#######              \
+$C1          o###############o                     "
+
+// LONG LOGOS
 #define ASCII_NVIDIA_L \
 "$C1                  MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM  \
 $C1                  MMMMMMMMMMMMMMMMMMMMMMMMMMMMMM  \
@@ -76,14 +93,37 @@ $C1            olcc::;              ,:ccloMMMMMMMMM  \
 $C1                  :......oMMMMMMMMMMMMMMMMMMMMMM  \
 $C1                  :lllMMMMMMMMMMMMMMMMMMMMMMMMMM  "

+#define ASCII_INTEL_L \
+"$C1                               ###############@               \
+$C1                       ######@                ######@         \
+$C1                  ###@                              ###@      \
+$C1              ##@                                     ###@    \
+$C1         ##@                                             ##@  \
+$C1         ##@                                             ##@  \
+$C1      @                    ##@                ##@        ##@  \
+$C1    #@   ##@   ########@   #####@   #####@    ##@        ##@  \
+$C1   #@    ##@   ##@    ##@  ##@    ###@  ###@  ##@        ##@  \
+$C1  #@     ##@   ##@    ##@  ##@    ##@    ##@  ##@       ##@   \
+$C1 #@      ##@   ##@    ##@  ##@    #########@  ##@     ###@    \
+$C1 #@      ##@   ##@    ##@  ##@    ##@         ##@   ####@     \
+$C1 #@       #@   ##@    ##@   ####@  ########@   #@  ##@        \
+$C1 ##@                                                          \
+$C1  ##@                                                         \
+$C1  ###@                                        ###@            \
+$C1    ####@                               #########@            \
+$C1      #########@               ###############@               \
+$C1          ##############################@                     "
+
 typedef struct ascii_logo asciiL;

-//                      ------------------------------------------------------------------------------------------------------
-//                      | LOGO          | W | H | REPLACE | COLORS LOGO (>0 && <10)        | COLORS TEXT (=2)                |
-//                      ------------------------------------------------------------------------------------------------------
-asciiL logo_nvidia    = { ASCII_NVIDIA,    45, 19, false, {COLOR_FG_GREEN, COLOR_FG_WHITE}, {COLOR_FG_WHITE, COLOR_FG_GREEN} };
-// Long variants        | ---------------------------------------------------------------------------------------------------|
-asciiL logo_nvidia_l  = { ASCII_NVIDIA_L,  50, 15, false, {COLOR_FG_GREEN, COLOR_FG_WHITE}, {COLOR_FG_WHITE, COLOR_FG_GREEN} };
-asciiL logo_unknown   = { NULL,            0,  0,  false, {COLOR_NONE},                     {COLOR_NONE,    COLOR_NONE}      };
+//                      ------------------------------------------------------------------------------------------
+//                      | LOGO            | W | H | REPLACE | COLORS LOGO           | COLORS TEXT                |
+//                      ------------------------------------------------------------------------------------------
+asciiL logo_nvidia    = { ASCII_NVIDIA,    45, 19, false, {C_FG_GREEN, C_FG_WHITE}, {C_FG_WHITE, C_FG_GREEN} };
+asciiL logo_intel     = { ASCII_INTEL,     48, 14, false, {C_FG_CYAN},              {C_FG_CYAN,  C_FG_WHITE} };
+// Long variants        | ---------------------------------------------------------------------------------------|
+asciiL logo_nvidia_l  = { ASCII_NVIDIA_L,  50, 15, false, {C_FG_GREEN, C_FG_WHITE}, {C_FG_WHITE, C_FG_GREEN} };
+asciiL logo_intel_l   = { ASCII_INTEL_L,   62, 19, true,  {C_BG_CYAN, C_BG_WHITE},  {C_FG_CYAN,  C_FG_WHITE} };
+asciiL logo_unknown   = { NULL,            0,  0,  false, {C_NONE},                 {C_NONE,     C_NONE}     };

 #endif
--- a/src/common/gpu.cpp
+++ b/src/common/gpu.cpp
@@ -32,8 +32,6 @@ VENDOR get_gpu_vendor(struct gpu_info* gpu) {
  return gpu->vendor;
 }

-double trunc(double val) { return ((int)(100 * val)) / 100.0; }
-
 int32_t get_value_as_smallest_unit(char ** str, uint64_t value) {
  int32_t ret;
  int max_len = 10; // Max is 8 for digits, 2 for units
@@ -145,6 +143,13 @@ char* get_str_peak_performance(struct gpu_info* gpu) {
 }

 char* get_str_peak_performance_tensor(struct gpu_info* gpu) {
-  return get_str_peak_performance_generic(gpu->peak_performance_t);
+  return get_str_peak_performance_generic(gpu->peak_performance_tcu);
 }

+char* get_str_generic(int32_t data) {
+  // Decimal text of an int32_t: at most 10 digits, +1 for the minus sign, +1 for the terminating NUL
+  uint32_t max_size = 12;
+  char* dummy = (char *) ecalloc(max_size, sizeof(char));
+  snprintf(dummy, max_size, "%d", data);
+  return dummy;
+}
--- a/src/common/gpu.hpp
+++ b/src/common/gpu.hpp
@@ -9,7 +9,8 @@
 #define UNKNOWN_FREQ -1

 enum {
-  GPU_VENDOR_NVIDIA
+  GPU_VENDOR_NVIDIA,
+  GPU_VENDOR_INTEL
 };

 enum {
@@ -43,6 +44,12 @@ struct topology {
  int32_t tensor_cores;
 };

+struct topology_i {
+  int32_t slices;
+  int32_t subslices;
+  int32_t eu_subslice;
+};
+
 struct memory {
  int64_t size_bytes;
  MEMTYPE type;
@@ -58,10 +65,11 @@ struct gpu_info {
  int64_t freq;
  struct pci* pci;
  struct topology* topo;
+  struct topology_i* topo_i;
  struct memory* mem;
  struct cache* cach;
  int64_t peak_performance;
-  int64_t peak_performance_t;
+  int64_t peak_performance_tcu;
  int32_t idx;
 };

@@ -75,5 +83,6 @@ char* get_str_memory_clock(struct gpu_info* gpu);
 char* get_str_l2(struct gpu_info* gpu);
 char* get_str_peak_performance(struct gpu_info* gpu);
 char* get_str_peak_performance_tensor(struct gpu_info* gpu);
+char* get_str_generic(int32_t data);

 #endif
--- a/src/common/main.cpp
+++ b/src/common/main.cpp
@@ -4,6 +4,7 @@

 #include "args.hpp"
 #include "global.hpp"
+#include "master.hpp"
 #include "../cuda/cuda.hpp"
 #include "../cuda/uarch.hpp"

@@ -65,18 +66,19 @@ int main(int argc, char* argv[]) {
    return EXIT_SUCCESS;
  }

+  struct gpu_list* list = get_gpu_list();
  if(list_gpus()) {
-    return print_gpus_list();
+    return print_gpus_list(list) ? EXIT_SUCCESS : EXIT_FAILURE; // print_gpus_list returns bool, not an exit status
  }

  set_log_level(true);

-  printWarn("gpufetch is in beta. The provided information may be incomplete or wrong.\n\
+  printf("[WARNING]: gpufetch is in beta. The provided information may be incomplete or wrong.\n\
 If you want to help to improve gpufetch, please compare the output of the program\n\
 with a reliable source which you know is right (e.g, techpowerup.com) and report\n\
-any inconsistencies to https://github.com/Dr-Noob/gpufetch/issues");
+any inconsistencies to https://github.com/Dr-Noob/gpufetch/issues\n");

-  struct gpu_info* gpu = get_gpu_info(get_gpu_idx());
+  struct gpu_info* gpu = get_gpu_info(list, get_gpu_idx());
  if(gpu == NULL)
    return EXIT_FAILURE;

--- a/src/common/master.cpp
+++ b/src/common/master.cpp
@@ -0,0 +1,85 @@
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "master.hpp"
+#include "global.hpp"
+#include "../cuda/cuda.hpp"
+#include "../intel/intel.hpp"
+
+// Upper bound on the number of GPUs the list can hold
+#define MAX_GPUS 1000
+
+// GPUs found by the backends compiled in; only the first num_gpus slots of gpus are valid
+struct gpu_list {
+  struct gpu_info ** gpus;
+  int num_gpus;
+};
+
+/*
+ * Builds the list of GPUs by querying each backend that was compiled in:
+ * CUDA devices first (BACKEND_CUDA), then the Intel GPU (BACKEND_INTEL).
+ * The list may be empty; at most MAX_GPUS entries are stored.
+ */
+struct gpu_list* get_gpu_list() {
+  int idx = 0;
+  struct gpu_list* list = (struct gpu_list*) emalloc(sizeof(struct gpu_list));
+  list->num_gpus = 0;
+  list->gpus = (struct gpu_info**) emalloc(sizeof(struct gpu_info*) * MAX_GPUS);
+
+#ifdef BACKEND_CUDA
+  // get_gpu_info_cuda returns NULL once idx is past the last device (or on error)
+  while(idx < MAX_GPUS) {
+    list->gpus[idx] = get_gpu_info_cuda(idx);
+    if(list->gpus[idx] == NULL) break;
+    idx++;
+  }
+
+  list->num_gpus += idx;
+#endif
+
+#ifdef BACKEND_INTEL
+  // Only store the Intel GPU if the CUDA devices left a free slot
+  if(idx < MAX_GPUS) {
+    list->gpus[idx] = get_gpu_info_intel();
+    if(list->gpus[idx] != NULL) list->num_gpus++;
+  }
+#endif
+
+  return list;
+}
+
+/*
+ * Prints one "GPU <i>: ..." line per entry, delegating the description to
+ * the backend that matches the entry's vendor. Always returns true.
+ */
+bool print_gpus_list(struct gpu_list* list) {
+  for(int i=0; i < list->num_gpus; i++) {
+    printf("GPU %d: ", i);
+    if(list->gpus[i]->vendor == GPU_VENDOR_NVIDIA) {
+      #ifdef BACKEND_CUDA
+        print_gpu_cuda(list->gpus[i]);
+      #endif
+    }
+    else if(list->gpus[i]->vendor == GPU_VENDOR_INTEL) {
+      #ifdef BACKEND_INTEL
+        print_gpu_intel(list->gpus[i]);
+      #endif
+    }
+  }
+
+  return true;
+}
+
+/*
+ * Returns the GPU at position idx, or NULL (after printing an error) when
+ * idx is not a valid position in the list.
+ */
+struct gpu_info* get_gpu_info(struct gpu_list* list, int idx) {
+  if(idx < 0 || idx >= list->num_gpus) {
+    printErr("Requested GPU index %d in a system with %d GPUs", idx, list->num_gpus);
+    return NULL;
+  }
+
+  return list->gpus[idx];
+}
--- a/src/common/master.hpp
+++ b/src/common/master.hpp
@@ -0,0 +1,12 @@
+#ifndef __GPU_LIST__
+#define __GPU_LIST__
+
+#include "gpu.hpp"
+
+struct gpu_list;
+
+struct gpu_list* get_gpu_list();
+bool print_gpus_list(struct gpu_list* list);
+struct gpu_info* get_gpu_info(struct gpu_list* list, int idx);
+
+#endif
--- a/src/common/pci.cpp
+++ b/src/common/pci.cpp
@@ -2,33 +2,61 @@
 #include "pci.hpp"
 #include <cstddef>

-/*
- * doc: https://wiki.osdev.org/PCI#Class_Codes
- *      https://pci-ids.ucw.cz/read/PC
- */
-#define VENDOR_ID_NVIDIA 0x10de
 #define CLASS_VGA_CONTROLLER 0x0300

-uint16_t pciutils_get_pci_vendor_id(struct pci_dev *devices) {
+bool pciutils_is_vendor_id_present(struct pci_dev *devices, int id) {
  for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
-    if(dev->vendor_id == VENDOR_ID_NVIDIA && dev->device_class == CLASS_VGA_CONTROLLER) {
-      return dev->vendor_id;
+    if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
+      return true;
    }
  }
-  printErr("Unable to find a CUDA device using pciutils");
-  return 0;
+
+  printWarn("Unable to find a valid device for id %d using pciutils", id);
+  return false;
 }

-uint16_t pciutils_get_pci_device_id(struct pci_dev *devices) {
+uint16_t pciutils_get_pci_device_id(struct pci_dev *devices, int id) {
  for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
-   if(dev->vendor_id == VENDOR_ID_NVIDIA && dev->device_class == CLASS_VGA_CONTROLLER) {
+   if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
      return dev->device_id;
    }
  }
-  printErr("Unable to find a CUDA device using pciutils");
+
+  printErr("Unable to find a valid device for id %d using pciutils", id);
  return 0;
 }

+// Copies the PCI location (domain, bus, dev, func) of the first VGA controller whose
+// vendor matches id (the device pciutils_get_pci_device_id reports) into pci.
+void pciutils_set_pci_bus(struct pci* pci, struct pci_dev *devices, int id) {
+  for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
+    if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
+      pci->domain = dev->domain;
+      pci->bus = dev->bus;
+      pci->dev = dev->dev;
+      pci->func = dev->func;
+      return;
+    }
+  }
+
+  printErr("Unable to find a valid device for id %d using pciutils", id);
+}
+
+struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id) {
+  // Returns a new struct pci for the VGA controller of vendor id, or NULL if none
+  // is present. Allocation happens after the check so the NULL path leaks nothing.
+  if(!pciutils_is_vendor_id_present(devices, id)) {
+    return NULL;
+  }
+
+  // TODO: Refactor this; instead of 2xGet + 1xSet, do it better
+  struct pci* pci = (struct pci*) emalloc(sizeof(struct pci));
+  pci->vendor_id = id;
+  pci->device_id = pciutils_get_pci_device_id(devices, id);
+  pciutils_set_pci_bus(pci, devices, id);
+  return pci;
+}
+
 struct pci_dev *get_pci_devices_from_pciutils() {
  struct pci_access *pacc;
  struct pci_dev *dev;
--- a/src/common/pci.hpp
+++ b/src/common/pci.hpp
@@ -6,8 +6,16 @@ extern "C" {
  #include <pci/pci.h>
 }

-uint16_t pciutils_get_pci_vendor_id(struct pci_dev *devices);
-uint16_t pciutils_get_pci_device_id(struct pci_dev *devices);
+struct pci {
+  uint16_t vendor_id;
+  uint16_t device_id;
+  uint16_t domain;
+  uint16_t bus;
+  uint16_t dev;
+  uint16_t func;
+};
+
+struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id);
 struct pci_dev *get_pci_devices_from_pciutils();

 #endif
--- a/src/common/printer.cpp
+++ b/src/common/printer.cpp
@@ -9,6 +9,8 @@
 #include "../common/global.hpp"
 #include "../common/gpu.hpp"

+#include "../intel/uarch.hpp"
+#include "../intel/intel.hpp"
 #include "../cuda/cuda.hpp"
 #include "../cuda/uarch.hpp"

@@ -34,11 +36,13 @@ enum {
  ATTRIBUTE_CHIP,
  ATTRIBUTE_UARCH,
  ATTRIBUTE_TECHNOLOGY,
+  ATTRIBUTE_GT,
  ATTRIBUTE_FREQUENCY,
  ATTRIBUTE_STREAMINGMP,
  ATTRIBUTE_CORESPERMP,
  ATTRIBUTE_CUDA_CORES,
  ATTRIBUTE_TENSOR_CORES,
+  ATTRIBUTE_EUS,
  ATTRIBUTE_L2,
  ATTRIBUTE_MEMORY,
  ATTRIBUTE_MEMORY_FREQ,
@@ -52,11 +56,13 @@ static const char* ATTRIBUTE_FIELDS [] = {
  "GPU processor:",
  "Microarchitecture:",
  "Technology:",
+  "Graphics Tier:",
  "Max Frequency:",
  "SMs:",
  "Cores/SM:",
  "CUDA Cores:",
  "Tensor Cores:",
+  "Execution Units:",
  "L2 Size:",
  "Memory:",
  "Memory frequency:",
@@ -70,11 +76,13 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
  "Processor:",
  "uArch:",
  "Technology:",
+  "GT:",
  "Max Freq.:",
  "SMs:",
  "Cores/SM:",
  "CUDA Cores:",
  "Tensor Cores:",
+  "EUs:",
  "L2 Size:",
  "Memory:",
  "Memory freq.:",
@@ -200,23 +208,32 @@ void replace_bgbyfg_color(struct ascii_logo* logo) {
  for(int i=0; i < 2; i++) {
    if(logo->color_ascii[i] == NULL) break;

-    if(strcmp(logo->color_ascii[i], COLOR_BG_BLACK) == 0) strcpy(logo->color_ascii[i], COLOR_FG_BLACK);
-    else if(strcmp(logo->color_ascii[i], COLOR_BG_RED) == 0) strcpy(logo->color_ascii[i], COLOR_FG_RED);
-    else if(strcmp(logo->color_ascii[i], COLOR_BG_GREEN) == 0) strcpy(logo->color_ascii[i], COLOR_FG_GREEN);
-    else if(strcmp(logo->color_ascii[i], COLOR_BG_YELLOW) == 0) strcpy(logo->color_ascii[i], COLOR_FG_YELLOW);
-    else if(strcmp(logo->color_ascii[i], COLOR_BG_BLUE) == 0) strcpy(logo->color_ascii[i], COLOR_FG_BLUE);
-    else if(strcmp(logo->color_ascii[i], COLOR_BG_MAGENTA) == 0) strcpy(logo->color_ascii[i], COLOR_FG_MAGENTA);
-    else if(strcmp(logo->color_ascii[i], COLOR_BG_CYAN) == 0) strcpy(logo->color_ascii[i], COLOR_FG_CYAN);
-    else if(strcmp(logo->color_ascii[i], COLOR_BG_WHITE) == 0) strcpy(logo->color_ascii[i], COLOR_FG_WHITE);
+    if(strcmp(logo->color_ascii[i], C_BG_BLACK) == 0) strcpy(logo->color_ascii[i], C_FG_BLACK);
+    else if(strcmp(logo->color_ascii[i], C_BG_RED) == 0) strcpy(logo->color_ascii[i], C_FG_RED);
+    else if(strcmp(logo->color_ascii[i], C_BG_GREEN) == 0) strcpy(logo->color_ascii[i], C_FG_GREEN);
+    else if(strcmp(logo->color_ascii[i], C_BG_YELLOW) == 0) strcpy(logo->color_ascii[i], C_FG_YELLOW);
+    else if(strcmp(logo->color_ascii[i], C_BG_BLUE) == 0) strcpy(logo->color_ascii[i], C_FG_BLUE);
+    else if(strcmp(logo->color_ascii[i], C_BG_MAGENTA) == 0) strcpy(logo->color_ascii[i], C_FG_MAGENTA);
+    else if(strcmp(logo->color_ascii[i], C_BG_CYAN) == 0) strcpy(logo->color_ascii[i], C_FG_CYAN);
+    else if(strcmp(logo->color_ascii[i], C_BG_WHITE) == 0) strcpy(logo->color_ascii[i], C_FG_WHITE);
+  }
+}
+
+struct ascii_logo* choose_ascii_art_aux(struct ascii_logo* logo_long, struct ascii_logo* logo_short, struct terminal* term, int lf) {
+  // Prefer the long logo; fall back to the short one when it does not fit
+  // or when no terminal information is available (term == NULL)
+  if(term != NULL && ascii_fits_screen(term->w, *logo_long, lf)) return logo_long;
+  else {
+    return logo_short;
  }
 }

 void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* term, int lf) {
  if(art->vendor == GPU_VENDOR_NVIDIA) {
-    if(term != NULL && ascii_fits_screen(term->w, logo_nvidia_l, lf))
-      art->art = &logo_nvidia_l;
-    else
-      art->art = &logo_nvidia;
+    art->art = choose_ascii_art_aux(&logo_nvidia_l, &logo_nvidia, term, lf);
+  }
+  else if(art->vendor == GPU_VENDOR_INTEL) {
+    art->art = choose_ascii_art_aux(&logo_intel_l, &logo_intel, term, lf);
  }
  else {
    art->art = &logo_unknown;
@@ -228,10 +245,10 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
  switch(art->style) {
    case STYLE_LEGACY:
      logo->replace_blocks = false;
-      strcpy(logo->color_text[0], COLOR_NONE);
-      strcpy(logo->color_text[1], COLOR_NONE);
-      strcpy(logo->color_ascii[0], COLOR_NONE);
-      strcpy(logo->color_ascii[1], COLOR_NONE);
+      strcpy(logo->color_text[0], C_NONE);
+      strcpy(logo->color_text[1], C_NONE);
+      strcpy(logo->color_ascii[0], C_NONE);
+      strcpy(logo->color_ascii[1], C_NONE);
      art->reset[0] = '\0';
      break;
    case STYLE_RETRO:
@@ -245,7 +262,7 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
        strcpy(logo->color_ascii[0], rgb_to_ansi(cs[0], logo->replace_blocks, true));
        strcpy(logo->color_ascii[1], rgb_to_ansi(cs[1], logo->replace_blocks, true));
      }
-      strcpy(art->reset, COLOR_RESET);
+      strcpy(art->reset, C_RESET);
      break;
    case STYLE_INVALID:
    default:
@@ -342,6 +359,48 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t text_space, con
  printf("\n");
 }

+#ifdef BACKEND_INTEL
+bool print_gpufetch_intel(struct gpu_info* gpu, STYLE s, struct color** cs, struct terminal* term) {
+  struct ascii* art = set_ascii(get_gpu_vendor(gpu), s);
+
+  if(art == NULL)
+    return false;
+
+  char* gpu_name = get_str_gpu_name(gpu);
+  char* uarch = get_str_uarch_intel(gpu->arch);
+  char* gt = get_str_gt(gpu->arch);
+  char* manufacturing_process = get_str_process(gpu->arch);
+  char* eus = get_str_eu(gpu);
+  char* max_frequency = get_str_freq(gpu);
+  char* pp = get_str_peak_performance(gpu);
+
+  setAttribute(art, ATTRIBUTE_NAME, gpu_name);
+  setAttribute(art, ATTRIBUTE_UARCH, uarch);
+  setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
+  setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
+  setAttribute(art, ATTRIBUTE_GT, gt);
+  setAttribute(art, ATTRIBUTE_EUS, eus);
+  setAttribute(art, ATTRIBUTE_PEAK, pp);
+
+  const char** attribute_fields = ATTRIBUTE_FIELDS;
+  uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
+  uint32_t longest_field = longest_field_length(art, longest_attribute);
+  choose_ascii_art(art, cs, term, longest_field);
+
+  if(!ascii_fits_screen(term->w, *art->art, longest_field)) {
+    // Despite of choosing the smallest logo, the output does not fit
+    // Choose the shorter field names and recalculate the longest attr
+    attribute_fields = ATTRIBUTE_FIELDS_SHORT;
+    longest_attribute = longest_attribute_length(art, attribute_fields);
+  }
+
+  print_ascii_generic(art, longest_attribute, term->w - art->art->width, attribute_fields);
+
+  return true;
+}
+#endif
+
+#ifdef BACKEND_CUDA
 bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struct terminal* term) {
  struct ascii* art = set_ascii(get_gpu_vendor(gpu), s);

@@ -350,7 +409,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc

  char* gpu_name = get_str_gpu_name(gpu);
  char* gpu_chip = get_str_chip(gpu->arch);
-  char* uarch = get_str_uarch(gpu->arch);
+  char* uarch = get_str_uarch_cuda(gpu->arch);
  char* comp_cap = get_str_cc(gpu->arch);
  char* manufacturing_process = get_str_process(gpu->arch);
  char* sms = get_str_sm(gpu);
@@ -416,6 +475,7 @@ bool print_gpufetch_cuda(struct gpu_info* gpu, STYLE s, struct color** cs, struc

  return true;
 }
+#endif

 struct terminal* get_terminal_size() {
  struct terminal* term = (struct terminal*) emalloc(sizeof(struct terminal));
@@ -448,5 +508,17 @@ struct terminal* get_terminal_size() {
 bool print_gpufetch(struct gpu_info* gpu, STYLE s, struct color** cs) {
  struct terminal* term = get_terminal_size();

+  if(gpu->vendor == GPU_VENDOR_NVIDIA)
+    #ifdef BACKEND_CUDA
      return print_gpufetch_cuda(gpu, s, cs, term);
+    #else
+      return false;
+    #endif
+  else {
+    #ifdef BACKEND_INTEL
+      return print_gpufetch_intel(gpu, s, cs, term);
+    #else
+      return false;
+    #endif
+  }
 }
--- a/src/common/uarch.cpp
+++ b/src/common/uarch.cpp
@@ -0,0 +1,40 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "global.hpp"
+#include "uarch.hpp"
+
+// Enough for "%.2fum" / "%dnm" applied to any int32_t, NUL included
+#define PROCESS_STR_MAX 32
+
+/*
+ * Returns a newly allocated string describing arch->process:
+ * "<process>nm" for 1..100, "<process/100>um" (two decimals) above 100,
+ * and STRING_UNKNOWN for UNK or for any other value (the latter is also
+ * reported through printBug).
+ */
+char* get_str_process(struct uarch* arch) {
+  // The buffer must fit STRING_UNKNOWN as well as any formatted number
+  size_t unknown_len = strlen(STRING_UNKNOWN) + 1;
+  size_t size = unknown_len > PROCESS_STR_MAX ? unknown_len : PROCESS_STR_MAX;
+  char* str = (char *) emalloc(sizeof(char) * size);
+  int32_t process = arch->process;
+
+  if(process == UNK) {
+    snprintf(str, size, "%s", STRING_UNKNOWN);
+  }
+  else if(process > 100) {
+    snprintf(str, size, "%.2fum", (double)process/100);
+  }
+  else if(process > 0){
+    snprintf(str, size, "%dnm", process);
+  }
+  else {
+    snprintf(str, size, "%s", STRING_UNKNOWN);
+    printBug("Found invalid process: '%d'", process);
+  }
+
+  return str;
+}
+
--- a/src/common/uarch.hpp
+++ b/src/common/uarch.hpp
@@ -0,0 +1,34 @@
+#ifndef __COMMON_UARCH__
+#define __COMMON_UARCH__
+
+#include <stdint.h>
+
+// Data not available
+#define NA                   (-1)
+
+// Unknown manufacturing process
+#define UNK                  (-1)
+
+typedef uint32_t GPUCHIP;
+typedef uint32_t MICROARCH;
+
+// Microarchitecture description; holds both the NVIDIA and the Intel specific fields
+struct uarch {
+  // NVIDIA specific
+  int32_t cc_major;
+  int32_t cc_minor;
+  int32_t compute_capability;
+
+  // Intel specific
+  int32_t gt;
+  int32_t eu;
+
+  MICROARCH uarch;
+  GPUCHIP chip;
+
+  int32_t process; // Manufacturing process; UNK when not known
+  char* uarch_str;
+  char* chip_str;
+};
+
+#endif
--- a/src/cuda/chips.hpp
+++ b/src/cuda/chips.hpp
@@ -1,10 +1,10 @@
-#ifndef __GPUCHIPS__
-#define __GPUCHIPS__
+#ifndef __CUDA_GPUCHIPS__
+#define __CUDA_GPUCHIPS__

 typedef uint32_t GPUCHIP;

 enum {
-  CHIP_UNKNOWN,
+  CHIP_UNKNOWN_CUDA,
  CHIP_G80,
  CHIP_G80GL,
  CHIP_G84,
--- a/src/cuda/cuda.cpp
+++ b/src/cuda/cuda.cpp
@@ -6,40 +6,12 @@
 #include "../common/pci.hpp"
 #include "../common/global.hpp"

-int print_gpus_list() {
-  cudaError_t err = cudaSuccess;
-  int num_gpus = -1;
+bool print_gpu_cuda(struct gpu_info* gpu) {
+  char* cc = get_str_cc(gpu->arch);
+  printf("%s (Compute Capability %s)\n", gpu->name, cc);
+  free(cc);

-  if ((err = cudaGetDeviceCount(&num_gpus)) != cudaSuccess) {
-    printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
-    return EXIT_FAILURE;
-  }
-  printf("CUDA GPUs available: %d\n", num_gpus);
-
-  if(num_gpus > 0) {
-    cudaDeviceProp deviceProp;
-    int max_len = 0;
-
-    for(int idx=0; idx < num_gpus; idx++) {
-      if ((err = cudaGetDeviceProperties(&deviceProp, idx)) != cudaSuccess) {
-        printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
-        return EXIT_FAILURE;
-      }
-      max_len = max(max_len, (int) strlen(deviceProp.name));
-    }
-
-    for(int i=0; i < max_len + 32; i++) putchar('-');
-    putchar('\n');
-    for(int idx=0; idx < num_gpus; idx++) {
-      if ((err = cudaGetDeviceProperties(&deviceProp, idx)) != cudaSuccess) {
-        printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
-        return EXIT_FAILURE;
-      }
-      printf("GPU %d: %s (Compute Capability %d.%d)\n", idx, deviceProp.name, deviceProp.major, deviceProp.minor);
-    }
-  }
-
-  return EXIT_SUCCESS;
+  return true;
 }

 struct cache* get_cache_info(cudaDeviceProp prop) {
@@ -104,12 +76,12 @@ struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) {
 }

 // Compute peak performance when using CUDA cores
-int64_t get_peak_performance(struct gpu_info* gpu) {
+int64_t get_peak_performance_cuda(struct gpu_info* gpu) {
  return gpu->freq * 1000000 * gpu->topo->cuda_cores * 2;
 }

 // Compute peak performance when using tensor cores
-int64_t get_peak_performance_t(cudaDeviceProp prop, struct gpu_info* gpu) {
+int64_t get_peak_performance_tcu(cudaDeviceProp prop, struct gpu_info* gpu) {
  // Volta / Turing tensor cores performs 4x4x4 FP16 matrix multiplication
  // Ampere tensor cores performs 8x4x8 FP16 matrix multiplicacion
  if(prop.major == 7) return gpu->freq * 1000000 * 4 * 4 * 4  * 2 * gpu->topo->tensor_cores;
@@ -117,7 +89,7 @@ int64_t get_peak_performance_t(cudaDeviceProp prop, struct gpu_info* gpu) {
  else return 0;
 }

-struct gpu_info* get_gpu_info(int gpu_idx) {
+struct gpu_info* get_gpu_info_cuda(int gpu_idx) {
  struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
  gpu->pci = NULL;
  gpu->idx = gpu_idx;
@@ -127,8 +99,10 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
    return NULL;
  }

+  if(gpu_idx == 0) {
    printf("Waiting for CUDA driver to start...");
    fflush(stdout);
+  }

  int num_gpus = -1;
  cudaError_t err = cudaSuccess;
@@ -136,7 +110,10 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
    printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
    return NULL;
  }
-  printf("\r                                   ");
+
+  if(gpu_idx == 0) {
+    printf("\r");
+  }

  if(num_gpus <= 0) {
    printErr("No CUDA capable devices found!");
@@ -144,7 +121,7 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
  }

  if(gpu->idx+1 > num_gpus) {
-    printErr("Requested GPU index %d in a system with %d GPUs", gpu->idx, num_gpus);
+    // Master is trying to query an invalid GPU
    return NULL;
  }

@@ -160,25 +137,19 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
  strcpy(gpu->name, deviceProp.name);

  struct pci_dev *devices = get_pci_devices_from_pciutils();
-  gpu->pci = get_pci_from_pciutils(devices);
+  gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_NVIDIA);
  gpu->arch = get_uarch_from_cuda(gpu);
  gpu->cach = get_cache_info(deviceProp);
  gpu->mem = get_memory_info(gpu, deviceProp);
  gpu->topo = get_topology_info(deviceProp);
-  gpu->peak_performance = get_peak_performance(gpu);
-  gpu->peak_performance_t = get_peak_performance_t(deviceProp, gpu);
+  // Peak performance using CUDA cores and using tensor cores (TCU);
+  // the tensor core estimate also needs the device properties
+  gpu->peak_performance = get_peak_performance_cuda(gpu);
+  gpu->peak_performance_tcu = get_peak_performance_tcu(deviceProp, gpu);

  return gpu;
 }

-char* get_str_generic(int32_t data) {
-  // Largest int is 10, +1 for possible negative, +1 for EOL
-  uint32_t max_size = 12;
-  char* dummy = (char *) ecalloc(max_size, sizeof(char));
-  snprintf(dummy, max_size, "%d", data);
-  return dummy;
-}
-
 char* get_str_sm(struct gpu_info* gpu) {
  return get_str_generic(gpu->topo->streaming_mp);
 }
--- a/src/cuda/cuda.hpp
+++ b/src/cuda/cuda.hpp
@@ -1,10 +1,10 @@
-#ifndef __CUDA__
-#define __CUDA__
+#ifndef __CUDA_GPU__
+#define __CUDA_GPU__

 #include "../common/gpu.hpp"

-struct gpu_info* get_gpu_info(int gpu_idx);
-int print_gpus_list();
+struct gpu_info* get_gpu_info_cuda(int gpu_idx);
+bool print_gpu_cuda(struct gpu_info* gpu);
 char* get_str_sm(struct gpu_info* gpu);
 char* get_str_cores_sm(struct gpu_info* gpu);
 char* get_str_cuda_cores(struct gpu_info* gpu);
--- a/src/cuda/pci.cpp
+++ b/src/cuda/pci.cpp
@@ -8,21 +8,7 @@
 #define CHECK_PCI_START if (false) {}
 #define CHECK_PCI(pci, id, chip) \
   else if (pci->device_id == id) return chip;
-#define CHECK_PCI_END else { printBug("TODOO"); return CHIP_UNKNOWN; }
-
-struct pci {
-  uint16_t vendor_id;
-  uint16_t device_id;
-};
-
-struct pci* get_pci_from_pciutils(struct pci_dev *devices) {
-  struct pci* pci = (struct pci*) emalloc(sizeof(struct pci));
-
-  pci->vendor_id = pciutils_get_pci_vendor_id(devices);
-  pci->device_id = pciutils_get_pci_device_id(devices);
-
-  return pci;
-}
+#define CHECK_PCI_END else { printBug("Unkown CUDA device id: 0x%.4X", pci->device_id); return CHIP_UNKNOWN_CUDA; }

 /*
 * pci ids were retrieved using https://github.com/pciutils/pciids
@@ -33,7 +19,7 @@ struct pci* get_pci_from_pciutils(struct pci_dev *devices) {
 * or in pci.ids itself)
 */

-GPUCHIP get_chip_from_pci(struct pci* pci) {
+GPUCHIP get_chip_from_pci_cuda(struct pci* pci) {
  CHECK_PCI_START
  CHECK_PCI(pci, 0x25e5, CHIP_GA107BM)
  CHECK_PCI(pci, 0x25e2, CHIP_GA107BM)
--- a/src/cuda/pci.hpp
+++ b/src/cuda/pci.hpp
@@ -6,9 +6,14 @@
 #include "../common/pci.hpp"
 #include "chips.hpp"

+/*
+ * doc: https://wiki.osdev.org/PCI#Class_Codes
+ *      https://pci-ids.ucw.cz/read/PC
+ */
+#define PCI_VENDOR_ID_NVIDIA 0x10de
+
 struct pci;

-struct pci* get_pci_from_pciutils(struct pci_dev *devices);
-GPUCHIP get_chip_from_pci(struct pci* pci);
+GPUCHIP get_chip_from_pci_cuda(struct pci* pci);

 #endif
--- a/src/cuda/uarch.cpp
+++ b/src/cuda/uarch.cpp
@@ -3,21 +3,14 @@
 #include <stdint.h>
 #include <cstddef>

+#include "../common/uarch.hpp"
 #include "../common/global.hpp"
 #include "../common/gpu.hpp"
 #include "chips.hpp"

-typedef uint32_t MICROARCH;
-
 // Any clock multiplier
 #define CM_ANY               -1

-// Data not available
-#define NA                   -1
-
-// Unknown manufacturing process
-#define UNK                  -1
-
 // MICROARCH values
 enum {
  UARCH_UNKNOWN,
@@ -43,23 +36,10 @@ static const char *uarch_str[] = {
  /*[ARCH_AMPERE]     = */ "Ampere",
 };

-struct uarch {
-  int32_t cc_major;
-  int32_t cc_minor;
-  int32_t compute_capability;
-
-  MICROARCH uarch;
-  GPUCHIP chip;
-
-  int32_t process;
-  char* uarch_str;
-  char* chip_str;
-};
-
 #define CHECK_UARCH_START if (false) {}
 #define CHECK_UARCH(arch, chip_, str, uarch, process) \
   else if (arch->chip == chip_) fill_uarch(arch, str, uarch, process);
-#define CHECK_UARCH_END else { printBug("map_chip_to_uarch: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, 0); }
+#define CHECK_UARCH_END else { if(arch->chip != CHIP_UNKNOWN_CUDA) printBug("map_chip_to_uarch_cuda: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, 0); }

 void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t process) {
  arch->chip_str = (char *) emalloc(sizeof(char) * (strlen(str)+1));
@@ -74,7 +54,7 @@ void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, uint32_t proce
 * o CHIP_XXXGL: indicates a professional-class (Quadro/Tesla) chip
 * o CHIP_XXXM:  indicates a mobile chip
 */
-void map_chip_to_uarch(struct uarch* arch) {
+void map_chip_to_uarch_cuda(struct uarch* arch) {
  CHECK_UARCH_START
  // TESLA (1.0, 1.1, 1.2, 1.3)                                //
  CHECK_UARCH(arch, CHIP_G80,      "G80",      UARCH_TESLA,   90)
@@ -263,9 +243,8 @@ struct uarch* get_uarch_from_cuda(struct gpu_info* gpu) {
  arch->cc_major = deviceProp.major;
  arch->cc_minor = deviceProp.minor;
  arch->compute_capability = deviceProp.major * 10 + deviceProp.minor;
-  arch->chip = get_chip_from_pci(gpu->pci);
-
-  map_chip_to_uarch(arch);
+  arch->chip = get_chip_from_pci_cuda(gpu->pci);
+  map_chip_to_uarch_cuda(arch);

  return arch;
 }
@@ -335,10 +314,6 @@ MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch) {
  CHECK_MEMTYPE_END
 }

-const char* get_str_uarch(struct uarch* arch) {
-  return uarch_str[arch->uarch];
-}
-
 char* get_str_cc(struct uarch* arch) {
  uint32_t max_size = 4;
  char* cc = (char *) ecalloc(max_size, sizeof(char));
@@ -346,31 +321,14 @@ char* get_str_cc(struct uarch* arch) {
  return cc;
 }

-char* get_str_process(struct uarch* arch) {
-  char* str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN)+1));
-  int32_t process = arch->process;
-
-  if(process == UNK) {
-    snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
-  }
-  else if(process > 100) {
-    sprintf(str, "%.2fum", (double)process/100);
-  }
-  else if(process > 0){
-    sprintf(str, "%dnm", process);
-  }
-  else {
-    snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
-    printBug("Found invalid process: '%d'", process);
-  }
-
-  return str;
-}
-
 char* get_str_chip(struct uarch* arch) {
  return arch->chip_str;
 }

+const char* get_str_uarch_cuda(struct uarch* arch) {
+  return uarch_str[arch->uarch];
+}
+
 void free_uarch_struct(struct uarch* arch) {
  free(arch->uarch_str);
  free(arch->chip_str);
--- a/src/cuda/uarch.hpp
+++ b/src/cuda/uarch.hpp
@@ -1,5 +1,5 @@
-#ifndef __UARCH__
-#define __UARCH__
+#ifndef __CUDA_UARCH__
+#define __CUDA_UARCH__

 #include "../common/gpu.hpp"

@@ -8,7 +8,7 @@ struct uarch;
 struct uarch* get_uarch_from_cuda(struct gpu_info* gpu);
 bool clkm_possible_for_uarch(int clkm, struct uarch* arch);
 MEMTYPE guess_memtype_from_cmul_and_uarch(int ddr, struct uarch* arch);
-char* get_str_uarch(struct uarch* arch);
+char* get_str_uarch_cuda(struct uarch* arch);
 char* get_str_cc(struct uarch* arch);
 char* get_str_chip(struct uarch* arch);
 char* get_str_process(struct uarch* arch);
--- a/src/intel/check.sh
+++ b/src/intel/check.sh
@@ -0,0 +1,20 @@
+#!/bin/bash -u
+# Checks the difference between supported uarchs
+# and uarchs that have their topology available
+# in file uarch.cpp (read from the current directory).
+# Both lists are shown side by side using meld.
+
+# Use private temporary files instead of fixed, predictable
+# names under /tmp, and remove them however the script ends
+uarchs_file="$(mktemp)" || exit 1
+topos_file="$(mktemp)" || { rm -f "$uarchs_file"; exit 1; }
+trap 'rm -f "$uarchs_file" "$topos_file"' EXIT
+
+# (uarch, GT) pairs assigned to chips by CHECK_UARCH
+uarchs="$(grep 'CHECK_UARCH' uarch.cpp | cut -d',' -f4-5 | grep 'UARCH_GEN' | tr -d ' ' | sort | uniq)"
+# (uarch, GT) pairs that have a CHECK_TOPO entry
+topos="$(grep 'CHECK_TOPO' uarch.cpp | cut -d',' -f3,4 | grep 'UARCH_' | tr -d ' ' | sort | uniq)"
+
+echo "$uarchs" > "$uarchs_file"
+echo "$topos" > "$topos_file"
+meld "$uarchs_file" "$topos_file"
--- a/src/intel/chips.hpp
+++ b/src/intel/chips.hpp
@@ -0,0 +1,59 @@
+#ifndef __INTEL_GPUCHIPS__
+#define __INTEL_GPUCHIPS__
+
+#include <stdint.h>
+
+// Identifier of a GPU chip; for Intel it holds one of the CHIP_* values below
+typedef uint32_t GPUCHIP;
+
+/*
+ * Intel iGPU models, grouped by graphics generation.
+ * CHIP_UNKNOWN_INTEL is the first enumerator (value 0).
+ */
+enum {
+  CHIP_UNKNOWN_INTEL,
+  // Gen6
+  CHIP_HD_2000,
+  CHIP_HD_3000,
+  // Gen7
+  CHIP_HD_2500,
+  CHIP_HD_4000,
+  CHIP_HD_P4000,
+  // Gen7.5
+  CHIP_HD_4200,
+  CHIP_HD_4400,
+  CHIP_HD_4600,
+  CHIP_HD_P4600,
+  CHIP_IRIS_5100,
+  CHIP_IRISP_5200,
+  CHIP_IRISP_P5200,
+  // Gen8
+  CHIP_HD_5300,
+  CHIP_HD_5500,
+  CHIP_HD_5600,
+  CHIP_HD_P5700,
+  CHIP_HD_6000,
+  CHIP_IRIS_6100,
+  CHIP_IRISP_6200,
+  CHIP_IRISP_P6300,
+  // Gen9
+  CHIP_HD_510,
+  CHIP_HD_515,
+  CHIP_HD_520,
+  CHIP_HD_530,
+  CHIP_HD_P530,
+  CHIP_HD_540,
+  CHIP_HD_550,
+  CHIP_IRIS_P555,
+  CHIP_IRIS_580,
+  CHIP_IRIS_P580,
+  // Gen9.5
+  CHIP_UHD_600,
+  CHIP_UHD_605,
+  CHIP_UHD_620,
+  CHIP_UHD_630,
+  CHIP_HD_610,
+  CHIP_HD_615,
+  CHIP_HD_620,
+  CHIP_HD_630,
+  CHIP_HD_P630,
+  CHIP_IRISP_640,
+  CHIP_IRISP_650,
+};
+
+#endif
--- a/src/intel/intel.cpp
+++ b/src/intel/intel.cpp
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "intel.hpp"
+#include "uarch.hpp"
+#include "chips.hpp"
+#include "udev.hpp"
+#include "../common/pci.hpp"
+#include "../common/global.hpp"
+
+/*
+ * Estimates the peak performance of an Intel iGPU as
+ * freq (MHz) * 10^6 * EUs per subslice * subslices * 8 * 2
+ * NOTE(review): 8 * 2 is presumably the number of FP32 lanes per EU times
+ * the two operations of a FMA; confirm against the Gen architecture docs
+ */
+int64_t get_peak_performance_intel(struct gpu_info* gpu) {
+  // Widen before multiplying so that the product is computed in 64 bits
+  // regardless of the width of the gpu_info / topology_i fields
+  int64_t freq_hz = (int64_t) gpu->freq * 1000000;
+  int64_t eus = (int64_t) gpu->topo_i->eu_subslice * gpu->topo_i->subslices;
+  return freq_hz * eus * 8 * 2;
+}
+
+/*
+ * Builds the gpu_info of the Intel iGPU reported by pciutils.
+ * Returns NULL if no Intel device is found in PCI or if its
+ * device id does not correspond to a known chip.
+ */
+struct gpu_info* get_gpu_info_intel() {
+  struct pci_dev *devices = get_pci_devices_from_pciutils();
+  struct pci* pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_INTEL);
+
+  if(pci == NULL) {
+    // No Intel iGPU found in PCI, which means it is not present
+    return NULL;
+  }
+
+  // get_uarch_from_pci returns NULL for unknown device ids; stop here
+  // instead of handing a NULL uarch to the functions below
+  struct uarch* arch = get_uarch_from_pci(pci);
+  if(arch == NULL) {
+    return NULL;
+  }
+
+  // Allocate only once the device is known to be supported, so that
+  // the early returns above do not leak the gpu_info
+  struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
+  gpu->vendor = GPU_VENDOR_INTEL;
+  gpu->pci = pci;
+  gpu->arch = arch;
+  gpu->name = get_name_from_uarch(gpu->arch);
+  gpu->topo_i = get_topology_info(gpu->arch);
+  gpu->freq = get_max_freq_from_file(gpu->pci);
+  gpu->peak_performance = get_peak_performance_intel(gpu);
+
+  return gpu;
+}
+
+/*
+ * Prints the name of an Intel GPU followed by a newline.
+ * Returns false (printing nothing) if 'gpu' is not an Intel GPU.
+ */
+bool print_gpu_intel(struct gpu_info* gpu) {
+  if(gpu->vendor != GPU_VENDOR_INTEL) return false;
+
+  // gpu->name is built by get_name_from_uarch and already starts with
+  // "Intel ", so the vendor must not be prepended a second time
+  printf("%s\n", gpu->name);
+
+  return true;
+}
+
+// Returns the total number of EUs (subslices * EUs per subslice) as a string
+char* get_str_eu(struct gpu_info* gpu) {
+  struct topology_i* topo = gpu->topo_i;
+  return get_str_generic(topo->subslices * topo->eu_subslice);
+}
--- a/src/intel/intel.hpp
+++ b/src/intel/intel.hpp
@@ -0,0 +1,10 @@
+#ifndef __INTEL_GPU__
+#define __INTEL_GPU__
+
+#include "../common/gpu.hpp"
+
+struct gpu_info* get_gpu_info_intel();
+bool print_gpu_intel(struct gpu_info* gpu);
+char* get_str_eu(struct gpu_info* gpu);
+
+#endif
--- a/src/intel/pci.cpp
+++ b/src/intel/pci.cpp
@@ -0,0 +1,88 @@
+#include <stdio.h>
+
+#include "pci.hpp"
+#include "chips.hpp"
+#include "../common/global.hpp"
+#include "../common/pci.hpp"
+
+// Helper macros that expand to an if / else-if chain over pci->device_id:
+// CHECK_PCI returns 'chip' when the device id equals 'id', and CHECK_PCI_END
+// reports the unmatched id with printBug and returns CHIP_UNKNOWN_INTEL
+#define CHECK_PCI_START if (false) {}
+#define CHECK_PCI(pci, id, chip) \
+   else if (pci->device_id == id) return chip;
+#define CHECK_PCI_END else { printBug("Unkown Intel device id: 0x%.4X", pci->device_id); return CHIP_UNKNOWN_INTEL; }
+
+/*
+ * Maps the PCI device id of an Intel GPU to its GPUCHIP value.
+ * Returns CHIP_UNKNOWN_INTEL if the device id is not in the table.
+ *
+ * Device ids reference:
+ * https://github.com/mesa3d/mesa/blob/main/include/pci_ids/i965_pci_ids.h
+ */
+GPUCHIP get_chip_from_pci_intel(struct pci* pci) {
+  CHECK_PCI_START
+  // Gen6
+  CHECK_PCI(pci, 0x0102, CHIP_HD_2000)
+  CHECK_PCI(pci, 0x0106, CHIP_HD_2000)
+  CHECK_PCI(pci, 0x010A, CHIP_HD_2000)
+  CHECK_PCI(pci, 0x0112, CHIP_HD_3000)
+  CHECK_PCI(pci, 0x0122, CHIP_HD_3000)
+  CHECK_PCI(pci, 0x0116, CHIP_HD_3000)
+  CHECK_PCI(pci, 0x0126, CHIP_HD_3000)
+  // Gen7
+  CHECK_PCI(pci, 0x0152, CHIP_HD_2500)
+  CHECK_PCI(pci, 0x0156, CHIP_HD_2500)
+  CHECK_PCI(pci, 0x0162, CHIP_HD_4000)
+  CHECK_PCI(pci, 0x0166, CHIP_HD_4000)
+  CHECK_PCI(pci, 0x016a, CHIP_HD_P4000)
+  // Gen7.5
+  CHECK_PCI(pci, 0x0A1E, CHIP_HD_4200)
+  CHECK_PCI(pci, 0x041E, CHIP_HD_4400)
+  CHECK_PCI(pci, 0x0A16, CHIP_HD_4400)
+  CHECK_PCI(pci, 0x0412, CHIP_HD_4600)
+  CHECK_PCI(pci, 0x0416, CHIP_HD_4600)
+  CHECK_PCI(pci, 0x0D12, CHIP_HD_4600)
+  CHECK_PCI(pci, 0x041A, CHIP_HD_P4600)
+  CHECK_PCI(pci, 0x0A2E, CHIP_IRIS_5100)
+  CHECK_PCI(pci, 0x0D22, CHIP_IRISP_5200)
+  CHECK_PCI(pci, 0x0D26, CHIP_IRISP_P5200)
+  // Gen8
+  CHECK_PCI(pci, 0x161E, CHIP_HD_5300)
+  CHECK_PCI(pci, 0x1616, CHIP_HD_5500)
+  CHECK_PCI(pci, 0x1612, CHIP_HD_5600)
+  CHECK_PCI(pci, 0x161A, CHIP_HD_P5700)
+  CHECK_PCI(pci, 0x1626, CHIP_HD_6000)
+  CHECK_PCI(pci, 0x162B, CHIP_IRIS_6100)
+  CHECK_PCI(pci, 0x1622, CHIP_IRISP_6200)
+  CHECK_PCI(pci, 0x162A, CHIP_IRISP_P6300)
+  // Gen9
+  CHECK_PCI(pci, 0x1902, CHIP_HD_510)
+  CHECK_PCI(pci, 0x1906, CHIP_HD_510)
+  CHECK_PCI(pci, 0x190B, CHIP_HD_510)
+  CHECK_PCI(pci, 0x191E, CHIP_HD_515)
+  CHECK_PCI(pci, 0x1916, CHIP_HD_520)
+  CHECK_PCI(pci, 0x1921, CHIP_HD_520)
+  CHECK_PCI(pci, 0x1912, CHIP_HD_530)
+  CHECK_PCI(pci, 0x191B, CHIP_HD_530)
+  CHECK_PCI(pci, 0x191D, CHIP_HD_P530)
+  // NOTE(review): the entries below are disabled; they all reuse 0x5917,
+  // which is mapped to CHIP_UHD_620 further down, so the real device ids
+  // of these chips still have to be filled in
+  /*CHECK_PCI(pci, 0x5917, CHIP_HD_540)
+  CHECK_PCI(pci, 0x5917, CHIP_HD_550)
+  CHECK_PCI(pci, 0x5917, CHIP_HD_P555)
+  CHECK_PCI(pci, 0x5917, CHIP_HD_580)
+  CHECK_PCI(pci, 0x5917, CHIP_HD_P580)*/
+  // Gen9.5
+  CHECK_PCI(pci, 0x3185, CHIP_UHD_600)
+  CHECK_PCI(pci, 0x3184, CHIP_UHD_605)
+  CHECK_PCI(pci, 0x5917, CHIP_UHD_620)
+  CHECK_PCI(pci, 0x3E91, CHIP_UHD_630)
+  CHECK_PCI(pci, 0x3E92, CHIP_UHD_630)
+  CHECK_PCI(pci, 0x3E98, CHIP_UHD_630)
+  CHECK_PCI(pci, 0x3E9B, CHIP_UHD_630)
+  CHECK_PCI(pci, 0x9BC5, CHIP_UHD_630)
+  CHECK_PCI(pci, 0x9BC8, CHIP_UHD_630)
+  CHECK_PCI(pci, 0x5902, CHIP_HD_610)
+  CHECK_PCI(pci, 0x5906, CHIP_HD_610)
+  CHECK_PCI(pci, 0x590B, CHIP_HD_610)
+  CHECK_PCI(pci, 0x591E, CHIP_HD_615)
+  CHECK_PCI(pci, 0x5912, CHIP_HD_630)
+  CHECK_PCI(pci, 0x591B, CHIP_HD_630)
+  CHECK_PCI(pci, 0x591A, CHIP_HD_P630)
+  CHECK_PCI(pci, 0x591D, CHIP_HD_P630)
+  CHECK_PCI(pci, 0x5926, CHIP_IRISP_640)
+  CHECK_PCI(pci, 0x5927, CHIP_IRISP_650)
+  CHECK_PCI_END
+}
--- a/src/intel/pci.hpp
+++ b/src/intel/pci.hpp
@@ -0,0 +1,19 @@
+#ifndef __PCI_INTEL__
+#define __PCI_INTEL__
+
+#include <stdint.h>
+
+#include "../common/pci.hpp"
+#include "chips.hpp"
+
+/*
+ * doc: https://wiki.osdev.org/PCI#Class_Codes
+ *      https://pci-ids.ucw.cz/read/PC
+ */
+#define PCI_VENDOR_ID_INTEL 0x8086
+
+struct pci;
+
+GPUCHIP get_chip_from_pci_intel(struct pci* pci);
+
+#endif
--- a/src/intel/uarch.cpp
+++ b/src/intel/uarch.cpp
@@ -0,0 +1,212 @@
+#include <stdint.h>
+#include <cstddef>
+#include <string.h>
+#include <stdio.h>
+
+#include "../common/uarch.hpp"
+#include "../common/global.hpp"
+#include "../common/gpu.hpp"
+#include "chips.hpp"
+#include "pci.hpp"
+
+// Data not available
+#define NA                   -1
+
+// Unknown manufacturing process
+#define UNK                  -1
+
+/*
+ * Mapping between iGPU and CPU uarchs
+ * -----------------------------------
+ * Gen6:   Sandy Bridge (2nd Gen)
+ * Gen7:   Ivy Bridge   (3rd Gen)
+ * Gen7.5: Haswell      (4th Gen)
+ * Gen8:   Broadwell    (5th Gen)
+ * Gen9:   Skylake      (6th Gen)
+ * Gen9.5: Kaby Lake    (7th Gen)
+ */
+// MICROARCH values for Intel iGPUs; also used as index into uarch_str
+enum {
+  UARCH_UNKNOWN,
+  UARCH_GEN6,
+  UARCH_GEN7,
+  UARCH_GEN7_5,
+  UARCH_GEN8,
+  UARCH_GEN9,
+  UARCH_GEN9_5,
+};
+
+// Printable name of each MICROARCH value; must follow the enum order above
+static const char *uarch_str[] = {
+  /*[UARCH_UNKNOWN]  = */ STRING_UNKNOWN,
+  /*[UARCH_GEN6]     = */ "Gen6",
+  /*[UARCH_GEN7]     = */ "Gen7",
+  /*[UARCH_GEN7_5]   = */ "Gen7.5",
+  /*[UARCH_GEN8]     = */ "Gen8",
+  /*[UARCH_GEN9]     = */ "Gen9",
+  /*[UARCH_GEN9_5]   = */ "Gen9.5",
+};
+
+// Graphics Tiers (GT); the values are also used as index into gt_str
+enum {
+  GT_UNKNOWN,
+  GT1,
+  GT1_5,
+  GT2,
+  GT3,
+  GT3e,
+  GT4e
+};
+
+// Printable name of each graphics tier; must follow the enum order above
+static const char *gt_str[] = {
+  /*[GT_UNKNOWN] = */ STRING_UNKNOWN,
+  /*[GT1]        = */ "GT1",
+  /*[GT1_5]      = */ "GT1.5",
+  /*[GT2]        = */ "GT2",
+  /*[GT3]        = */ "GT3",
+  /*[GT3e]       = */ "GT3e",
+  /*[GT4e]       = */ "GT4e",
+};
+
+// Helper macros that expand to an if / else-if chain over arch->chip.
+// CHECK_UARCH fills 'arch' (chip name, uarch, GT and process) when the chip
+// matches; CHECK_UARCH_END reports the unmatched chip and fills unknown values
+#define CHECK_UARCH_START if (false) {}
+#define CHECK_UARCH(arch, chip_, str, uarch, gt, process) \
+   else if (arch->chip == chip_) fill_uarch(arch, str, uarch, gt, process);
+#define CHECK_UARCH_END else { printBug("map_chip_to_uarch_intel: Unknown chip id: %d", arch->chip); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, GT_UNKNOWN, 0); }
+
+// Same idea for the topology, selected by the (uarch, GT) pair of 'arch'.
+// CHECK_TOPO_END must be expanded where a variable named 'arch' is in scope;
+// it reports the pair that has no topology entry and fills the topology with -1
+#define CHECK_TOPO_START if (false) {}
+#define CHECK_TOPO(topo, arch, uarch_, gt_, eu_sub, sub, sli) \
+  else if(arch->uarch == uarch_ && arch->gt == gt_) fill_topo(topo, eu_sub, sub, sli);
+#define CHECK_TOPO_END else { printBug("get_topology_info: Unknown topology for uarch id %d and GT id %d", arch->uarch, arch->gt); fill_topo(topo, -1, -1, -1); }
+
+// Stores the EUs per subslice, the subslices and the slices in 'topo_i'
+void fill_topo(struct topology_i* topo_i, int32_t eu_sub, int32_t sub, int32_t sli) {
+  topo_i->eu_subslice = eu_sub;
+  topo_i->subslices = sub;
+  topo_i->slices = sli;
+}
+
+// Fills 'arch' with the microarchitecture, graphics tier and process, and
+// with a newly allocated copy of the chip name 'str'
+void fill_uarch(struct uarch* arch, char const *str, MICROARCH u, int32_t gt, uint32_t process) {
+  arch->uarch = u;
+  arch->gt = gt;
+  arch->process = process;
+
+  // Private copy of the chip name, terminating NUL included
+  size_t name_size = strlen(str) + 1;
+  arch->chip_str = (char *) emalloc(sizeof(char) * name_size);
+  memcpy(arch->chip_str, str, name_size);
+}
+
+/*
+ * Fills 'arch' (chip name, uarch, graphics tier and manufacturing process
+ * in nm) from arch->chip. Chips without an entry are reported with printBug
+ * and filled with unknown values (see CHECK_UARCH_END).
+ */
+void map_chip_to_uarch_intel(struct uarch* arch) {
+  CHECK_UARCH_START
+  // Gen6
+  CHECK_UARCH(arch, CHIP_HD_2000,     "HD Graphics 2000",        UARCH_GEN6,   GT1,  32)
+  CHECK_UARCH(arch, CHIP_HD_3000,     "HD Graphics 3000",        UARCH_GEN6,   GT2,  32)
+  // Gen7
+  CHECK_UARCH(arch, CHIP_HD_2500,     "HD Graphics 2500",        UARCH_GEN7,   GT1,  22)
+  CHECK_UARCH(arch, CHIP_HD_4000,     "HD Graphics 4000",        UARCH_GEN7,   GT2,  22)
+  CHECK_UARCH(arch, CHIP_HD_P4000,    "HD Graphics P4000",       UARCH_GEN7,   GT2,  22)
+  // Gen7.5
+  CHECK_UARCH(arch, CHIP_HD_4200,     "HD Graphics 4200",        UARCH_GEN7_5, GT2,  22)
+  CHECK_UARCH(arch, CHIP_HD_4400,     "HD Graphics 4400",        UARCH_GEN7_5, GT2,  22)
+  CHECK_UARCH(arch, CHIP_HD_4600,     "HD Graphics 4600",        UARCH_GEN7_5, GT2,  22)
+  CHECK_UARCH(arch, CHIP_HD_P4600,    "HD Graphics P4600",       UARCH_GEN7_5, GT2,  22)
+  CHECK_UARCH(arch, CHIP_IRIS_5100,   "HD Iris 5100",            UARCH_GEN7_5, GT3,  22)
+  CHECK_UARCH(arch, CHIP_IRISP_5200,  "HD Iris Pro 5200",        UARCH_GEN7_5, GT3,  22)
+  CHECK_UARCH(arch, CHIP_IRISP_P5200, "HD Iris Pro P5200",       UARCH_GEN7_5, GT3,  22)
+  // Gen8
+  CHECK_UARCH(arch, CHIP_HD_5300,     "HD Graphics 5300",        UARCH_GEN8,   GT2,  14)
+  CHECK_UARCH(arch, CHIP_HD_5500,     "HD Graphics 5500",        UARCH_GEN8,   GT2,  14)
+  CHECK_UARCH(arch, CHIP_HD_5600,     "HD Graphics 5600",        UARCH_GEN8,   GT2,  14)
+  CHECK_UARCH(arch, CHIP_HD_P5700,    "HD Graphics P5700",       UARCH_GEN8,   GT2,  14)
+  CHECK_UARCH(arch, CHIP_HD_6000,     "HD Graphics 6000",        UARCH_GEN8,   GT3,  14)
+  CHECK_UARCH(arch, CHIP_IRIS_6100,   "Iris Graphics 6100",      UARCH_GEN8,   GT3,  14)
+  CHECK_UARCH(arch, CHIP_IRISP_6200,  "Iris Pro Graphics 6200",  UARCH_GEN8,   GT3,  14)
+  CHECK_UARCH(arch, CHIP_IRISP_P6300, "Iris Pro Graphics P6300", UARCH_GEN8,   GT3,  14)
+  // Gen9
+  CHECK_UARCH(arch, CHIP_HD_510,      "HD Graphics 510",         UARCH_GEN9,   GT1,   14)
+  CHECK_UARCH(arch, CHIP_HD_515,      "HD Graphics 515",         UARCH_GEN9,   GT2,   14)
+  CHECK_UARCH(arch, CHIP_HD_520,      "HD Graphics 520",         UARCH_GEN9,   GT2,   14)
+  CHECK_UARCH(arch, CHIP_HD_530,      "HD Graphics 530",         UARCH_GEN9,   GT2,   14)
+  CHECK_UARCH(arch, CHIP_HD_P530,     "HD Graphics P530",        UARCH_GEN9,   GT2,   14)
+  // Gen9.5
+  CHECK_UARCH(arch, CHIP_UHD_600,     "UHD Graphics 600",        UARCH_GEN9_5, GT1,   14)
+  CHECK_UARCH(arch, CHIP_UHD_605,     "UHD Graphics 605",        UARCH_GEN9_5, GT1_5, 14)
+  CHECK_UARCH(arch, CHIP_UHD_620,     "UHD Graphics 620",        UARCH_GEN9_5, GT2,   14)
+  CHECK_UARCH(arch, CHIP_UHD_630,     "UHD Graphics 630",        UARCH_GEN9_5, GT2,   14)
+  CHECK_UARCH(arch, CHIP_HD_610,      "HD Graphics 610",         UARCH_GEN9_5, GT1,   14)
+  CHECK_UARCH(arch, CHIP_HD_615,      "HD Graphics 615",         UARCH_GEN9_5, GT2,   14)
+  CHECK_UARCH(arch, CHIP_HD_630,      "HD Graphics 630",         UARCH_GEN9_5, GT2,   14)
+  CHECK_UARCH(arch, CHIP_HD_P630,     "HD Graphics P630",        UARCH_GEN9_5, GT2,   14)
+  CHECK_UARCH(arch, CHIP_IRISP_640,   "Iris Plus Graphics 640",  UARCH_GEN9_5, GT3e,  14)
+  // Was a second CHIP_IRISP_640 entry, which left CHIP_IRISP_650 unmatched
+  CHECK_UARCH(arch, CHIP_IRISP_650,   "Iris Plus Graphics 650",  UARCH_GEN9_5, GT3e,  14)
+  CHECK_UARCH_END
+}
+
+// Returns the printable name of the microarchitecture stored in 'arch'
+const char* get_str_uarch_intel(struct uarch* arch) {
+  const MICROARCH uarch_idx = arch->uarch;
+  return uarch_str[uarch_idx];
+}
+
+// Returns the printable name of the graphics tier stored in 'arch'
+const char* get_str_gt(struct uarch* arch) {
+  const char* tier_name = gt_str[arch->gt];
+  return tier_name;
+}
+
+/*
+ * Builds the uarch of the Intel GPU described by 'pci'.
+ * Returns NULL if the PCI device id does not match any known chip.
+ */
+struct uarch* get_uarch_from_pci(struct pci* pci) {
+  // Resolve the chip before allocating, so that nothing is leaked
+  // when the device is not supported
+  GPUCHIP chip = get_chip_from_pci_intel(pci);
+  if(chip == CHIP_UNKNOWN_INTEL) {
+    return NULL;
+  }
+
+  struct uarch* arch = (struct uarch*) emalloc(sizeof(struct uarch));
+  arch->chip_str = NULL;
+  arch->chip = chip;
+  map_chip_to_uarch_intel(arch);
+
+  return arch;
+}
+
+// Returns a newly allocated string with the chip name prefixed by "Intel "
+char* get_name_from_uarch(struct uarch* arch) {
+  // "Intel " (6 characters) + chip name + terminating NUL
+  size_t name_size = 6 + strlen(arch->chip_str) + 1;
+  char* name = (char *) emalloc(sizeof(char) * name_size);
+
+  sprintf(name, "Intel %s", arch->chip_str);
+  return name;
+}
+
+/*
+ * Refs:
+ * Gen6:     https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen6
+ * Gen7/7.5: https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen7
+             "The Compute Architecture of Intel Processor Graphics Gen7.5, v1.0"
+ * Gen8:     https://en.wikipedia.org/wiki/List_of_Intel_graphics_processing_units#Gen8
+             "The Compute Architecture of Intel Processor Graphics Gen8, v1.1"
+ * Gen9:     https://en.wikichip.org/wiki/intel/microarchitectures/gen9#Configuration
+             "The Compute Architecture of Intel Processor Graphics Gen9, v1.0"
+ * Gen9.5:   https://en.wikichip.org/wiki/intel/microarchitectures/gen9.5#Configuration
+ */
+struct topology_i* get_topology_info(struct uarch* arch) {
+  // The topology is selected by the (uarch, GT) pair of 'arch'; a pair without
+  // an entry below is reported with printBug and filled with -1 (CHECK_TOPO_END)
+  struct topology_i* topo = (struct topology_i*) emalloc(sizeof(struct topology_i));
+
+  // Syntax: (EU per subslice, Subslices, Slices)
+  CHECK_TOPO_START
+  // Gen6
+  CHECK_TOPO(topo, arch, UARCH_GEN6,   GT1,   6, 1, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN6,   GT2,   6, 2, 1)
+  // Gen7
+  CHECK_TOPO(topo, arch, UARCH_GEN7,   GT1,   6, 1, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN7,   GT2,   8, 2, 1)
+  // NOTE(review): no chip in map_chip_to_uarch_intel is Gen7 GT3, and these
+  // values are the same as Gen7 GT1; confirm whether this entry is intended
+  CHECK_TOPO(topo, arch, UARCH_GEN7,   GT3,   6, 1, 1)
+  // Gen7.5
+  CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT1,  10, 1, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT2,  10, 2, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN7_5, GT3,  10, 4, 1)
+  // Gen8
+  CHECK_TOPO(topo, arch, UARCH_GEN8,   GT1,   6, 2, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN8,   GT2,   8, 3, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN8,   GT3,   8, 6, 2)
+  // Gen9
+  CHECK_TOPO(topo, arch, UARCH_GEN9,   GT1,   6, 2, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN9,   GT2,   8, 3, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN9,   GT3,   8, 6, 2)
+  CHECK_TOPO(topo, arch, UARCH_GEN9,   GT4e,  8, 9, 3)
+  // Gen9.5
+  CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT1,   6, 2, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT1_5, 6, 3, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT2,   8, 3, 1)
+  CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT3,   8, 6, 2)
+  CHECK_TOPO(topo, arch, UARCH_GEN9_5, GT3e,  8, 6, 2) // Same as GT3, but has eDRAM cache
+  CHECK_TOPO_END
+
+  return topo;
+}
--- a/src/intel/uarch.hpp
+++ b/src/intel/uarch.hpp
@@ -0,0 +1,14 @@
+#ifndef __INTEL_UARCH__
+#define __INTEL_UARCH__
+
+#include "../common/gpu.hpp"
+
+struct uarch;
+
+// Builds the uarch of the GPU described by 'pci' (NULL if the chip is unknown)
+struct uarch* get_uarch_from_pci(struct pci* pci);
+// Returns a newly allocated "Intel <chip name>" string
+char* get_name_from_uarch(struct uarch* arch);
+// NOTE(review): get_str_gt and get_str_uarch_intel are defined in uarch.cpp
+// returning 'const char*' (pointers into static tables), while they are
+// declared here as 'char*'; the declarations should be aligned with the
+// definitions once the callers are checked
+char* get_str_gt(struct uarch* arch);
+char* get_str_uarch_intel(struct uarch* arch);
+// Returns the newly allocated topology matching the uarch and GT of 'arch'
+struct topology_i* get_topology_info(struct uarch* arch);
+
+#endif
--- a/src/intel/udev.cpp
+++ b/src/intel/udev.cpp
@@ -0,0 +1,89 @@
+#include <cstddef>
+#include <cstring>
+#include <cstdlib>
+#include <cstdint>
+#include <cerrno>
+#include <cstdio>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "../common/global.hpp"
+#include "../common/pci.hpp"
+
+#define _PATH_SYS_SYSTEM        "/sys/devices/pci0000:00"
+#define _PATH_SYS_DRM           "/drm"
+#define _PATH_CARD              "/card0"
+#define _PATH_FREQUENCY_MAX     "/gt_max_freq_mhz"
+#define _PATH_FREQUENCY_MIN     "/gt_min_freq_mhz"
+
+#define _PATH_FREQUENCY_MAX_LEN 100
+#define DEFAULT_FILE_SIZE       4096
+#define UNKNOWN_DATA            -1
+
+/*
+ * Reads the file 'path' into a newly allocated, NUL-terminated buffer of
+ * DEFAULT_FILE_SIZE bytes; at most DEFAULT_FILE_SIZE - 1 bytes are read and
+ * longer files are truncated. The number of bytes read is stored in 'len'.
+ * Returns NULL if the file cannot be opened, read or closed; otherwise the
+ * caller owns the buffer and must free it.
+ */
+char* read_file(char* path, int* len) {
+  int fd = open(path, O_RDONLY);
+
+  if(fd == -1) {
+    return NULL;
+  }
+
+  //File exists, read it
+  const int capacity = DEFAULT_FILE_SIZE - 1; // keep one byte for the final NUL
+  const int block = 128;
+  int bytes_read = 0;
+  int offset = 0;
+  char* buf = (char *) emalloc(sizeof(char)*DEFAULT_FILE_SIZE);
+  memset(buf, 0, sizeof(char)*DEFAULT_FILE_SIZE);
+
+  // Never request more bytes than what is left in the buffer
+  while(offset < capacity) {
+    int chunk = capacity - offset;
+    if(chunk > block) chunk = block;
+
+    bytes_read = read(fd, buf+offset, chunk);
+    if(bytes_read <= 0) break;
+    offset += bytes_read;
+  }
+
+  // Always close the descriptor, even if reading failed
+  bool read_failed = bytes_read < 0;
+  bool close_failed = close(fd) == -1;
+  if(read_failed || close_failed) {
+    free(buf);
+    return NULL;
+  }
+
+  *len = offset;
+  return buf;
+}
+
+/*
+ * Reads a frequency in MHz from the file 'path'.
+ * Returns UNKNOWN_DATA if the file cannot be read, if its content cannot be
+ * converted by strtol or if the value is outside the [100, 10000] MHz range.
+ */
+long get_freq_from_file(char* path) {
+  int filelen;
+  char* buf;
+  if((buf = read_file(path, &filelen)) == NULL) {
+    printWarn("Could not open '%s'", path);
+    return UNKNOWN_DATA;
+  }
+
+  errno = 0;
+  long ret = strtol(buf, NULL, 10);
+  // Save errno and release the buffer right away: it is not needed anymore
+  // and this way no return path below can leak it
+  int conv_errno = errno;
+  free(buf);
+
+  if(conv_errno != 0) {
+    printBug("strtol: %s", strerror(conv_errno));
+    return UNKNOWN_DATA;
+  }
+
+  // We will be getting the frequency in MHz
+  // We consider it is an error if frequency is
+  // greater than 10 GHz or less than 100 MHz
+  if(ret > 10000 || ret <  100) {
+    printBug("Invalid data was read from file '%s': %ld\n", path, ret);
+    return UNKNOWN_DATA;
+  }
+
+  return ret;
+}
+
+/*
+ * Builds the sysfs path of the frequency file 'freq_file' of the card0 DRM
+ * node of the PCI device 'pci' and returns the frequency read from it.
+ * Returns UNKNOWN_DATA if the path does not fit in the buffer or if the
+ * file cannot be read.
+ */
+static long get_freq_from_pci_file(struct pci* pci, const char* freq_file) {
+  char path[_PATH_FREQUENCY_MAX_LEN];
+  int written = snprintf(path, sizeof(path), "%s/%04x:%02x:%02x.%d%s%s%s", _PATH_SYS_SYSTEM, pci->domain, pci->bus, pci->dev, pci->func, _PATH_SYS_DRM, _PATH_CARD, freq_file);
+
+  // snprintf never writes past the buffer; a truncated path is an error
+  if(written < 0 || (size_t) written >= sizeof(path)) {
+    printBug("get_freq_from_pci_file: Path of '%s' does not fit in %d bytes", freq_file, _PATH_FREQUENCY_MAX_LEN);
+    return UNKNOWN_DATA;
+  }
+
+  return get_freq_from_file(path);
+}
+
+// Returns the max frequency (MHz) of the GPU 'pci', or UNKNOWN_DATA on error
+long get_max_freq_from_file(struct pci* pci) {
+  return get_freq_from_pci_file(pci, _PATH_FREQUENCY_MAX);
+}
+
+// Returns the min frequency (MHz) of the GPU 'pci', or UNKNOWN_DATA on error
+long get_min_freq_from_file(struct pci* pci) {
+  return get_freq_from_pci_file(pci, _PATH_FREQUENCY_MIN);
+}
--- a/src/intel/udev.hpp
+++ b/src/intel/udev.hpp
@@ -0,0 +1,7 @@
+#ifndef __UDEV__
+#define __UDEV__
+
+long get_max_freq_from_file(struct pci* pci);
+long get_min_freq_from_file(struct pci* pci);
+
+#endif