[v0.23] Sort PCI devices; this makes the device list match the CUDA driver ordering, which fixes a bug that occurred when more than one NVIDIA GPU was present

This commit is contained in:
Dr-Noob
2022-01-22 13:25:22 +01:00
parent 23586a18e9
commit bd1158c139
11 changed files with 103 additions and 20 deletions

View File

@@ -47,7 +47,7 @@ else()
link_libraries(${PCIUTILS_LIBRARIES})
endif()
add_executable(gpufetch ${COMMON_DIR}/main.cpp ${COMMON_DIR}/args.cpp ${COMMON_DIR}/gpu.cpp ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/global.cpp ${COMMON_DIR}/printer.cpp ${COMMON_DIR}/master.cpp ${COMMON_DIR}/uarch.cpp)
add_executable(gpufetch ${COMMON_DIR}/main.cpp ${COMMON_DIR}/args.cpp ${COMMON_DIR}/gpu.cpp ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/sort.cpp ${COMMON_DIR}/global.cpp ${COMMON_DIR}/printer.cpp ${COMMON_DIR}/master.cpp ${COMMON_DIR}/uarch.cpp)
set(SANITY_FLAGS "-Wfloat-equal -Wshadow -Wpointer-arith")
set(CMAKE_CXX_FLAGS "${SANITY_FLAGS} -Wall -Wextra -pedantic -fstack-protector-all -pedantic -std=c++11")

View File

@@ -8,7 +8,7 @@
#include "../cuda/cuda.hpp"
#include "../cuda/uarch.hpp"
static const char* VERSION = "0.22";
static const char* VERSION = "0.23";
void print_help(char *argv[]) {
const char **t = args_str;

View File

@@ -1,6 +1,7 @@
#include <cstdlib>
#include <cstdio>
#include "pci.hpp"
#include "global.hpp"
#include "colors.hpp"
#include "master.hpp"
@@ -16,6 +17,7 @@ struct gpu_list {
struct gpu_list* get_gpu_list() {
int idx = 0;
struct pci_dev *devices = get_pci_devices_from_pciutils();
struct gpu_list* list = (struct gpu_list*) malloc(sizeof(struct gpu_list));
list->num_gpus = 0;
list->gpus = (struct gpu_info**) malloc(sizeof(struct info*) * MAX_GPUS);
@@ -24,7 +26,7 @@ struct gpu_list* get_gpu_list() {
bool valid = true;
while(valid) {
list->gpus[idx] = get_gpu_info_cuda(idx);
list->gpus[idx] = get_gpu_info_cuda(devices, idx);
if(list->gpus[idx] != NULL) idx++;
else valid = false;
}
@@ -33,7 +35,7 @@ struct gpu_list* get_gpu_list() {
#endif
#ifdef BACKEND_INTEL
list->gpus[idx] = get_gpu_info_intel();
list->gpus[idx] = get_gpu_info_intel(devices);
if(list->gpus[idx] != NULL) list->num_gpus++;
#endif

View File

@@ -1,3 +1,4 @@
#include "sort.hpp"
#include "global.hpp"
#include "pci.hpp"
#include "../cuda/pci.hpp"
@@ -10,6 +11,14 @@
#define PCI_VENDOR_ID_AMD 0x1002
#define CLASS_VGA_CONTROLLER 0x0300
/*
 * Debug helper: prints the PCI address (domain:bus:dev.func) of, at most,
 * the first 5 entries of the given devices list, one address per line.
 */
void debug_devices(struct pci_dev *devices) {
  int printed = 0;
  struct pci_dev *dev = devices;

  while(printed < 5 && dev != NULL) {
    printf("%04x:%02x:%02x.%d\n", dev->domain, dev->bus, dev->dev, dev->func);
    printed++;
    dev = dev->next;
  }
}
bool pciutils_is_vendor_id_present(struct pci_dev *devices, int id) {
for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
@@ -21,14 +30,19 @@ bool pciutils_is_vendor_id_present(struct pci_dev *devices, int id) {
return false;
}
uint16_t pciutils_get_pci_device_id(struct pci_dev *devices, int id) {
uint16_t pciutils_get_pci_device_id(struct pci_dev *devices, int id, int idx) {
int curr = 0;
for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
return dev->device_id;
if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
if(curr == idx) {
return dev->device_id;
}
curr++;
}
}
printErr("Unable to find a valid device for device id 0x%.4X using pciutils", id);
printErr("Unable to find a valid device for device id 0x%.4X with idx %d using pciutils", id, idx);
return 0;
}
@@ -48,13 +62,13 @@ void pciutils_set_pci_bus(struct pci* pci, struct pci_dev *devices, int id) {
if(!found) printErr("Unable to find a valid device for id 0x%.4X using pciutils", id);
}
struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id) {
struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id, int idx) {
struct pci* pci = (struct pci*) emalloc(sizeof(struct pci));
// TODO: Refactor this; instead of 2xGet + 1xSet, do it better
if(pciutils_is_vendor_id_present(devices, id)) {
pci->vendor_id = id;
pci->device_id = pciutils_get_pci_device_id(devices, id);
pci->device_id = pciutils_get_pci_device_id(devices, id, idx);
pciutils_set_pci_bus(pci, devices, id);
return pci;
}
@@ -75,6 +89,8 @@ struct pci_dev *get_pci_devices_from_pciutils() {
pci_fill_info(dev, PCI_FILL_IDENT | PCI_FILL_BASES | PCI_FILL_CLASS);
}
sort_pci_devices(&pacc->devices);
return pacc->devices;
}

View File

@@ -15,7 +15,7 @@ struct pci {
uint16_t func;
};
struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id);
struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id, int idx);
struct pci_dev *get_pci_devices_from_pciutils();
void print_gpus_list_pci();

61
src/common/sort.cpp Normal file
View File

@@ -0,0 +1,61 @@
#include <cstdio>
#include <cstdlib>
#include "pci.hpp"
#include "global.hpp"
// Code inspired by lspci.c
/*
 * qsort() comparator for an array whose elements are 'struct pci_dev *'.
 * Devices are ordered by domain first, then bus, then device number and
 * finally function. Returns -1 if A goes before B, 1 if A goes after B
 * and 0 when all four fields are equal.
 */
int compare_them(const void *A, const void *B) {
  const struct pci_dev *lhs = *(struct pci_dev **) A;
  const struct pci_dev *rhs = *(struct pci_dev **) B;

  if (lhs->domain != rhs->domain)
    return (lhs->domain < rhs->domain) ? -1 : 1;

  if (lhs->bus != rhs->bus)
    return (lhs->bus < rhs->bus) ? -1 : 1;

  if (lhs->dev != rhs->dev)
    return (lhs->dev < rhs->dev) ? -1 : 1;

  if (lhs->func != rhs->func)
    return (lhs->func < rhs->func) ? -1 : 1;

  return 0;
}
/*
 * Sorts, in place, the singly linked list of PCI devices whose head is
 * pointed to by 'devices', using the order defined by compare_them
 * (domain, bus, device, function). On return, *devices points to the new
 * head of the list and the last element's 'next' is NULL.
 *
 * Lists with fewer than two elements (including an empty list) are left
 * untouched: they are already sorted, and the relinking code below needs
 * at least one element to be valid.
 */
void sort_pci_devices(struct pci_dev **devices) {
  if(devices == NULL) return;

  // First pass: count the elements so the pointer array can be sized
  int cnt = 0;
  for(struct pci_dev *dev=*devices; dev != NULL; dev=dev->next) {
    cnt++;
  }

  // Empty list would otherwise dereference NULL and read arr[0] out of bounds
  if(cnt < 2) return;

  // Second pass: flatten the list into an array that qsort can work with
  struct pci_dev **arr = (struct pci_dev **) emalloc(sizeof(struct pci_dev *) * cnt);
  int i = 0;
  for(struct pci_dev *dev=*devices; dev != NULL; dev=dev->next) {
    arr[i] = dev;
    i++;
  }

  qsort(arr, cnt, sizeof(struct pci_dev *), compare_them);

  // Rebuild the 'next' chain following the sorted order
  for(i = 0; i + 1 < cnt; i++) {
    arr[i]->next = arr[i + 1];
  }
  arr[cnt - 1]->next = NULL;

  *devices = arr[0];
  free(arr);
}

7
src/common/sort.hpp Normal file
View File

@@ -0,0 +1,7 @@
// Identifiers containing a double underscore are reserved for the C++
// implementation, so the include guard uses a plain name instead.
#ifndef SORT_PCI_HPP
#define SORT_PCI_HPP
// Forward declaration so this header does not depend on include order
struct pci_dev;
// Sorts, in place, the linked list of PCI devices whose head is pointed to
// by 'first_dev'; on return, *first_dev points to the new head of the list
void sort_pci_devices(struct pci_dev **first_dev);
#endif

View File

@@ -98,7 +98,7 @@ int64_t get_peak_performance_tcu(cudaDeviceProp prop, struct gpu_info* gpu) {
else return 0;
}
struct gpu_info* get_gpu_info_cuda(int gpu_idx) {
struct gpu_info* get_gpu_info_cuda(struct pci_dev *devices, int gpu_idx) {
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
gpu->pci = NULL;
gpu->idx = gpu_idx;
@@ -146,8 +146,7 @@ struct gpu_info* get_gpu_info_cuda(int gpu_idx) {
gpu->name = (char *) emalloc(sizeof(char) * (strlen(deviceProp.name) + 1));
strcpy(gpu->name, deviceProp.name);
struct pci_dev *devices = get_pci_devices_from_pciutils();
gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_NVIDIA);
gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_NVIDIA, gpu_idx);
gpu->arch = get_uarch_from_cuda(gpu);
gpu->cach = get_cache_info(deviceProp);
gpu->mem = get_memory_info(gpu, deviceProp);

View File

@@ -4,7 +4,7 @@
#include "../common/gpu.hpp"
#define CUDA_DRIVER_START_WARNING "Waiting for CUDA driver to start..."
struct gpu_info* get_gpu_info_cuda(int gpu_idx);
struct gpu_info* get_gpu_info_cuda(struct pci_dev *devices, int gpu_idx);
bool print_gpu_cuda(struct gpu_info* gpu);
char* get_str_sm(struct gpu_info* gpu);
char* get_str_cores_sm(struct gpu_info* gpu);

View File

@@ -13,12 +13,10 @@ int64_t get_peak_performance_intel(struct gpu_info* gpu) {
return gpu->freq * 1000000 * gpu->topo_i->eu_subslice * gpu->topo_i->subslices * 8 * 2;
}
struct gpu_info* get_gpu_info_intel() {
struct gpu_info* get_gpu_info_intel(struct pci_dev *devices) {
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
gpu->vendor = GPU_VENDOR_INTEL;
struct pci_dev *devices = get_pci_devices_from_pciutils();
gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_INTEL);
gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_INTEL, 0);
if(gpu->pci == NULL) {
// No Intel iGPU found in PCI, which means it is not present

View File

@@ -3,7 +3,7 @@
#include "../common/gpu.hpp"
struct gpu_info* get_gpu_info_intel();
struct gpu_info* get_gpu_info_intel(struct pci_dev *devices);
bool print_gpu_intel(struct gpu_info* gpu);
char* get_str_eu(struct gpu_info* gpu);