[v0.23] Sort PCI devices; this makes the device list match the CUDA driver ordering, which fixes a bug that occurred when more than one NVIDIA GPU was present
This commit is contained in:
@@ -47,7 +47,7 @@ else()
|
||||
link_libraries(${PCIUTILS_LIBRARIES})
|
||||
endif()
|
||||
|
||||
add_executable(gpufetch ${COMMON_DIR}/main.cpp ${COMMON_DIR}/args.cpp ${COMMON_DIR}/gpu.cpp ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/global.cpp ${COMMON_DIR}/printer.cpp ${COMMON_DIR}/master.cpp ${COMMON_DIR}/uarch.cpp)
|
||||
add_executable(gpufetch ${COMMON_DIR}/main.cpp ${COMMON_DIR}/args.cpp ${COMMON_DIR}/gpu.cpp ${COMMON_DIR}/pci.cpp ${COMMON_DIR}/sort.cpp ${COMMON_DIR}/global.cpp ${COMMON_DIR}/printer.cpp ${COMMON_DIR}/master.cpp ${COMMON_DIR}/uarch.cpp)
|
||||
set(SANITY_FLAGS "-Wfloat-equal -Wshadow -Wpointer-arith")
|
||||
set(CMAKE_CXX_FLAGS "${SANITY_FLAGS} -Wall -Wextra -pedantic -fstack-protector-all -pedantic -std=c++11")
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
#include "../cuda/cuda.hpp"
|
||||
#include "../cuda/uarch.hpp"
|
||||
|
||||
static const char* VERSION = "0.22";
|
||||
static const char* VERSION = "0.23";
|
||||
|
||||
void print_help(char *argv[]) {
|
||||
const char **t = args_str;
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
|
||||
#include "pci.hpp"
|
||||
#include "global.hpp"
|
||||
#include "colors.hpp"
|
||||
#include "master.hpp"
|
||||
@@ -16,6 +17,7 @@ struct gpu_list {
|
||||
|
||||
struct gpu_list* get_gpu_list() {
|
||||
int idx = 0;
|
||||
struct pci_dev *devices = get_pci_devices_from_pciutils();
|
||||
struct gpu_list* list = (struct gpu_list*) malloc(sizeof(struct gpu_list));
|
||||
list->num_gpus = 0;
|
||||
list->gpus = (struct gpu_info**) malloc(sizeof(struct info*) * MAX_GPUS);
|
||||
@@ -24,7 +26,7 @@ struct gpu_list* get_gpu_list() {
|
||||
bool valid = true;
|
||||
|
||||
while(valid) {
|
||||
list->gpus[idx] = get_gpu_info_cuda(idx);
|
||||
list->gpus[idx] = get_gpu_info_cuda(devices, idx);
|
||||
if(list->gpus[idx] != NULL) idx++;
|
||||
else valid = false;
|
||||
}
|
||||
@@ -33,7 +35,7 @@ struct gpu_list* get_gpu_list() {
|
||||
#endif
|
||||
|
||||
#ifdef BACKEND_INTEL
|
||||
list->gpus[idx] = get_gpu_info_intel();
|
||||
list->gpus[idx] = get_gpu_info_intel(devices);
|
||||
if(list->gpus[idx] != NULL) list->num_gpus++;
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
#include "sort.hpp"
|
||||
#include "global.hpp"
|
||||
#include "pci.hpp"
|
||||
#include "../cuda/pci.hpp"
|
||||
@@ -10,6 +11,14 @@
|
||||
#define PCI_VENDOR_ID_AMD 0x1002
|
||||
#define CLASS_VGA_CONTROLLER 0x0300
|
||||
|
||||
void debug_devices(struct pci_dev *devices) {
|
||||
int idx = 0;
|
||||
for(struct pci_dev *dev=devices; idx < 5 && dev != NULL; dev=dev->next) {
|
||||
printf("%04x:%02x:%02x.%d\n", dev->domain, dev->bus, dev->dev, dev->func);
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
|
||||
bool pciutils_is_vendor_id_present(struct pci_dev *devices, int id) {
|
||||
for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
|
||||
if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
|
||||
@@ -21,14 +30,19 @@ bool pciutils_is_vendor_id_present(struct pci_dev *devices, int id) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint16_t pciutils_get_pci_device_id(struct pci_dev *devices, int id) {
|
||||
uint16_t pciutils_get_pci_device_id(struct pci_dev *devices, int id, int idx) {
|
||||
int curr = 0;
|
||||
|
||||
for(struct pci_dev *dev=devices; dev != NULL; dev=dev->next) {
|
||||
if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
|
||||
return dev->device_id;
|
||||
if(dev->vendor_id == id && dev->device_class == CLASS_VGA_CONTROLLER) {
|
||||
if(curr == idx) {
|
||||
return dev->device_id;
|
||||
}
|
||||
curr++;
|
||||
}
|
||||
}
|
||||
|
||||
printErr("Unable to find a valid device for device id 0x%.4X using pciutils", id);
|
||||
printErr("Unable to find a valid device for device id 0x%.4X with idx %d using pciutils", id, idx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -48,13 +62,13 @@ void pciutils_set_pci_bus(struct pci* pci, struct pci_dev *devices, int id) {
|
||||
if(!found) printErr("Unable to find a valid device for id 0x%.4X using pciutils", id);
|
||||
}
|
||||
|
||||
struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id) {
|
||||
struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id, int idx) {
|
||||
struct pci* pci = (struct pci*) emalloc(sizeof(struct pci));
|
||||
|
||||
// TODO: Refactor this; instead of 2xGet + 1xSet, do it better
|
||||
if(pciutils_is_vendor_id_present(devices, id)) {
|
||||
pci->vendor_id = id;
|
||||
pci->device_id = pciutils_get_pci_device_id(devices, id);
|
||||
pci->device_id = pciutils_get_pci_device_id(devices, id, idx);
|
||||
pciutils_set_pci_bus(pci, devices, id);
|
||||
return pci;
|
||||
}
|
||||
@@ -75,6 +89,8 @@ struct pci_dev *get_pci_devices_from_pciutils() {
|
||||
pci_fill_info(dev, PCI_FILL_IDENT | PCI_FILL_BASES | PCI_FILL_CLASS);
|
||||
}
|
||||
|
||||
sort_pci_devices(&pacc->devices);
|
||||
|
||||
return pacc->devices;
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ struct pci {
|
||||
uint16_t func;
|
||||
};
|
||||
|
||||
struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id);
|
||||
struct pci* get_pci_from_pciutils(struct pci_dev *devices, int id, int idx);
|
||||
struct pci_dev *get_pci_devices_from_pciutils();
|
||||
void print_gpus_list_pci();
|
||||
|
||||
|
||||
61
src/common/sort.cpp
Normal file
61
src/common/sort.cpp
Normal file
@@ -0,0 +1,61 @@
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include "pci.hpp"
|
||||
#include "global.hpp"
|
||||
|
||||
// Code inspired by lspci.c
|
||||
int compare_them(const void *A, const void *B) {
|
||||
const struct pci_dev *a = *(struct pci_dev **) A;
|
||||
const struct pci_dev *b = *(struct pci_dev **) B;
|
||||
|
||||
if (a->domain < b->domain)
|
||||
return -1;
|
||||
if (a->domain > b->domain)
|
||||
return 1;
|
||||
if (a->bus < b->bus)
|
||||
return -1;
|
||||
if (a->bus > b->bus)
|
||||
return 1;
|
||||
if (a->dev < b->dev)
|
||||
return -1;
|
||||
if (a->dev > b->dev)
|
||||
return 1;
|
||||
if (a->func < b->func)
|
||||
return -1;
|
||||
if (a->func > b->func)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void sort_pci_devices(struct pci_dev **devices) {
|
||||
int i = 0;
|
||||
struct pci_dev **arr;
|
||||
|
||||
int cnt = 0;
|
||||
for(struct pci_dev *dev=*devices; dev != NULL; dev=dev->next) {
|
||||
cnt++;
|
||||
}
|
||||
|
||||
arr = (struct pci_dev **) emalloc(sizeof(struct pci_dev *) * cnt);
|
||||
for(struct pci_dev *dev=*devices; dev != NULL; dev=dev->next) {
|
||||
arr[i] = dev;
|
||||
i++;
|
||||
}
|
||||
|
||||
qsort(arr, cnt, sizeof(struct pci_dev *), compare_them);
|
||||
|
||||
struct pci_dev *ptr = *devices;
|
||||
struct pci_dev *ptrb = *devices;
|
||||
for(i = 0; i < cnt; i++) {
|
||||
ptr = arr[i];
|
||||
if(i > 0) {
|
||||
ptrb->next = ptr;
|
||||
}
|
||||
ptrb = ptr;
|
||||
}
|
||||
|
||||
ptr->next = NULL;
|
||||
*devices = arr[0];
|
||||
free(arr);
|
||||
}
|
||||
7
src/common/sort.hpp
Normal file
7
src/common/sort.hpp
Normal file
@@ -0,0 +1,7 @@
|
||||
#ifndef __SORT_PCI__
|
||||
#define __SORT_PCI__
|
||||
|
||||
void sort_pci_devices(struct pci_dev **first_dev);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -98,7 +98,7 @@ int64_t get_peak_performance_tcu(cudaDeviceProp prop, struct gpu_info* gpu) {
|
||||
else return 0;
|
||||
}
|
||||
|
||||
struct gpu_info* get_gpu_info_cuda(int gpu_idx) {
|
||||
struct gpu_info* get_gpu_info_cuda(struct pci_dev *devices, int gpu_idx) {
|
||||
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
|
||||
gpu->pci = NULL;
|
||||
gpu->idx = gpu_idx;
|
||||
@@ -146,8 +146,7 @@ struct gpu_info* get_gpu_info_cuda(int gpu_idx) {
|
||||
gpu->name = (char *) emalloc(sizeof(char) * (strlen(deviceProp.name) + 1));
|
||||
strcpy(gpu->name, deviceProp.name);
|
||||
|
||||
struct pci_dev *devices = get_pci_devices_from_pciutils();
|
||||
gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_NVIDIA);
|
||||
gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_NVIDIA, gpu_idx);
|
||||
gpu->arch = get_uarch_from_cuda(gpu);
|
||||
gpu->cach = get_cache_info(deviceProp);
|
||||
gpu->mem = get_memory_info(gpu, deviceProp);
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
#include "../common/gpu.hpp"
|
||||
#define CUDA_DRIVER_START_WARNING "Waiting for CUDA driver to start..."
|
||||
|
||||
struct gpu_info* get_gpu_info_cuda(int gpu_idx);
|
||||
struct gpu_info* get_gpu_info_cuda(struct pci_dev *devices, int gpu_idx);
|
||||
bool print_gpu_cuda(struct gpu_info* gpu);
|
||||
char* get_str_sm(struct gpu_info* gpu);
|
||||
char* get_str_cores_sm(struct gpu_info* gpu);
|
||||
|
||||
@@ -13,12 +13,10 @@ int64_t get_peak_performance_intel(struct gpu_info* gpu) {
|
||||
return gpu->freq * 1000000 * gpu->topo_i->eu_subslice * gpu->topo_i->subslices * 8 * 2;
|
||||
}
|
||||
|
||||
struct gpu_info* get_gpu_info_intel() {
|
||||
struct gpu_info* get_gpu_info_intel(struct pci_dev *devices) {
|
||||
struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
|
||||
gpu->vendor = GPU_VENDOR_INTEL;
|
||||
|
||||
struct pci_dev *devices = get_pci_devices_from_pciutils();
|
||||
gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_INTEL);
|
||||
gpu->pci = get_pci_from_pciutils(devices, PCI_VENDOR_ID_INTEL, 0);
|
||||
|
||||
if(gpu->pci == NULL) {
|
||||
// No Intel iGPU found in PCI, which means it is not present
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include "../common/gpu.hpp"
|
||||
|
||||
struct gpu_info* get_gpu_info_intel();
|
||||
struct gpu_info* get_gpu_info_intel(struct pci_dev *devices);
|
||||
bool print_gpu_intel(struct gpu_info* gpu);
|
||||
char* get_str_eu(struct gpu_info* gpu);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user