From fd15008ab2d2193afcd839a971791b380140f755 Mon Sep 17 00:00:00 2001
From: Dr-Noob
Date: Mon, 16 Aug 2021 13:52:57 +0200
Subject: [PATCH] [v0.04] Add error checking for CUDA calls

---
 src/cuda/cuda.cpp  | 12 +++++++++---
 src/cuda/nvmlb.cpp |  4 ++--
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/cuda/cuda.cpp b/src/cuda/cuda.cpp
index e140bb4..ce5a5a3 100644
--- a/src/cuda/cuda.cpp
+++ b/src/cuda/cuda.cpp
@@ -77,7 +77,11 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
   fflush(stdout);
 
   int num_gpus = -1;
-  cudaGetDeviceCount(&num_gpus);
+  cudaError_t err = cudaSuccess;
+  if ((err = cudaGetDeviceCount(&num_gpus)) != cudaSuccess) {
+    printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
+    return NULL;
+  }
   printf("\r ");
 
   if(num_gpus <= 0) {
@@ -90,9 +94,11 @@ struct gpu_info* get_gpu_info(int gpu_idx) {
     return NULL;
   }
 
-  cudaSetDevice(gpu_idx);
   cudaDeviceProp deviceProp;
-  cudaGetDeviceProperties(&deviceProp, gpu_idx);
+  if ((err = cudaGetDeviceProperties(&deviceProp, gpu_idx)) != cudaSuccess) {
+    printErr("%s: %s", cudaGetErrorName(err), cudaGetErrorString(err));
+    return NULL;
+  }
 
   gpu->freq = deviceProp.clockRate * 1e-3f;
   gpu->vendor = GPU_VENDOR_NVIDIA;
diff --git a/src/cuda/nvmlb.cpp b/src/cuda/nvmlb.cpp
index 357914c..4e7db84 100644
--- a/src/cuda/nvmlb.cpp
+++ b/src/cuda/nvmlb.cpp
@@ -23,7 +23,7 @@ struct nvml_data* nvml_init() {
   return data;
 }
 
-bool nvml_get_pci_info(int dev, struct nvml_data* data) {
+bool nvml_get_pci_info(int gpu_idx, struct nvml_data* data) {
   nvmlReturn_t result;
   nvmlDevice_t device;
 
@@ -32,7 +32,7 @@ bool nvml_get_pci_info(int dev, struct nvml_data* data) {
     return false;
   }
 
-  if ((result = nvmlDeviceGetHandleByIndex(dev, &device)) != NVML_SUCCESS) {
+  if ((result = nvmlDeviceGetHandleByIndex(gpu_idx, &device)) != NVML_SUCCESS) {
     printErr("nvmlDeviceGetHandleByIndex: %s\n", nvmlErrorString(result));
     return false;
   }