diff --git a/Makefile b/Makefile
index d79bbec..449f7b8 100644
--- a/Makefile
+++ b/Makefile
@@ -11,8 +11,8 @@ SRC_CUDA=src/cuda/
 COMMON_SRC = $(SRC_COMMON)main.cpp $(SRC_COMMON)gpu.cpp $(SRC_COMMON)args.cpp $(SRC_COMMON)global.cpp
 COMMON_HDR = $(SRC_COMMON)gpu.hpp $(SRC_COMMON)args.hpp $(SRC_COMMON)global.hpp
 
-CUDA_SRC = $(SRC_CUDA)cuda.cpp $(SRC_CUDA)uarch.cpp
-CUDA_HDR = $(SRC_CUDA)cuda.hpp $(SRC_CUDA)uarch.hpp
+CUDA_SRC = $(SRC_CUDA)cuda.cpp $(SRC_CUDA)uarch.cpp $(SRC_CUDA)pci.cpp $(SRC_CUDA)nvmlb.cpp
+CUDA_HDR = $(SRC_CUDA)cuda.hpp $(SRC_CUDA)uarch.hpp $(SRC_CUDA)pci.hpp $(SRC_CUDA)nvmlb.hpp
 CUDA_PATH = /usr/local/cuda/
 
 SOURCE += $(COMMON_SRC) $(CUDA_SRC)
@@ -20,7 +20,7 @@ HEADERS += $(COMMON_HDR) $(CUDA_HDR)
 
 OUTPUT=gpufetch
 
-CXXFLAGS+= -I $(CUDA_PATH)/samples/common/inc -I $(CUDA_PATH)/targets/x86_64-linux/include -L $(CUDA_PATH)/targets/x86_64-linux/lib -lcudart
+CXXFLAGS+= -I $(CUDA_PATH)/samples/common/inc -I $(CUDA_PATH)/targets/x86_64-linux/include -L $(CUDA_PATH)/targets/x86_64-linux/lib -lcudart -lnvidia-ml
 
 all: CXXFLAGS += -O3
 all: $(OUTPUT)
diff --git a/src/common/gpu.hpp b/src/common/gpu.hpp
index c02bdaf..9cf1e7a 100644
--- a/src/common/gpu.hpp
+++ b/src/common/gpu.hpp
@@ -4,6 +4,9 @@
 #include <stdint.h>
 #include <stdbool.h>
 
+#include "../cuda/nvmlb.hpp"
+#include "../cuda/pci.hpp"
+
 enum {
   GPU_VENDOR_NVIDIA
 };
@@ -42,6 +45,8 @@ struct gpu_info {
   struct uarch* arch;
   char* name;
   int64_t freq;
+  struct pci* pci;
+  struct nvml_data* nvmld;
   struct topology* topo;
   struct memory* mem;
   struct cache* cach;
diff --git a/src/cuda/cuda.cpp b/src/cuda/cuda.cpp
index 71f1b20..d194df2 100644
--- a/src/cuda/cuda.cpp
+++ b/src/cuda/cuda.cpp
@@ -2,6 +2,7 @@
 #include <cuda_runtime.h>
 
 #include "cuda.hpp"
+#include "nvmlb.hpp"
 #include "uarch.hpp"
 #include "../common/global.hpp"
 
@@ -29,6 +30,7 @@ int64_t get_peak_performance(struct gpu_info* gpu) {
 
 struct gpu_info* get_gpu_info() {
   struct gpu_info* gpu = (struct gpu_info*) emalloc(sizeof(struct gpu_info));
+  gpu->pci = NULL;
 
   printf("Waiting for CUDA driver to start...\n");
   int dev = 0;
@@ -41,6 +43,11 @@ struct gpu_info* get_gpu_info() {
   strcpy(gpu->name, deviceProp.name);
   gpu->freq = 10000;
 
+  gpu->nvmld = nvml_init();
+  if(nvml_get_pci_info(dev, gpu->nvmld)) {
+    gpu->pci = get_pci_from_nvml(gpu->nvmld);
+  }
+
   gpu->arch = get_uarch_from_cuda(gpu);
   gpu->cach = get_cache_info(gpu);
   gpu->topo = get_topology_info(gpu);
diff --git a/src/cuda/nvmlb.cpp b/src/cuda/nvmlb.cpp
new file mode 100644
index 0000000..1f566ea
--- /dev/null
+++ b/src/cuda/nvmlb.cpp
@@ -0,0 +1,70 @@
+#include <nvml.h>
+
+#include "nvmlb.hpp"
+#include "../common/global.hpp"
+
+struct nvml_data {
+  bool nvml_started; // set to true by nvml_init() once nvmlInit() has succeeded
+  nvmlPciInfo_t pci; // written by nvml_get_pci_info() through nvmlDeviceGetPciInfo()
+};
+
+// Starts NVML; returns a new handle (nvml_started = true) or NULL on failure.
+struct nvml_data* nvml_init() {
+  nvmlReturn_t result;
+
+  if ((result = nvmlInit()) != NVML_SUCCESS) {
+    printErr("nvmlInit: %s\n", nvmlErrorString(result));
+    return NULL;
+  }
+
+  // Allocate only after NVML is up so the failure path above leaks nothing.
+  struct nvml_data* data = (struct nvml_data*) emalloc(sizeof(struct nvml_data));
+  data->nvml_started = true;
+  return data;
+}
+
+// Stores the PCI info of NVML device `dev` in data->pci. Returns false (after
+// logging) if data is NULL, NVML is not started, or an NVML call fails.
+bool nvml_get_pci_info(int dev, struct nvml_data* data) {
+  nvmlReturn_t result;
+  nvmlDevice_t device;
+  // nvml_init() returns NULL on failure, so test data before dereferencing it.
+  if(data == NULL || !data->nvml_started) {
+    printErr("nvml_get_pci_info: nvml was not started");
+    return false;
+  }
+  if ((result = nvmlDeviceGetHandleByIndex(dev, &device)) != NVML_SUCCESS) {
+    printErr("nvmlDeviceGetHandleByIndex: %s\n", nvmlErrorString(result));
+    return false;
+  }
+  // Treat any status other than NVML_SUCCESS as a failure of the query.
+  if ((result = nvmlDeviceGetPciInfo(device, &data->pci)) != NVML_SUCCESS) {
+    printErr("nvmlDeviceGetPciInfo: %s\n", nvmlErrorString(result));
+    return false;
+  }
+  return true;
+}
+
+uint16_t nvml_get_pci_vendor_id(struct nvml_data* data) {
+  return (uint16_t) (data->pci.pciDeviceId & 0xFFFFu); // vendor id: low 16 bits
+}
+
+uint16_t nvml_get_pci_device_id(struct nvml_data* data) {
+  return (uint16_t) ((data->pci.pciDeviceId >> 16) & 0xFFFFu); // device id: high 16 bits
+}
+
+// Shuts NVML down. Returns true if it was never started (nothing to do) or
+// nvmlShutdown() succeeds; false (after logging) if nvmlShutdown() fails.
+bool nvml_shutdown(struct nvml_data* data) {
+  nvmlReturn_t result;
+  // nvml_init() returns NULL on failure, so a NULL handle means "not started".
+  if(data == NULL || !data->nvml_started) {
+    printWarn("nvml_shutdown: nvml was not started");
+    return true;
+  }
+  if ((result = nvmlShutdown()) != NVML_SUCCESS) {
+    printErr("nvmlShutdown: %s\n", nvmlErrorString(result));
+    return false;
+  }
+  return true;
+}
diff --git a/src/cuda/nvmlb.hpp b/src/cuda/nvmlb.hpp
new file mode 100644
index 0000000..92c2224
--- /dev/null
+++ b/src/cuda/nvmlb.hpp
@@ -0,0 +1,16 @@
+// NVML Backend
+#ifndef __NVMLB__
+#define __NVMLB__
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct nvml_data; // opaque here; the definition lives in nvmlb.cpp
+
+struct nvml_data* nvml_init(); // starts NVML; returns NULL if nvmlInit() fails
+bool nvml_get_pci_info(int dev, struct nvml_data* data); // queries PCI info of device index dev into data
+uint16_t nvml_get_pci_vendor_id(struct nvml_data* data); // low 16 bits of the stored pciDeviceId
+uint16_t nvml_get_pci_device_id(struct nvml_data* data); // high 16 bits of the stored pciDeviceId
+bool nvml_shutdown(struct nvml_data* data); // returns false only when nvmlShutdown() fails
+
+#endif
diff --git a/src/cuda/pci.cpp b/src/cuda/pci.cpp
new file mode 100644
index 0000000..3be3577
--- /dev/null
+++ b/src/cuda/pci.cpp
@@ -0,0 +1,22 @@
+#include <stdio.h>
+
+#include "pci.hpp"
+#include "nvmlb.hpp"
+#include "../common/global.hpp"
+
+struct pci {
+  uint16_t vendor_id; // value returned by nvml_get_pci_vendor_id()
+  uint16_t device_id; // value returned by nvml_get_pci_device_id()
+};
+
+// Allocates a struct pci, fills it from the NVML PCI data and prints both ids.
+struct pci* get_pci_from_nvml(struct nvml_data* data) {
+  struct pci* ids = (struct pci*) emalloc(sizeof(struct pci));
+
+  ids->vendor_id = nvml_get_pci_vendor_id(data);
+  printf("pci->vendor_id=0x%.4X\n", ids->vendor_id);
+  ids->device_id = nvml_get_pci_device_id(data);
+  printf("pci->device_id=0x%.4X\n", ids->device_id);
+
+  return ids;
+}
diff --git a/src/cuda/pci.hpp b/src/cuda/pci.hpp
new file mode 100644
index 0000000..7952558
--- /dev/null
+++ b/src/cuda/pci.hpp
@@ -0,0 +1,11 @@
+#ifndef __PCI__
+#define __PCI__
+
+#include <stdint.h>
+#include "nvmlb.hpp"
+
+struct pci; // opaque here; the definition lives in pci.cpp
+
+struct pci* get_pci_from_nvml(struct nvml_data* data); // allocates a struct pci filled from data
+
+#endif