From 981daef728029e34f823832c3d45d3694fc8fcf2 Mon Sep 17 00:00:00 2001
From: Dr-Noob <peibolms@gmail.com>
Date: Fri, 13 Aug 2021 16:36:10 +0200
Subject: [PATCH] [v0.01] Retrieve more info; frequency, topology and peak
 performance

---
 src/common/gpu.cpp  | 51 ++++++++++++++++++++++++++++++++++++++++++---
 src/common/gpu.hpp  |  2 ++
 src/common/main.cpp |  6 +++---
 src/cuda/cuda.cpp   | 24 ++++++++++++---------
 src/cuda/cuda.hpp   |  6 +++---
 src/cuda/nvmlb.cpp  |  2 +-
 6 files changed, 71 insertions(+), 20 deletions(-)
diff --git a/src/common/gpu.cpp b/src/common/gpu.cpp
index e6cf603..4dbbee0 100644
--- a/src/common/gpu.cpp
+++ b/src/common/gpu.cpp
@@ -1,12 +1,37 @@
-#include "gpu.hpp"
 #include <cstddef>
+#include <cstring>
+#include <cstdio>
+#include <cassert>
+
+#include "../common/global.hpp"
+#include "gpu.hpp"
+
+#define STRING_YES        "Yes"
+#define STRING_NO         "No"
+#define STRING_NONE       "None"
+#define STRING_MEGAHERZ   "MHz"
+#define STRING_GIGAHERZ   "GHz"
+#define STRING_KILOBYTES  "KB"
+#define STRING_MEGABYTES  "MB"
 
 char* get_str_gpu_name(struct gpu_info* gpu) {
   return gpu->name;
 }
 
 char* get_str_freq(struct gpu_info* gpu) {
-  return NULL;
+  // Returns an ecalloc'ed string: "<x.xxx> GHz", "<xxx.xxx> MHz" or STRING_UNKNOWN
+  uint32_t size = (7+1+3+1); // "999.000" + ' ' + "(M/G)Hz" + NUL; 10 bytes truncated the MHz case
+  assert(strlen(STRING_UNKNOWN)+1 <= size);
+  char* string = (char *) ecalloc(size, sizeof(char));
+
+  if(gpu->freq == UNKNOWN_FREQ || gpu->freq < 0)
+    snprintf(string, size, "%s", STRING_UNKNOWN); // bound by the buffer, never use a macro as the format
+  else if(gpu->freq >= 1000)
+    snprintf(string, size, "%.3f " STRING_GIGAHERZ, (float)(gpu->freq)/1000); // space before macro: C++11 literal-suffix rule
+  else
+    snprintf(string, size, "%.3f " STRING_MEGAHERZ, (float)gpu->freq);
+
+  return string;
 }
 
 char* get_str_memory_size(struct gpu_info* gpu) {
@@ -26,5 +51,25 @@ char* get_str_l2(struct gpu_info* gpu) {
 }
 
 char* get_str_peak_performance(struct gpu_info* gpu) {
-  return NULL;
+  char* str; // heap-allocated result handed back to the caller
+
+  if(gpu->peak_performance == -1) { // -1 is reported as STRING_UNKNOWN
+    str = (char *) emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
+    strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1); // length + 1 so the NUL terminator is copied too
+    return str;
+  }
+
+  // Buffer budget: 7 chars for the number (e.g. XXXX.XX), 1 space, 7 for "XFLOP/s", 1 for NUL
+  double flopsd = (double) gpu->peak_performance;
+  uint32_t max_size = 7+1+7+1;
+  str = (char *) ecalloc(max_size, sizeof(char));
+
+  if(flopsd >= (double)1000000000000.0) // >= 1e12: scale to TFLOP/s
+    snprintf(str, max_size, "%.2f TFLOP/s", flopsd/1000000000000);
+  else if(flopsd >= 1000000000.0) // >= 1e9: scale to GFLOP/s
+    snprintf(str, max_size, "%.2f GFLOP/s", flopsd/1000000000);
+  else // everything below 1e9 is shown in MFLOP/s
+    snprintf(str, max_size, "%.2f MFLOP/s", flopsd/1000000);
+
+  return str;
 }
diff --git a/src/common/gpu.hpp b/src/common/gpu.hpp
index 9cf1e7a..89ea9ec 100644
--- a/src/common/gpu.hpp
+++ b/src/common/gpu.hpp
@@ -7,6 +7,8 @@
 #include "../cuda/nvmlb.hpp"
 #include "../cuda/pci.hpp"
 
+#define UNKNOWN_FREQ -1
+
 enum {
   GPU_VENDOR_NVIDIA
 };
diff --git a/src/common/main.cpp b/src/common/main.cpp
index 5908285..6609162 100644
--- a/src/common/main.cpp
+++ b/src/common/main.cpp
@@ -59,9 +59,9 @@ int main(int argc, char* argv[]) {
   printf("Compute Capability: %s\n", get_str_cc(gpu->arch));
   printf("Technology:         %s\n", get_str_process(gpu->arch));
   printf("Max Frequency:      %s\n", get_str_freq(gpu));
-  printf("SM:                 %s\n", get_str_sm(gpu));
-  printf("Cores/MP:           %s\n", get_str_cores_sm(gpu));
-  printf("CUDA cores:         %s\n", get_str_cuda_cores(gpu));
+  printf("SM:                 %d\n", get_str_sm(gpu));
+  printf("Cores/MP:           %d\n", get_str_cores_sm(gpu));
+  printf("CUDA cores:         %d\n", get_str_cuda_cores(gpu));
   printf("Memory size:        %s\n", get_str_memory_size(gpu));
   printf("Memory type:        %s\n", get_str_memory_type(gpu));
   printf("L1 size:            %s\n", get_str_l1(gpu));
diff --git a/src/cuda/cuda.cpp b/src/cuda/cuda.cpp
index d194df2..5e59e5a 100644
--- a/src/cuda/cuda.cpp
+++ b/src/cuda/cuda.cpp
@@ -12,9 +12,13 @@ struct cache* get_cache_info(struct gpu_info* gpu) {
   return cach;
 }
 
-struct topology* get_topology_info(struct gpu_info* gpu) {
+struct topology* get_topology_info(struct gpu_info* gpu, cudaDeviceProp prop) { // fills a new topology from prop (passed by value); gpu is not read here
   struct topology* topo = (struct topology*) emalloc(sizeof(struct topology));
 
+  topo->streaming_mp = prop.multiProcessorCount; // multiprocessor count as reported in the device properties
+  topo->cores_per_mp = _ConvertSMVer2Cores(prop.major, prop.minor); // NOTE(review): presumably the CUDA samples helper mapping compute capability to cores per SM - confirm
+  topo->cuda_cores = topo->streaming_mp * topo->cores_per_mp; // total cores = SMs * cores per SM
+
   return topo;
 }
 
@@ -25,7 +29,7 @@ struct memory* get_memory_info(struct gpu_info* gpu) {
 }
 
 int64_t get_peak_performance(struct gpu_info* gpu) {
-  return 1000;
+  return gpu->freq < 0 ? -1 : (int64_t) gpu->freq * 1000000 * gpu->topo->cuda_cores * 2; // freq * 1e6 * cores * 2 in 64-bit math (narrower ints would overflow); -1 = unknown freq
 }
 
 struct gpu_info* get_gpu_info() {
@@ -38,10 +42,10 @@ struct gpu_info* get_gpu_info() {
   cudaDeviceProp deviceProp;
   cudaGetDeviceProperties(&deviceProp, dev);
 
+  gpu->freq = deviceProp.clockRate * 1e-3f;
   gpu->vendor = GPU_VENDOR_NVIDIA;
   gpu->name = (char *) emalloc(sizeof(char) * (strlen(deviceProp.name) + 1));
   strcpy(gpu->name, deviceProp.name);
-  gpu->freq = 10000;
 
   gpu->nvmld = nvml_init();
   if(nvml_get_pci_info(dev, gpu->nvmld)) {
@@ -50,21 +54,21 @@ struct gpu_info* get_gpu_info() {
 
   gpu->arch = get_uarch_from_cuda(gpu);
   gpu->cach = get_cache_info(gpu);
-  gpu->topo = get_topology_info(gpu);
+  gpu->topo = get_topology_info(gpu, deviceProp);
   gpu->peak_performance = get_peak_performance(gpu);
 
   return gpu;
 }
 
-char* get_str_sm(struct gpu_info* gpu) {
-  return NULL;
+int32_t get_str_sm(struct gpu_info* gpu) { // despite the get_str_ prefix this returns an integer, not a string
+  return gpu->topo->streaming_mp; // value stored by get_topology_info
 }
 
-char* get_str_cores_sm(struct gpu_info* gpu) {
-  return NULL;
+int32_t get_str_cores_sm(struct gpu_info* gpu) { // despite the get_str_ prefix this returns an integer, not a string
+  return gpu->topo->cores_per_mp; // value stored by get_topology_info
 }
 
-char* get_str_cuda_cores(struct gpu_info* gpu) {
-  return NULL;
+int32_t get_str_cuda_cores(struct gpu_info* gpu) { // despite the get_str_ prefix this returns an integer, not a string
+  return gpu->topo->cuda_cores; // streaming_mp * cores_per_mp, computed in get_topology_info
 }
 
diff --git a/src/cuda/cuda.hpp b/src/cuda/cuda.hpp
index 72ea92a..f77329e 100644
--- a/src/cuda/cuda.hpp
+++ b/src/cuda/cuda.hpp
@@ -4,8 +4,8 @@
 #include "../common/gpu.hpp"
 
 struct gpu_info* get_gpu_info();
-char* get_str_sm(struct gpu_info* gpu);
-char* get_str_cores_sm(struct gpu_info* gpu);
-char* get_str_cuda_cores(struct gpu_info* gpu);
+int32_t get_str_sm(struct gpu_info* gpu);
+int32_t get_str_cores_sm(struct gpu_info* gpu);
+int32_t get_str_cuda_cores(struct gpu_info* gpu);
 
 #endif
diff --git a/src/cuda/nvmlb.cpp b/src/cuda/nvmlb.cpp
index 1f566ea..357914c 100644
--- a/src/cuda/nvmlb.cpp
+++ b/src/cuda/nvmlb.cpp
@@ -37,7 +37,7 @@ bool nvml_get_pci_info(int dev, struct nvml_data* data) {
     return false;
   }
 
-  if ((result = nvmlDeviceGetPciInfo(device, &data->pci)) != result) {
+  if ((result = nvmlDeviceGetPciInfo(device, &data->pci)) != NVML_SUCCESS) {
     printErr("nvmlDeviceGetPciInfo: %s\n", nvmlErrorString(result));
     return false;
   }