[v0.24] Remove cuda-samples dependency

2022-04-17 13:55:05 +02:00
parent 312d78b7f1
commit af52d2850c
5 changed files with 70 additions and 12 deletions
--- a/src/common/main.cpp
+++ b/src/common/main.cpp
@@ -8,7 +8,7 @@
 #include "../cuda/cuda.hpp"
 #include "../cuda/uarch.hpp"

-static const char* VERSION = "0.23";
+static const char* VERSION = "0.24";

 void print_help(char *argv[]) {
  const char **t = args_str;
--- a/src/cuda/cuda.cpp
+++ b/src/cuda/cuda.cpp
@@ -1,8 +1,11 @@
-#include <helper_cuda.h>
 #include <cuda_runtime.h>
+#include <cstring>
+#include <cstdlib>
+#include <cstdio>

 #include "cuda.hpp"
 #include "uarch.hpp"
+#include "gpufetch_helper_cuda.hpp"
 #include "../common/pci.hpp"
 #include "../common/global.hpp"
 #include "../common/uarch.hpp"
--- a/src/cuda/gpufetch_helper_cuda.hpp
+++ b/src/cuda/gpufetch_helper_cuda.hpp
@@ -0,0 +1,60 @@
+#ifndef __GPUFETCH_HELPER_CUDA__
+#define __GPUFETCH_HELPER_CUDA__
+
+// gpufetch self-contained helper_cuda.h
+//
+// Avoids relying on helper_cuda.h, which is
+// often very hard to include properly, causing
+// compilation issues.
+//
+// URL: https://github.com/NVIDIA/cuda-samples
+// Commit: 2e41896
+
+inline int _ConvertSMVer2Cores(int major, int minor) {
+  // Maps an SM version (major.minor) to its number of cores per SM.
+  // Table entry type: encoded SM version -> cores per SM.
+  typedef struct {
+    int SM;  // 0xMm (hexadecimal notation), M = SM major version,
+    // and m = SM minor version
+    int Cores;
+  } sSMtoCores;

+  sSMtoCores nGpuArchCoresPerSM[] = {
+      {0x30, 192},
+      {0x32, 192},
+      {0x35, 192},
+      {0x37, 192},
+      {0x50, 128},
+      {0x52, 128},
+      {0x53, 128},
+      {0x60,  64},
+      {0x61, 128},
+      {0x62, 128},
+      {0x70,  64},
+      {0x72,  64},
+      {0x75,  64},
+      {0x80,  64},
+      {0x86, 128},
+      {0x87, 128},
+      {-1, -1}};  // sentinel entry: terminates the lookup loop below

+  int index = 0;

+  while (nGpuArchCoresPerSM[index].SM != -1) {
+    if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
+      return nGpuArchCoresPerSM[index].Cores;
+    }

+    index++;
+  }

+  // If the SM version is not in the table, print a notice and fall back to
+  // the cores/SM of the last real table entry (the one before the sentinel).
+  printf(
+      "MapSMtoCores for SM %d.%d is undefined."
+      "  Default to use %d Cores/SM\n",
+      major, minor, nGpuArchCoresPerSM[index - 1].Cores);
+  return nGpuArchCoresPerSM[index - 1].Cores;
+}
+
+#endif
--- a/src/cuda/uarch.cpp
+++ b/src/cuda/uarch.cpp
@@ -1,7 +1,9 @@
 #include <cuda_runtime.h>
-#include <helper_cuda.h>
+#include <cstdlib>
 #include <cstdint>
 #include <cstddef>
+#include <cstdio>
+#include <cstring>

 #include "../common/uarch.hpp"
 #include "../common/global.hpp"
@@ -329,6 +331,7 @@ char* get_str_chip(struct uarch* arch) {
  return arch->chip_str;
 }

+// TODO: What about _ConvertSMVer2ArchName?
 const char* get_str_uarch_cuda(struct uarch* arch) {
  return uarch_str[arch->uarch];
 }