From 2687fa5016e885286a7577be3e76ec07a6f22e36 Mon Sep 17 00:00:00 2001 From: Dr-Noob Date: Sun, 15 Aug 2021 21:22:41 +0200 Subject: [PATCH] [v0.02] Experimental algorithm to detect memory type finished. Information gathering finished --- src/common/gpu.cpp | 15 ++++++++++++- src/common/gpu.hpp | 9 +++++++- src/common/main.cpp | 2 +- src/cuda/cuda.cpp | 23 +++++++++---------- src/cuda/uarch.cpp | 54 +++++++++++++++++++++++++++++++++++++++++++++ src/cuda/uarch.hpp | 3 +++ 6 files changed, 91 insertions(+), 15 deletions(-) diff --git a/src/common/gpu.cpp b/src/common/gpu.cpp index e34223f..5c9d0bd 100644 --- a/src/common/gpu.cpp +++ b/src/common/gpu.cpp @@ -16,6 +16,17 @@ #define STRING_MEGABYTES "MB" #define STRING_GIGABYTES "GB" +static const char *memtype_str[] = { + /*[MEMTYPE_UNKNOWN] = */ STRING_UNKNOWN, + /*[MEMTYPE_DDR3] = */ "DDR3", + /*[MEMTYPE_DDR4] = */ "DDR4", + /*[MEMTYPE_GDDR5] = */ "GDDR5", + /*[MEMTYPE_GDDR5X] = */ "GDDR5X", + /*[MEMTYPE_GDDR6] = */ "GDDR6", + /*[MEMTYPE_GDDR6X] = */ "GDDR6X", + /*[MEMTYPE_HBM2] = */ "HBM2" +}; + int32_t get_value_as_smallest_unit(char ** str, uint64_t value) { int32_t ret; int max_len = 10; // Max is 8 for digits, 2 for units @@ -65,7 +76,9 @@ char* get_str_memory_size(struct gpu_info* gpu) { } char* get_str_memory_type(struct gpu_info* gpu) { - return NULL; + char* str = (char *) emalloc(sizeof(char) * (strlen(memtype_str[gpu->mem->type]) +1)); + strcpy(str, memtype_str[gpu->mem->type]); + return str; } char* get_str_bus_width(struct gpu_info* gpu) { diff --git a/src/common/gpu.hpp b/src/common/gpu.hpp index 9c58096..0725a7d 100644 --- a/src/common/gpu.hpp +++ b/src/common/gpu.hpp @@ -14,7 +14,14 @@ enum { }; enum { - MEMTYPE_GDDR6 + MEMTYPE_UNKNOWN, + MEMTYPE_DDR3, + MEMTYPE_DDR4, + MEMTYPE_GDDR5, + MEMTYPE_GDDR5X, + MEMTYPE_GDDR6, + MEMTYPE_GDDR6X, + MEMTYPE_HBM2 }; typedef int32_t VENDOR; diff --git a/src/common/main.cpp b/src/common/main.cpp index 88a974d..68aea15 100644 --- a/src/common/main.cpp +++ 
b/src/common/main.cpp @@ -7,7 +7,7 @@ #include "../cuda/cuda.hpp" #include "../cuda/uarch.hpp" -static const char* VERSION = "0.01"; +static const char* VERSION = "0.02"; void print_help(char *argv[]) { const char **t = args_str; diff --git a/src/cuda/cuda.cpp b/src/cuda/cuda.cpp index deb4f19..3af5ba8 100644 --- a/src/cuda/cuda.cpp +++ b/src/cuda/cuda.cpp @@ -28,21 +28,20 @@ struct topology* get_topology_info(struct gpu_info* gpu, cudaDeviceProp prop) { } MEMTYPE guess_memory_type(struct memory* mem, struct gpu_info* gpu) { - // 1. Guess data rate - int32_t data_rate = -1; - int32_t dr8 = abs((mem->freq/8) - gpu->freq); - int32_t dr4 = abs((mem->freq/4) - gpu->freq); - int32_t dr2 = abs((mem->freq/2) - gpu->freq); - int32_t dr1 = abs((mem->freq/1) - gpu->freq); + // Guess clock multiplier + int32_t clk_mul = -1; + int32_t clk8 = abs((mem->freq/8) - gpu->freq); + int32_t clk4 = abs((mem->freq/4) - gpu->freq); + int32_t clk2 = abs((mem->freq/2) - gpu->freq); + int32_t clk1 = abs((mem->freq/1) - gpu->freq); int32_t min = mem->freq; - if(min > dr8) { data_rate = 8; min = dr8; } - if(min > dr4) { data_rate = 4; min = dr4; } - if(min > dr2) { data_rate = 2; min = dr2; } - if(min > dr1) { data_rate = 1; min = dr1; } + if(min > clk8) { clk_mul = 8; min = clk8; } + if(min > clk4) { clk_mul = 4; min = clk4; } + if(min > clk2) { clk_mul = 2; min = clk2; } + if(min > clk1) { clk_mul = 1; min = clk1; } - printf("data_rate=%d\n", data_rate); - return MEMTYPE_GDDR6; + return guess_memtype_from_cmul_and_uarch(clk_mul, gpu->arch); } struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) { diff --git a/src/cuda/uarch.cpp b/src/cuda/uarch.cpp index b0c6746..00a1542 100644 --- a/src/cuda/uarch.cpp +++ b/src/cuda/uarch.cpp @@ -9,6 +9,9 @@ typedef uint32_t MICROARCH; +// Any clock multiplier +#define CM_ANY -1 + // Data not available #define NA -1 @@ -264,6 +267,57 @@ struct uarch* get_uarch_from_cuda(struct gpu_info* gpu) { return arch; } +#define 
CHECK_MEMTYPE_START if (false) {} +#define CHECK_MEMTYPE(arch, clkm, arch_, clkm_, memtype) \ + else if (arch->uarch == arch_ && (clkm_ == CM_ANY || clkm == clkm_)) return memtype; +#define CHECK_MEMTYPE_END else { printBug("guess_memtype_from_cmul_and_uarch: Found invalid convination: clkm=%d, uarch=%d", clkm, arch->uarch); return MEMTYPE_UNKNOWN; } + +MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch) { + /* + * +---------+------------------+ + * | MEMTYPE | Clock multiplier | + * +---------+------------------+ + * | DDR3 | 1 | + * | DDR4 | 1 | + * | GDDR5 | 2 | + * | GDDR5X | 4 | + * | GDDR6 | 4 | + * | GDDR6X | 8 | + * | HBM | 1 | + * | HBM2 | 1 | + * +---------+------------------+ + * + * archs in parentheses are not included in these rules + * and will be detected wrongly + */ + CHECK_MEMTYPE_START + // TESLA + CHECK_MEMTYPE(arch, clkm, UARCH_TESLA, CM_ANY, MEMTYPE_UNKNOWN) + // FERMI + CHECK_MEMTYPE(arch, clkm, UARCH_FERMI, 1, MEMTYPE_DDR3) + CHECK_MEMTYPE(arch, clkm, UARCH_FERMI, 2, MEMTYPE_GDDR5) + // KEPLER (jetson) + CHECK_MEMTYPE(arch, clkm, UARCH_KEPLER, 1, MEMTYPE_DDR3) + CHECK_MEMTYPE(arch, clkm, UARCH_KEPLER, 2, MEMTYPE_GDDR5) + // MAXWELL (switch, jetson) + CHECK_MEMTYPE(arch, clkm, UARCH_MAXWELL, 1, MEMTYPE_DDR3) + CHECK_MEMTYPE(arch, clkm, UARCH_MAXWELL, 2, MEMTYPE_GDDR5) + // PASCAL + CHECK_MEMTYPE(arch, clkm, UARCH_PASCAL, 1, MEMTYPE_DDR4) + CHECK_MEMTYPE(arch, clkm, UARCH_PASCAL, 2, MEMTYPE_GDDR5) + CHECK_MEMTYPE(arch, clkm, UARCH_PASCAL, 4, MEMTYPE_GDDR5X) + // VOLTA (jetson) + CHECK_MEMTYPE(arch, clkm, UARCH_VOLTA, CM_ANY, MEMTYPE_HBM2) + // TURING + CHECK_MEMTYPE(arch, clkm, UARCH_TURING, 2, MEMTYPE_GDDR5) + CHECK_MEMTYPE(arch, clkm, UARCH_TURING, 4, MEMTYPE_GDDR6) + // AMPERE + CHECK_MEMTYPE(arch, clkm, UARCH_AMPERE, 1, MEMTYPE_HBM2) + CHECK_MEMTYPE(arch, clkm, UARCH_AMPERE, 4, MEMTYPE_GDDR6) + CHECK_MEMTYPE(arch, clkm, UARCH_AMPERE, 8, MEMTYPE_GDDR6X) + CHECK_MEMTYPE_END +} + const char* get_str_uarch(struct uarch* 
arch) { return uarch_str[arch->uarch]; } diff --git a/src/cuda/uarch.hpp b/src/cuda/uarch.hpp index 97565bd..ad0446e 100644 --- a/src/cuda/uarch.hpp +++ b/src/cuda/uarch.hpp @@ -1,9 +1,12 @@ #ifndef __UARCH__ #define __UARCH__ +#include "../common/gpu.hpp" + struct uarch; struct uarch* get_uarch_from_cuda(struct gpu_info* gpu); +MEMTYPE guess_memtype_from_cmul_and_uarch(int ddr, struct uarch* arch); char* get_str_uarch(struct uarch* arch); char* get_str_cc(struct uarch* arch); char* get_str_chip(struct uarch* arch);