[v0.02] Experimental algorithm to detect memory type finished. Information gathering finished

This commit is contained in:
Dr-Noob
2021-08-15 21:22:41 +02:00
parent 7d505dcc07
commit 2687fa5016
6 changed files with 91 additions and 15 deletions

View File

@@ -28,21 +28,20 @@ struct topology* get_topology_info(struct gpu_info* gpu, cudaDeviceProp prop) {
}
/*
 * Guesses the memory type of the GPU.
 *
 * The clock multiplier is estimated by dividing the memory frequency
 * (mem->freq) by each candidate multiplier and keeping the candidate whose
 * result is closest to the GPU frequency (gpu->freq). The multiplier found is
 * then combined with the GPU microarchitecture (gpu->arch) to look up the
 * memory type.
 *
 * Returns whatever guess_memtype_from_cmul_and_uarch returns for the
 * estimated multiplier and gpu->arch.
 */
MEMTYPE guess_memory_type(struct memory* mem, struct gpu_info* gpu) {
  // Candidates are tried from the largest to the smallest multiplier; because
  // the comparison below is strict, the larger multiplier wins on a tie.
  static const int32_t candidates[] = { 8, 4, 2, 1 };
  const int n_candidates = (int) (sizeof(candidates) / sizeof(candidates[0]));

  // clk_mul stays at -1 when no candidate yields a distance smaller than
  // mem->freq itself (the initial value of min).
  int32_t clk_mul = -1;
  int32_t min = mem->freq;

  for(int i = 0; i < n_candidates; i++) {
    int32_t diff = abs((mem->freq / candidates[i]) - gpu->freq);
    if(min > diff) { clk_mul = candidates[i]; min = diff; }
  }

  return guess_memtype_from_cmul_and_uarch(clk_mul, gpu->arch);
}
struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) {

View File

@@ -9,6 +9,9 @@
typedef uint32_t MICROARCH;
// Any clock multiplier
// (parenthesized so the negative literal cannot merge with adjacent operators
// at the expansion site)
#define CM_ANY (-1)
// Data not available
#define NA (-1)
@@ -264,6 +267,57 @@ struct uarch* get_uarch_from_cuda(struct gpu_info* gpu) {
return arch;
}
// Helper macros that build the if / else-if chain used by
// guess_memtype_from_cmul_and_uarch. They must be used in this order:
// CHECK_MEMTYPE_START, any number of CHECK_MEMTYPE, CHECK_MEMTYPE_END.
//
// CHECK_MEMTYPE_START opens the chain with a branch that is never taken so
// that every rule can be written uniformly as an "else if".
#define CHECK_MEMTYPE_START if (false) {}
// One rule: returns memtype when the microarchitecture matches arch_ and the
// rule either accepts any clock multiplier (CM_ANY) or clkm equals clkm_.
#define CHECK_MEMTYPE(arch, clkm, arch_, clkm_, memtype) \
  else if ((arch)->uarch == (arch_) && ((clkm_) == CM_ANY || (clkm) == (clkm_))) return (memtype);
// Fallback when no rule matched: reports the bug and returns MEMTYPE_UNKNOWN.
// It reads the variables named clkm and arch from the enclosing scope.
#define CHECK_MEMTYPE_END else { printBug("guess_memtype_from_cmul_and_uarch: Found invalid combination: clkm=%d, uarch=%d", clkm, arch->uarch); return MEMTYPE_UNKNOWN; }

/*
 * Guesses the memory type from the clock multiplier (clkm) and the
 * microarchitecture (arch).
 *
 * Rules are evaluated from top to bottom and the first one that matches
 * decides the result. If no rule matches, the bug is reported through
 * printBug and MEMTYPE_UNKNOWN is returned.
 */
MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch) {
  /*
   * +---------+------------------+
   * | MEMTYPE | Clock multiplier |
   * +---------+------------------+
   * | DDR3    | 1                |
   * | DDR4    | 1                |
   * | GDDR5   | 2                |
   * | GDDR5X  | 4                |
   * | GDDR6   | 4                |
   * | GDDR6X  | 8                |
   * | HBM     | 1                |
   * | HBM2    | 1                |
   * +---------+------------------+
   *
   * Devices listed in parentheses next to an architecture
   * are not covered by these rules and will be detected wrongly
   */
  CHECK_MEMTYPE_START
  // TESLA
  CHECK_MEMTYPE(arch, clkm, UARCH_TESLA,   CM_ANY, MEMTYPE_UNKNOWN)
  // FERMI
  CHECK_MEMTYPE(arch, clkm, UARCH_FERMI,   1,      MEMTYPE_DDR3)
  CHECK_MEMTYPE(arch, clkm, UARCH_FERMI,   2,      MEMTYPE_GDDR5)
  // KEPLER (jetson)
  CHECK_MEMTYPE(arch, clkm, UARCH_KEPLER,  1,      MEMTYPE_DDR3)
  CHECK_MEMTYPE(arch, clkm, UARCH_KEPLER,  2,      MEMTYPE_GDDR5)
  // MAXWELL (switch, jetson)
  CHECK_MEMTYPE(arch, clkm, UARCH_MAXWELL, 1,      MEMTYPE_DDR3)
  CHECK_MEMTYPE(arch, clkm, UARCH_MAXWELL, 2,      MEMTYPE_GDDR5)
  // PASCAL
  CHECK_MEMTYPE(arch, clkm, UARCH_PASCAL,  1,      MEMTYPE_DDR4)
  CHECK_MEMTYPE(arch, clkm, UARCH_PASCAL,  2,      MEMTYPE_GDDR5)
  CHECK_MEMTYPE(arch, clkm, UARCH_PASCAL,  4,      MEMTYPE_GDDR5X)
  // VOLTA (jetson)
  CHECK_MEMTYPE(arch, clkm, UARCH_VOLTA,   CM_ANY, MEMTYPE_HBM2)
  // TURING
  CHECK_MEMTYPE(arch, clkm, UARCH_TURING,  2,      MEMTYPE_GDDR5)
  CHECK_MEMTYPE(arch, clkm, UARCH_TURING,  4,      MEMTYPE_GDDR6)
  // AMPERE
  CHECK_MEMTYPE(arch, clkm, UARCH_AMPERE,  1,      MEMTYPE_HBM2)
  CHECK_MEMTYPE(arch, clkm, UARCH_AMPERE,  4,      MEMTYPE_GDDR6)
  CHECK_MEMTYPE(arch, clkm, UARCH_AMPERE,  8,      MEMTYPE_GDDR6X)
  CHECK_MEMTYPE_END
}
// Returns the entry of uarch_str indexed by the microarchitecture id
// stored in arch->uarch.
const char* get_str_uarch(struct uarch* arch) {
  const char* name = uarch_str[arch->uarch];
  return name;
}

View File

@@ -1,9 +1,12 @@
#ifndef __UARCH__
#define __UARCH__
#include "../common/gpu.hpp"
// Opaque to users of this header; only the forward declaration is exposed here.
struct uarch;
struct uarch* get_uarch_from_cuda(struct gpu_info* gpu);
// clkm is the clock multiplier guessed for the memory.
MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch);
// NOTE(review): the definition of get_str_uarch returns const char*, while
// this prototype declares char* - confirm which one is intended.
char* get_str_uarch(struct uarch* arch);
char* get_str_cc(struct uarch* arch);
char* get_str_chip(struct uarch* arch);