[v0.02] Experimental algorithm to detect memory type finished. Information gathering finished

This commit is contained in:
Dr-Noob
2021-08-15 21:22:41 +02:00
parent 7d505dcc07
commit 2687fa5016
6 changed files with 91 additions and 15 deletions

View File

@@ -16,6 +16,17 @@
#define STRING_MEGABYTES "MB"
#define STRING_GIGABYTES "GB"
// Printable name of every memory type.
// The table is indexed with a MEMTYPE_* value, so the entries must stay in
// the same order as the MEMTYPE_* enumerators named next to each of them.
static const char *memtype_str[] = {
  STRING_UNKNOWN,  // MEMTYPE_UNKNOWN
  "DDR3",          // MEMTYPE_DDR3
  "DDR4",          // MEMTYPE_DDR4
  "GDDR5",         // MEMTYPE_GDDR5
  "GDDR5X",        // MEMTYPE_GDDR5X
  "GDDR6",         // MEMTYPE_GDDR6
  "GDDR6X",        // MEMTYPE_GDDR6X
  "HBM2"           // MEMTYPE_HBM2
};
int32_t get_value_as_smallest_unit(char ** str, uint64_t value) {
int32_t ret;
int max_len = 10; // Max is 8 for digits, 2 for units
@@ -65,7 +76,9 @@ char* get_str_memory_size(struct gpu_info* gpu) {
}
char* get_str_memory_type(struct gpu_info* gpu) {
return NULL;
char* str = (char *) emalloc(sizeof(char) * (strlen(memtype_str[gpu->mem->type]) +1));
strcpy(str, memtype_str[gpu->mem->type]);
return str;
}
char* get_str_bus_width(struct gpu_info* gpu) {

View File

@@ -14,7 +14,14 @@ enum {
};
// Memory technologies that can be reported for a GPU.
// The numeric values are used as indices into per-type lookup tables (the
// printable names in memtype_str follow this exact order), so enumerators
// must only be added or reordered together with those tables.
enum {
  MEMTYPE_UNKNOWN,
  MEMTYPE_DDR3,
  MEMTYPE_DDR4,
  MEMTYPE_GDDR5,
  MEMTYPE_GDDR5X,
  MEMTYPE_GDDR6,
  MEMTYPE_GDDR6X,
  MEMTYPE_HBM2
};
typedef int32_t VENDOR;

View File

@@ -7,7 +7,7 @@
#include "../cuda/cuda.hpp"
#include "../cuda/uarch.hpp"
// Program version string
static const char* VERSION = "0.02";
void print_help(char *argv[]) {
const char **t = args_str;

View File

@@ -28,21 +28,20 @@ struct topology* get_topology_info(struct gpu_info* gpu, cudaDeviceProp prop) {
}
// Guesses the memory technology from the memory and GPU frequencies.
//
// mem: memory description; mem->freq is the reported memory frequency.
// gpu: GPU description; gpu->freq and gpu->arch are read.
//
// The memory frequency is divided by each candidate clock multiplier
// (8, 4, 2, 1) and the multiplier whose quotient is closest to gpu->freq is
// selected. The multiplier and the microarchitecture are then mapped to a
// memory type by guess_memtype_from_cmul_and_uarch, whose result is returned.
MEMTYPE guess_memory_type(struct memory* mem, struct gpu_info* gpu) {
  // Tried from largest to smallest; because the comparison below is strict,
  // the larger multiplier is kept when two candidates are equally close.
  static const int32_t candidates[] = { 8, 4, 2, 1 };
  const int n_candidates = sizeof(candidates) / sizeof(candidates[0]);

  // Stays at -1 when no candidate is closer to gpu->freq than mem->freq itself
  int32_t clk_mul = -1;
  int32_t min = mem->freq;

  for(int i = 0; i < n_candidates; i++) {
    int32_t diff = abs((mem->freq / candidates[i]) - gpu->freq);
    if(min > diff) { clk_mul = candidates[i]; min = diff; }
  }

  return guess_memtype_from_cmul_and_uarch(clk_mul, gpu->arch);
}
struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) {

View File

@@ -9,6 +9,9 @@
// Type used to identify a microarchitecture
// (presumably holds the UARCH_* values - TODO confirm)
typedef uint32_t MICROARCH;
// Wildcard clock multiplier: a CHECK_MEMTYPE rule written with CM_ANY
// matches regardless of the clock multiplier that was guessed
#define CM_ANY -1
// Data not available
#define NA -1
@@ -264,6 +267,57 @@ struct uarch* get_uarch_from_cuda(struct gpu_info* gpu) {
return arch;
}
// Helper macros that expand to a single if / else-if chain, one rule per line.
// CHECK_MEMTYPE_START opens the chain with a branch that is never taken so
// that every rule can be written uniformly as an "else if".
#define CHECK_MEMTYPE_START if (false) {}
// Returns memtype when the microarchitecture equals arch_ and the clock
// multiplier equals clkm_ (or the rule uses CM_ANY, which matches any value).
#define CHECK_MEMTYPE(arch, clkm, arch_, clkm_, memtype) \
  else if ((arch)->uarch == (arch_) && ((clkm_) == CM_ANY || (clkm) == (clkm_))) return (memtype);
// Closes the chain: no rule matched, so report it and return MEMTYPE_UNKNOWN.
// Uses the clkm and arch variables of the enclosing function.
#define CHECK_MEMTYPE_END else { printBug("guess_memtype_from_cmul_and_uarch: Found invalid combination: clkm=%d, uarch=%d", clkm, arch->uarch); return MEMTYPE_UNKNOWN; }

// Maps a clock multiplier and a microarchitecture to a memory type.
//
// clkm: guessed clock multiplier (see the table below).
// arch: microarchitecture description; only arch->uarch is read.
//
// Returns the MEMTYPE_* value of the first matching rule. When no rule
// matches, the problem is reported with printBug and MEMTYPE_UNKNOWN is
// returned.
MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch) {
  /*
   * +---------+------------------+
   * | MEMTYPE | Clock multiplier |
   * +---------+------------------+
   * | DDR3    |        1         |
   * | DDR4    |        1         |
   * | GDDR5   |        2         |
   * | GDDR5X  |        4         |
   * | GDDR6   |        4         |
   * | GDDR6X  |        8         |
   * | HBM     |        1         |
   * | HBM2    |        1         |
   * +---------+------------------+
   *
   * Devices listed in parentheses next to an architecture name below
   * (e.g. jetson, switch) are not covered by these rules and will be
   * detected wrongly.
   */
  CHECK_MEMTYPE_START
  // TESLA
  CHECK_MEMTYPE(arch, clkm, UARCH_TESLA,   CM_ANY, MEMTYPE_UNKNOWN)
  // FERMI
  CHECK_MEMTYPE(arch, clkm, UARCH_FERMI,   1,      MEMTYPE_DDR3)
  CHECK_MEMTYPE(arch, clkm, UARCH_FERMI,   2,      MEMTYPE_GDDR5)
  // KEPLER (jetson)
  CHECK_MEMTYPE(arch, clkm, UARCH_KEPLER,  1,      MEMTYPE_DDR3)
  CHECK_MEMTYPE(arch, clkm, UARCH_KEPLER,  2,      MEMTYPE_GDDR5)
  // MAXWELL (switch, jetson)
  CHECK_MEMTYPE(arch, clkm, UARCH_MAXWELL, 1,      MEMTYPE_DDR3)
  CHECK_MEMTYPE(arch, clkm, UARCH_MAXWELL, 2,      MEMTYPE_GDDR5)
  // PASCAL
  CHECK_MEMTYPE(arch, clkm, UARCH_PASCAL,  1,      MEMTYPE_DDR4)
  CHECK_MEMTYPE(arch, clkm, UARCH_PASCAL,  2,      MEMTYPE_GDDR5)
  CHECK_MEMTYPE(arch, clkm, UARCH_PASCAL,  4,      MEMTYPE_GDDR5X)
  // VOLTA (jetson)
  CHECK_MEMTYPE(arch, clkm, UARCH_VOLTA,   CM_ANY, MEMTYPE_HBM2)
  // TURING
  CHECK_MEMTYPE(arch, clkm, UARCH_TURING,  2,      MEMTYPE_GDDR5)
  CHECK_MEMTYPE(arch, clkm, UARCH_TURING,  4,      MEMTYPE_GDDR6)
  // AMPERE
  CHECK_MEMTYPE(arch, clkm, UARCH_AMPERE,  1,      MEMTYPE_HBM2)
  CHECK_MEMTYPE(arch, clkm, UARCH_AMPERE,  4,      MEMTYPE_GDDR6)
  CHECK_MEMTYPE(arch, clkm, UARCH_AMPERE,  8,      MEMTYPE_GDDR6X)
  CHECK_MEMTYPE_END
}
// Returns the entry of uarch_str selected by arch->uarch.
// The returned pointer refers to the table entry itself, not to a copy.
const char* get_str_uarch(struct uarch* arch) {
  const char* name = uarch_str[arch->uarch];
  return name;
}

View File

@@ -1,9 +1,12 @@
#ifndef __UARCH__
#define __UARCH__
#include "../common/gpu.hpp"
// Opaque microarchitecture description; the full definition is not part of this header
struct uarch;

// Builds the microarchitecture description for the given GPU
struct uarch* get_uarch_from_cuda(struct gpu_info* gpu);
// Maps a clock multiplier (clkm) and a microarchitecture to a MEMTYPE_* value
MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch);
// NOTE(review): the definition of get_str_uarch returns const char*, while
// this prototype says char* - confirm which one is intended and align them
char* get_str_uarch(struct uarch* arch);
char* get_str_cc(struct uarch* arch);
char* get_str_chip(struct uarch* arch);