[v0.02] Experimental algorithm to detect memory type finished. Information gathering finished
This commit is contained in:
@@ -16,6 +16,17 @@
|
|||||||
#define STRING_MEGABYTES "MB"
|
#define STRING_MEGABYTES "MB"
|
||||||
#define STRING_GIGABYTES "GB"
|
#define STRING_GIGABYTES "GB"
|
||||||
|
|
||||||
|
/*
 * Printable name of each memory type. get_str_memory_type indexes this table
 * with gpu->mem->type, so the entries must stay in the order of the
 * MEMTYPE_* constant written next to each one.
 */
static const char *memtype_str[] = {
  STRING_UNKNOWN, // MEMTYPE_UNKNOWN
  "DDR3",         // MEMTYPE_DDR3
  "DDR4",         // MEMTYPE_DDR4
  "GDDR5",        // MEMTYPE_GDDR5
  "GDDR5X",       // MEMTYPE_GDDR5X
  "GDDR6",        // MEMTYPE_GDDR6
  "GDDR6X",       // MEMTYPE_GDDR6X
  "HBM2"          // MEMTYPE_HBM2
};
|
||||||
|
|
||||||
int32_t get_value_as_smallest_unit(char ** str, uint64_t value) {
|
int32_t get_value_as_smallest_unit(char ** str, uint64_t value) {
|
||||||
int32_t ret;
|
int32_t ret;
|
||||||
int max_len = 10; // Max is 8 for digits, 2 for units
|
int max_len = 10; // Max is 8 for digits, 2 for units
|
||||||
@@ -65,7 +76,9 @@ char* get_str_memory_size(struct gpu_info* gpu) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
char* get_str_memory_type(struct gpu_info* gpu) {
|
char* get_str_memory_type(struct gpu_info* gpu) {
|
||||||
return NULL;
|
char* str = (char *) emalloc(sizeof(char) * (strlen(memtype_str[gpu->mem->type]) +1));
|
||||||
|
strcpy(str, memtype_str[gpu->mem->type]);
|
||||||
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
char* get_str_bus_width(struct gpu_info* gpu) {
|
char* get_str_bus_width(struct gpu_info* gpu) {
|
||||||
|
|||||||
@@ -14,7 +14,14 @@ enum {
|
|||||||
};
|
};
|
||||||
|
|
||||||
/*
 * Memory types a GPU can be reported with. The values are consecutive and
 * start at 0; the name table memtype_str lists its strings in this same
 * order, so a new type has to be added in both places at the same position.
 */
enum {
  MEMTYPE_UNKNOWN = 0,
  MEMTYPE_DDR3    = 1,
  MEMTYPE_DDR4    = 2,
  MEMTYPE_GDDR5   = 3,
  MEMTYPE_GDDR5X  = 4,
  MEMTYPE_GDDR6   = 5,
  MEMTYPE_GDDR6X  = 6,
  MEMTYPE_HBM2    = 7
};
|
||||||
|
|
||||||
typedef int32_t VENDOR;
|
typedef int32_t VENDOR;
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
#include "../cuda/cuda.hpp"
|
#include "../cuda/cuda.hpp"
|
||||||
#include "../cuda/uarch.hpp"
|
#include "../cuda/uarch.hpp"
|
||||||
|
|
||||||
static const char* VERSION = "0.01";
|
static const char* VERSION = "0.02";
|
||||||
|
|
||||||
void print_help(char *argv[]) {
|
void print_help(char *argv[]) {
|
||||||
const char **t = args_str;
|
const char **t = args_str;
|
||||||
|
|||||||
@@ -28,21 +28,20 @@ struct topology* get_topology_info(struct gpu_info* gpu, cudaDeviceProp prop) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
 * Guesses the memory type of a GPU from its memory and GPU frequencies.
 *
 * mem: memory description; only mem->freq is read.
 * gpu: GPU description; gpu->freq and gpu->arch are read.
 *
 * Among the clock multipliers 8, 4, 2 and 1, the one for which
 * mem->freq / multiplier is closest to gpu->freq is selected and handed,
 * together with gpu->arch, to guess_memtype_from_cmul_and_uarch, whose result
 * is returned.
 */
MEMTYPE guess_memory_type(struct memory* mem, struct gpu_info* gpu) {
  // Tried from largest to smallest; on a tie the first (largest) one is kept
  static const int multipliers[] = { 8, 4, 2, 1 };
  const int n_multipliers = (int) (sizeof(multipliers) / sizeof(multipliers[0]));

  // Stays -1 when no candidate is closer to gpu->freq than mem->freq itself
  int32_t best_mul = -1;
  int32_t best_dist = mem->freq;

  for(int i = 0; i < n_multipliers; i++) {
    int32_t dist = abs((mem->freq / multipliers[i]) - gpu->freq);
    if(best_dist > dist) {
      best_mul = multipliers[i];
      best_dist = dist;
    }
  }

  return guess_memtype_from_cmul_and_uarch(best_mul, gpu->arch);
}
|
||||||
|
|
||||||
struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) {
|
struct memory* get_memory_info(struct gpu_info* gpu, cudaDeviceProp prop) {
|
||||||
|
|||||||
@@ -9,6 +9,9 @@
|
|||||||
|
|
||||||
typedef uint32_t MICROARCH;
|
typedef uint32_t MICROARCH;
|
||||||
|
|
||||||
|
// Wildcard clock multiplier: a rule that uses it matches any measured multiplier.
// Parenthesized so the negative value expands safely inside larger expressions.
#define CM_ANY (-1)
|
||||||
|
|
||||||
// Data not available
|
// Data not available
|
||||||
#define NA -1
|
#define NA -1
|
||||||
|
|
||||||
@@ -264,6 +267,57 @@ struct uarch* get_uarch_from_cuda(struct gpu_info* gpu) {
|
|||||||
return arch;
|
return arch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Building blocks of a single if / else-if / else chain that returns a memory
 * type from inside the function that uses them:
 *
 *   CHECK_MEMTYPE_START   opens the chain with a branch that is never taken.
 *   CHECK_MEMTYPE(...)    adds one rule: returns memtype when arch->uarch
 *                         equals arch_ and the rule multiplier clkm_ is either
 *                         CM_ANY or equal to clkm.
 *   CHECK_MEMTYPE_END     closes the chain: reports a bug and returns
 *                         MEMTYPE_UNKNOWN. It takes no arguments and reads the
 *                         names `clkm` and `arch` from the enclosing scope.
 *
 * Arguments are parenthesized so that expressions can be passed safely.
 */
#define CHECK_MEMTYPE_START if (false) {}
#define CHECK_MEMTYPE(arch, clkm, arch_, clkm_, memtype) \
  else if ((arch)->uarch == (arch_) && ((clkm_) == CM_ANY || (clkm) == (clkm_))) return (memtype);
#define CHECK_MEMTYPE_END else { printBug("guess_memtype_from_cmul_and_uarch: Found invalid combination: clkm=%d, uarch=%d", clkm, arch->uarch); return MEMTYPE_UNKNOWN; }
|
||||||
|
|
||||||
|
/*
 * Guesses the memory type from a clock multiplier and a microarchitecture.
 *
 * clkm: clock multiplier to look up.
 * arch: microarchitecture description; only arch->uarch is read.
 *
 * Returns the memory type of the first rule whose microarchitecture equals
 * arch->uarch and whose multiplier is either CM_ANY or equal to clkm. When no
 * rule matches, a bug is reported through printBug and MEMTYPE_UNKNOWN is
 * returned.
 */
MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch) {
  /*
   * +---------+------------------+
   * | MEMTYPE | Clock multiplier |
   * +---------+------------------+
   * | DDR3    |                1 |
   * | DDR4    |                1 |
   * | GDDR5   |                2 |
   * | GDDR5X  |                4 |
   * | GDDR6   |                4 |
   * | GDDR6X  |                8 |
   * | HBM     |                1 |
   * | HBM2    |                1 |
   * +---------+------------------+
   *
   * Devices named in parentheses next to an architecture below are not
   * covered by these rules and will be detected wrongly.
   */
  struct memtype_rule {
    MICROARCH uarch;  // NOTE(review): assumes arch->uarch is a MICROARCH - confirm
    int clkm;         // multiplier required by the rule, or CM_ANY
    MEMTYPE memtype;  // result when the rule matches
  };

  // Rules are evaluated top to bottom; the first match wins
  static const struct memtype_rule rules[] = {
    // TESLA
    { UARCH_TESLA,   CM_ANY, MEMTYPE_UNKNOWN },
    // FERMI
    { UARCH_FERMI,   1,      MEMTYPE_DDR3    },
    { UARCH_FERMI,   2,      MEMTYPE_GDDR5   },
    // KEPLER (jetson)
    { UARCH_KEPLER,  1,      MEMTYPE_DDR3    },
    { UARCH_KEPLER,  2,      MEMTYPE_GDDR5   },
    // MAXWELL (switch, jetson)
    { UARCH_MAXWELL, 1,      MEMTYPE_DDR3    },
    { UARCH_MAXWELL, 2,      MEMTYPE_GDDR5   },
    // PASCAL
    { UARCH_PASCAL,  1,      MEMTYPE_DDR4    },
    { UARCH_PASCAL,  2,      MEMTYPE_GDDR5   },
    { UARCH_PASCAL,  4,      MEMTYPE_GDDR5X  },
    // VOLTA (jetson)
    { UARCH_VOLTA,   CM_ANY, MEMTYPE_HBM2    },
    // TURING
    { UARCH_TURING,  2,      MEMTYPE_GDDR5   },
    { UARCH_TURING,  4,      MEMTYPE_GDDR6   },
    // AMPERE
    { UARCH_AMPERE,  1,      MEMTYPE_HBM2    },
    { UARCH_AMPERE,  4,      MEMTYPE_GDDR6   },
    { UARCH_AMPERE,  8,      MEMTYPE_GDDR6X  }
  };
  const struct memtype_rule* rules_end = rules + sizeof(rules) / sizeof(rules[0]);

  for(const struct memtype_rule* rule = rules; rule != rules_end; rule++) {
    if(arch->uarch == rule->uarch && (rule->clkm == CM_ANY || clkm == rule->clkm)) {
      return rule->memtype;
    }
  }

  printBug("guess_memtype_from_cmul_and_uarch: Found invalid combination: clkm=%d, uarch=%d", clkm, arch->uarch);
  return MEMTYPE_UNKNOWN;
}
|
||||||
|
|
||||||
const char* get_str_uarch(struct uarch* arch) {
|
const char* get_str_uarch(struct uarch* arch) {
|
||||||
return uarch_str[arch->uarch];
|
return uarch_str[arch->uarch];
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,9 +1,12 @@
|
|||||||
#ifndef __UARCH__
|
#ifndef __UARCH__
|
||||||
#define __UARCH__
|
#define __UARCH__
|
||||||
|
|
||||||
|
#include "../common/gpu.hpp"
|
||||||
|
|
||||||
struct uarch;
|
struct uarch;
|
||||||
|
|
||||||
struct uarch* get_uarch_from_cuda(struct gpu_info* gpu);
|
struct uarch* get_uarch_from_cuda(struct gpu_info* gpu);
|
||||||
|
// Guesses the memory type from a clock multiplier (clkm) and a microarchitecture
MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch);
|
||||||
char* get_str_uarch(struct uarch* arch);
|
char* get_str_uarch(struct uarch* arch);
|
||||||
char* get_str_cc(struct uarch* arch);
|
char* get_str_cc(struct uarch* arch);
|
||||||
char* get_str_chip(struct uarch* arch);
|
char* get_str_chip(struct uarch* arch);
|
||||||
|
|||||||
Reference in New Issue
Block a user