[v0.06] Add check in memory guess to allow guessing only valid memory types for the uarch

This commit is contained in:
Dr-Noob
2021-08-17 15:09:13 +02:00
parent 6b2fa05870
commit a7036218a0
3 changed files with 20 additions and 5 deletions

View File

@@ -29,7 +29,7 @@ struct topology* get_topology_info(cudaDeviceProp prop) {
int32_t guess_clock_multipilier(struct gpu_info* gpu, struct memory* mem) { int32_t guess_clock_multipilier(struct gpu_info* gpu, struct memory* mem) {
// Guess clock multiplier // Guess clock multiplier
int32_t clk_mul = -1; int32_t clk_mul = 1;
int32_t clk8 = abs((mem->freq/8) - gpu->freq); int32_t clk8 = abs((mem->freq/8) - gpu->freq);
int32_t clk4 = abs((mem->freq/4) - gpu->freq); int32_t clk4 = abs((mem->freq/4) - gpu->freq);
@@ -37,10 +37,10 @@ int32_t guess_clock_multipilier(struct gpu_info* gpu, struct memory* mem) {
int32_t clk1 = abs((mem->freq/1) - gpu->freq); int32_t clk1 = abs((mem->freq/1) - gpu->freq);
int32_t min = mem->freq; int32_t min = mem->freq;
if(min > clk8) { clk_mul = 8; min = clk8; } if(clkm_possible_for_uarch(8, gpu->arch) && min > clk8) { clk_mul = 8; min = clk8; }
if(min > clk4) { clk_mul = 4; min = clk4; } if(clkm_possible_for_uarch(4, gpu->arch) && min > clk4) { clk_mul = 4; min = clk4; }
if(min > clk2) { clk_mul = 2; min = clk2; } if(clkm_possible_for_uarch(2, gpu->arch) && min > clk2) { clk_mul = 2; min = clk2; }
if(min > clk1) { clk_mul = 1; min = clk1; } if(clkm_possible_for_uarch(1, gpu->arch) && min > clk1) { clk_mul = 1; min = clk1; }
return clk_mul; return clk_mul;
} }

View File

@@ -275,6 +275,20 @@ struct uarch* get_uarch_from_cuda(struct gpu_info* gpu) {
else if (arch->uarch == arch_ && (clkm_ == CM_ANY || clkm == clkm_)) return memtype; else if (arch->uarch == arch_ && (clkm_ == CM_ANY || clkm == clkm_)) return memtype;
#define CHECK_MEMTYPE_END else { printBug("guess_memtype_from_cmul_and_uarch: Found invalid combination: clkm=%d, uarch=%d", clkm, arch->uarch); return MEMTYPE_UNKNOWN; } #define CHECK_MEMTYPE_END else { printBug("guess_memtype_from_cmul_and_uarch: Found invalid combination: clkm=%d, uarch=%d", clkm, arch->uarch); return MEMTYPE_UNKNOWN; }
/*
 * Reports whether the memory clock multiplier `clkm` is accepted for the
 * microarchitecture stored in `arch->uarch`.
 *
 * Accepted multipliers per microarchitecture:
 *   Tesla                   -> none
 *   Fermi, Kepler, Maxwell  -> 1, 2
 *   Pascal                  -> 1, 2, 4
 *   Volta                   -> 1
 *   Turing                  -> 2, 4
 *   Ampere                  -> 1, 4, 8
 *
 * Any microarchitecture not listed above yields false.
 *
 * arch must be non-NULL; it is dereferenced unconditionally.
 * NOTE(review): the table presumably mirrors the memory types each uarch
 * can ship with -- confirm against guess_memtype_from_cmul_and_uarch.
 */
bool clkm_possible_for_uarch(int clkm, struct uarch* arch) {
  bool possible = false;

  switch(arch->uarch) {
    /* These three generations share the same set of multipliers. */
    case UARCH_FERMI:
    case UARCH_KEPLER:
    case UARCH_MAXWELL:
      possible = (clkm == 1 || clkm == 2);
      break;
    case UARCH_PASCAL:
      possible = (clkm == 1 || clkm == 2 || clkm == 4);
      break;
    case UARCH_VOLTA:
      possible = (clkm == 1);
      break;
    case UARCH_TURING:
      possible = (clkm == 2 || clkm == 4);
      break;
    case UARCH_AMPERE:
      possible = (clkm == 1 || clkm == 4 || clkm == 8);
      break;
    /* Tesla accepts no multiplier; unlisted values are rejected as well. */
    case UARCH_TESLA:
    default:
      possible = false;
      break;
  }

  return possible;
}
MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch) { MEMTYPE guess_memtype_from_cmul_and_uarch(int clkm, struct uarch* arch) {
/* /*
* +---------+------------------+ * +---------+------------------+

View File

@@ -6,6 +6,7 @@
struct uarch; struct uarch;
struct uarch* get_uarch_from_cuda(struct gpu_info* gpu); struct uarch* get_uarch_from_cuda(struct gpu_info* gpu);
bool clkm_possible_for_uarch(int clkm, struct uarch* arch);
MEMTYPE guess_memtype_from_cmul_and_uarch(int ddr, struct uarch* arch); MEMTYPE guess_memtype_from_cmul_and_uarch(int ddr, struct uarch* arch);
char* get_str_uarch(struct uarch* arch); char* get_str_uarch(struct uarch* arch);
char* get_str_cc(struct uarch* arch); char* get_str_cc(struct uarch* arch);