Add a thermal cutoff option as well and set it to 95 degrees by default.
diff --git a/adl.c b/adl.c
index 08789ea..efd8fe3 100644
--- a/adl.c
+++ b/adl.c
@@ -24,6 +24,7 @@ bool adl_active;
int opt_hysteresis = 3;
int opt_targettemp = 75;
int opt_overheattemp = 85;
+int opt_cutofftemp = 95;
static pthread_mutex_t adl_lock;
// Memory allocation function
@@ -327,6 +328,7 @@ void init_adl(int nDevs)
/* Set some default temperatures for autotune when enabled */
ga->targettemp = opt_targettemp;
ga->overtemp = opt_overheattemp;
+ ga->cutofftemp = opt_cutofftemp;
if (opt_autofan) {
ga->autofan = true;
/* Set a safe starting default if we're automanaging fan speeds */
@@ -840,15 +842,12 @@ out:
return ret;
}
-void gpu_autotune(int gpu)
+void gpu_autotune(int gpu, bool *enable)
{
int temp, fanpercent, engine, newpercent, newengine;
bool fan_optimal = true;
struct gpu_adl *ga;
- if (!gpus[gpu].has_adl || !adl_active)
- return;
-
ga = &gpus[gpu].adl;
lock_adl();
@@ -886,7 +885,11 @@ void gpu_autotune(int gpu)
}
if (engine && ga->autoengine) {
- if (temp > ga->overtemp && engine > ga->minspeed) {
+ if (temp > ga->cutofftemp) {
+ applog(LOG_WARNING, "Hit thermal cutoff limit, disabling GPU!");
+ *enable = false;
+ newengine = ga->minspeed;
+ } else if (temp > ga->overtemp && engine > ga->minspeed) {
applog(LOG_WARNING, "Overheat detected, decreasing GPU clock speed");
newengine = ga->minspeed;
} else if (temp > ga->targettemp + opt_hysteresis && engine > ga->minspeed && fan_optimal) {
@@ -943,8 +946,9 @@ void change_autosettings(int gpu)
wlogprint("Target temperature: %d\n", ga->targettemp);
wlogprint("Overheat temperature: %d\n", ga->overtemp);
+ wlogprint("Cutoff temperature: %d\n", ga->cutofftemp);
wlogprint("Hysteresis differece: %d\n", opt_hysteresis);
- wlogprint("Toggle [F]an auto [G]PU auto\nChange [T]arget [O]verheat [H]ysteresis\n");
+ wlogprint("Toggle [F]an auto [G]PU auto\nChange [T]arget [O]verheat [C]utoff [H]ysteresis\n");
wlogprint("Or press any other key to continue\n");
input = getch();
if (!strncasecmp(&input, "f", 1)) {
@@ -968,12 +972,19 @@ void change_autosettings(int gpu)
else
ga->targettemp = val;
} else if (!strncasecmp(&input, "o", 1)) {
- wlogprint("Enter oveheat temperature for this GPU in C (%d-100)", ga->targettemp);
+ wlogprint("Enter overheat temperature for this GPU in C (%d+)", ga->targettemp);
val = curses_int("");
- if (val <= ga->targettemp || val > 100)
+ if (val <= ga->targettemp || val > 200)
wlogprint("Invalid temperature");
else
ga->overtemp = val;
+ } else if (!strncasecmp(&input, "c", 1)) {
+ wlogprint("Enter cutoff temperature for this GPU in C (%d+)", ga->overtemp);
+ val = curses_int("");
+ if (val <= ga->overtemp || val > 200)
+ wlogprint("Invalid temperature");
+ else
+ ga->cutofftemp = val;
} else if (!strncasecmp(&input, "h", 1)) {
val = curses_int("Enter hysteresis temperature difference (0-10)");
if (val < 1 || val > 10)
diff --git a/adl.h b/adl.h
index f3d95e9..a86b78c 100644
--- a/adl.h
+++ b/adl.h
@@ -5,6 +5,7 @@ bool adl_active;
int opt_hysteresis;
int opt_targettemp;
int opt_overheattemp;
+int opt_cutofftemp;
void init_adl(int nDevs);
float gpu_temp(int gpu);
int gpu_engineclock(int gpu);
@@ -16,13 +17,12 @@ int gpu_fanpercent(int gpu);
bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, float *vddc,
int *activity, int *fanspeed, int *fanpercent, int *powertune);
void change_gpusettings(int gpu);
-void gpu_autotune(int gpu);
+void gpu_autotune(int gpu, bool *enable);
void clear_adl(int nDevs);
#else /* HAVE_ADL */
#define adl_active (0)
static inline void init_adl(int nDevs) {}
static inline void change_gpusettings(int gpu) { }
-static inline void gpu_autotune(int gpu) { }
static inline void clear_adl(int nDevs) {}
#endif
#endif
diff --git a/main.c b/main.c
index 4303689..74ecf70 100644
--- a/main.c
+++ b/main.c
@@ -1412,6 +1412,9 @@ static struct opt_table opt_config_table[] = {
"Use system log for output messages (default: standard error)"),
#endif
#ifdef HAVE_ADL
+ OPT_WITH_ARG("--temp-cutoff",
+ set_int_0_to_9999, opt_show_intval, &opt_cutofftemp,
+ "Set the temperature where a GPU device will be automatically disabled"),
OPT_WITH_ARG("--temp-hysteresis",
set_int_1_to_10, opt_show_intval, &opt_hysteresis,
"Set how much the temperature can fluctuate outside limits when automanaging speeds"),
@@ -4573,6 +4576,7 @@ static void *watchdog_thread(void *userdata)
for (i = 0; i < gpu_threads; i++) {
struct thr_info *thr;
+ bool *enable;
int gpu;
/* Use only one thread per device to determine if the GPU is healthy */
@@ -4580,9 +4584,10 @@ static void *watchdog_thread(void *userdata)
break;
thr = &thr_info[i];
gpu = thr->cgpu->cpu_gpu;
+ enable = &gpu_devices[gpu];
#ifdef HAVE_ADL
- if (adl_active)
- gpu_autotune(gpu);
+ if (adl_active && gpus[gpu].has_adl && *enable)
+ gpu_autotune(gpu, enable);
if (opt_debug && gpus[gpu].has_adl) {
int engineclock = 0, memclock = 0, activity = 0, fanspeed = 0, fanpercent = 0, powertune = 0;
float temp = 0, vddc = 0;
@@ -4593,7 +4598,7 @@ static void *watchdog_thread(void *userdata)
}
#endif
/* Thread is waiting on getwork or disabled */
- if (thr->getwork || !gpu_devices[gpu])
+ if (thr->getwork || !*enable)
continue;
if (gpus[gpu].status != LIFE_WELL && now.tv_sec - thr->last.tv_sec < 60) {
diff --git a/miner.h b/miner.h
index 2dc1d16..5d9e22f 100644
--- a/miner.h
+++ b/miner.h
@@ -180,6 +180,7 @@ struct gpu_adl {
int targettemp;
int overtemp;
+ int cutofftemp;
int minspeed;
int maxspeed;
};