Commit 3997f308a068e5b335dd8a7be19078c8388414c9

Con Kolivas 2011-09-07T10:43:26

Add a thermal cutoff option (--temp-cutoff) as well, which disables a GPU when its temperature exceeds the cutoff, and set it to 95 degrees by default.

diff --git a/adl.c b/adl.c
index 08789ea..efd8fe3 100644
--- a/adl.c
+++ b/adl.c
@@ -24,6 +24,7 @@ bool adl_active;
 int opt_hysteresis = 3;
 int opt_targettemp = 75;
 int opt_overheattemp = 85;
+int opt_cutofftemp = 95;
 static pthread_mutex_t adl_lock;
 
 // Memory allocation function
@@ -327,6 +328,7 @@ void init_adl(int nDevs)
 		/* Set some default temperatures for autotune when enabled */
 		ga->targettemp = opt_targettemp;
 		ga->overtemp = opt_overheattemp;
+		ga->cutofftemp = opt_cutofftemp;
 		if (opt_autofan) {
 			ga->autofan = true;
 			/* Set a safe starting default if we're automanaging fan speeds */
@@ -840,15 +842,12 @@ out:
 	return ret;
 }
 
-void gpu_autotune(int gpu)
+void gpu_autotune(int gpu, bool *enable)
 {
 	int temp, fanpercent, engine, newpercent, newengine;
 	bool fan_optimal = true;
 	struct gpu_adl *ga;
 
-	if (!gpus[gpu].has_adl || !adl_active)
-		return;
-
 	ga = &gpus[gpu].adl;
 
 	lock_adl();
@@ -886,7 +885,11 @@ void gpu_autotune(int gpu)
 	}
 
 	if (engine && ga->autoengine) {
-		if (temp > ga->overtemp && engine > ga->minspeed) {
+		if (temp > ga->cutofftemp) {
+			applog(LOG_WARNING, "Hit thermal cutoff limit, disabling GPU!");
+			*enable = false;
+			newengine = ga->minspeed;
+		} else if (temp > ga->overtemp && engine > ga->minspeed) {
 			applog(LOG_WARNING, "Overheat detected, decreasing GPU clock speed");
 			newengine = ga->minspeed;
 		} else if (temp > ga->targettemp + opt_hysteresis && engine > ga->minspeed && fan_optimal) {
@@ -943,8 +946,9 @@ void change_autosettings(int gpu)
 
 	wlogprint("Target temperature: %d\n", ga->targettemp);
 	wlogprint("Overheat temperature: %d\n", ga->overtemp);
+	wlogprint("Cutoff temperature: %d\n", ga->cutofftemp);
 	wlogprint("Hysteresis differece: %d\n", opt_hysteresis);
-	wlogprint("Toggle [F]an auto [G]PU auto\nChange [T]arget [O]verheat [H]ysteresis\n");
+	wlogprint("Toggle [F]an auto [G]PU auto\nChange [T]arget [O]verheat [C]utoff [H]ysteresis\n");
 	wlogprint("Or press any other key to continue\n");
 	input = getch();
 	if (!strncasecmp(&input, "f", 1)) {
@@ -968,12 +972,19 @@ void change_autosettings(int gpu)
 		else
 			ga->targettemp = val;
 	} else if (!strncasecmp(&input, "o", 1)) {
-		wlogprint("Enter oveheat temperature for this GPU in C (%d-100)", ga->targettemp);
+		wlogprint("Enter overheat temperature for this GPU in C (%d+)", ga->targettemp);
 		val = curses_int("");
-		if (val <= ga->targettemp || val > 100)
+		if (val <= ga->targettemp || val > 200)
 			wlogprint("Invalid temperature");
 		else
 			ga->overtemp = val;
+	} else if (!strncasecmp(&input, "c", 1)) {
+		wlogprint("Enter cutoff temperature for this GPU in C (%d+)", ga->overtemp);
+		val = curses_int("");
+		if (val <= ga->overtemp || val > 200)
+			wlogprint("Invalid temperature");
+		else
+			ga->cutofftemp = val;
 	} else if (!strncasecmp(&input, "h", 1)) {
 		val = curses_int("Enter hysteresis temperature difference (0-10)");
 		if (val < 1 || val > 10)
diff --git a/adl.h b/adl.h
index f3d95e9..a86b78c 100644
--- a/adl.h
+++ b/adl.h
@@ -5,6 +5,7 @@ bool adl_active;
 int opt_hysteresis;
 int opt_targettemp;
 int opt_overheattemp;
+int opt_cutofftemp;
 void init_adl(int nDevs);
 float gpu_temp(int gpu);
 int gpu_engineclock(int gpu);
@@ -16,13 +17,12 @@ int gpu_fanpercent(int gpu);
 bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, float *vddc,
 	       int *activity, int *fanspeed, int *fanpercent, int *powertune);
 void change_gpusettings(int gpu);
-void gpu_autotune(int gpu);
+void gpu_autotune(int gpu, bool *enable);
 void clear_adl(int nDevs);
 #else /* HAVE_ADL */
 #define adl_active (0)
 static inline void init_adl(int nDevs) {}
 static inline void change_gpusettings(int gpu) { }
-static inline void gpu_autotune(int gpu) { }
 static inline void clear_adl(int nDevs) {}
 #endif
 #endif
diff --git a/main.c b/main.c
index 4303689..74ecf70 100644
--- a/main.c
+++ b/main.c
@@ -1412,6 +1412,9 @@ static struct opt_table opt_config_table[] = {
 			"Use system log for output messages (default: standard error)"),
 #endif
 #ifdef HAVE_ADL
+	OPT_WITH_ARG("--temp-cutoff",
+		     set_int_0_to_9999, opt_show_intval, &opt_cutofftemp,
+		     "Set the temperature where a GPU device will be automatically disabled"),
 	OPT_WITH_ARG("--temp-hysteresis",
 		     set_int_1_to_10, opt_show_intval, &opt_hysteresis,
 		     "Set how much the temperature can fluctuate outside limits when automanaging speeds"),
@@ -4573,6 +4576,7 @@ static void *watchdog_thread(void *userdata)
 
 		for (i = 0; i < gpu_threads; i++) {
 			struct thr_info *thr;
+			bool *enable;
 			int gpu;
 
 			/* Use only one thread per device to determine if the GPU is healthy */
@@ -4580,9 +4584,10 @@ static void *watchdog_thread(void *userdata)
 				break;
 			thr = &thr_info[i];
 			gpu = thr->cgpu->cpu_gpu;
+			enable = &gpu_devices[gpu];
 #ifdef HAVE_ADL
-			if (adl_active)
-				gpu_autotune(gpu);
+			if (adl_active && gpus[gpu].has_adl && *enable)
+				gpu_autotune(gpu, enable);
 			if (opt_debug && gpus[gpu].has_adl) {
 				int engineclock = 0, memclock = 0, activity = 0, fanspeed = 0, fanpercent = 0, powertune = 0;
 				float temp = 0, vddc = 0;
@@ -4593,7 +4598,7 @@ static void *watchdog_thread(void *userdata)
 			}
 #endif
 			/* Thread is waiting on getwork or disabled */
-			if (thr->getwork || !gpu_devices[gpu])
+			if (thr->getwork || !*enable)
 				continue;
 
 			if (gpus[gpu].status != LIFE_WELL && now.tv_sec - thr->last.tv_sec < 60) {
diff --git a/miner.h b/miner.h
index 2dc1d16..5d9e22f 100644
--- a/miner.h
+++ b/miner.h
@@ -180,6 +180,7 @@ struct gpu_adl {
 
 	int targettemp;
 	int overtemp;
+	int cutofftemp;
 	int minspeed;
 	int maxspeed;
 };