Commit 2d2e8d61e897604aec88eb1c6a4f205df5d386e0

Kano 2014-03-22T17:15:40

ants1 - slow down mining if overheat occurs

diff --git a/driver-bitmain.c b/driver-bitmain.c
index 9dc88b7..20d3598 100644
--- a/driver-bitmain.c
+++ b/driver-bitmain.c
@@ -748,6 +748,7 @@ static inline void record_temp_fan(struct bitmain_info *info, struct bitmain_rxs
 		info->fan[i] = bm->fan[i] * BITMAIN_FAN_FACTOR;
 	}
 	info->temp_num = bm->temp_num;
+	info->temp_hi = 0;
 	for (i = 0; i < bm->temp_num; i++) {
 		info->temp[i] = bm->temp[i];
 		/*
@@ -757,9 +758,10 @@ static inline void record_temp_fan(struct bitmain_info *info, struct bitmain_rxs
 		}*/
 		*temp_avg += info->temp[i];
 
-		if (info->temp[i] > info->temp_max) {
+		if (info->temp[i] > info->temp_max)
 			info->temp_max = info->temp[i];
-		}
+		if (info->temp[i] > info->temp_hi)
+			info->temp_hi = info->temp[i];
 	}
 
 	if (bm->temp_num > 0) {
@@ -804,14 +806,21 @@ static void bitmain_update_temps(struct cgpu_info *bitmain, struct bitmain_info 
 		info->temp_history_index = 0;
 		info->temp_sum = 0;
 	}
-	if (unlikely(info->temp_max >= opt_bitmain_overheat)) {
-		applog(LOG_WARNING, "%s%d: overheat! Idling",
-				    bitmain->drv->name, bitmain->device_id);
-		info->overheat = true;
-	} else if (info->overheat && info->temp_max <= opt_bitmain_temp) {
+	if (unlikely(info->temp_hi >= opt_bitmain_overheat)) {
+		if (!info->overheat) {
+			applog(LOG_WARNING, "%s%d: overheat! hi %dC limit %dC idling",
+					    bitmain->drv->name, bitmain->device_id,
+					    info->temp_hi, opt_bitmain_overheat);
+			info->overheat = true;
+			info->overheat_temp = info->temp_hi;
+			info->overheat_count++;
+			info->overheat_slept = 0;
+		}
+	} else if (info->overheat && info->temp_hi <= opt_bitmain_temp) {
 		applog(LOG_WARNING, "%s%d: cooled, restarting",
 				    bitmain->drv->name, bitmain->device_id);
 		info->overheat = false;
+		info->overheat_recovers++;
 	}
 }
 
@@ -1107,6 +1116,7 @@ static void *bitmain_get_results(void *userdata)
 		//	cgsleep_ms_r(&ts_start, BITMAIN_READ_TIMEOUT);
 
 		//cgsleep_prepare_r(&ts_start);
+
 		applog(LOG_DEBUG, "%s%d: %s() read",
 				  bitmain->drv->name, bitmain->device_id, __func__);
 		ret = bitmain_read(bitmain, buf, rsize, BITMAIN_READ_TIMEOUT, C_BITMAIN_READ);
@@ -1496,6 +1506,48 @@ static bool bitmain_fill(struct cgpu_info *bitmain)
 	int timediff = 0;
 	K_ITEM *witem;
 
+	/*
+	 * Overheat just means delaying the next work.
+	 * Since the temperature reply only arrives with a work reply,
+	 * we can only sleep and hope the device cools down.
+	 * TODO: it may be possible to read the temperature
+	 * without sending work ...
+	 */
+	if (info->overheat == true) {
+		if (info->overheat_sleep_ms == 0)
+			info->overheat_sleep_ms = BITMAIN_OVERHEAT_SLEEP_MS_DEF;
+
+		/*
+		 * If we already slept, are still overheated, and the temperature
+		 * did not drop, lengthen the sleep (up to the maximum) to find a
+		 * sleep time that causes a temperature drop.
+		 */
+		if (info->overheat_slept) {
+			if (info->overheat_temp > info->temp_hi)
+				info->overheat_temp = info->temp_hi;
+			else {
+				if (info->overheat_sleep_ms < BITMAIN_OVERHEAT_SLEEP_MS_MAX)
+					info->overheat_sleep_ms += BITMAIN_OVERHEAT_SLEEP_MS_STEP;
+			}
+		}
+
+		applog(LOG_DEBUG, "%s%d: %s() sleeping %"PRIu32" - overheated",
+				  bitmain->drv->name, bitmain->device_id,
+				  __func__, info->overheat_sleep_ms);
+		cgsleep_ms(info->overheat_sleep_ms);
+		info->overheat_sleeps++;
+		info->overheat_slept = info->overheat_sleep_ms;
+		info->overheat_total_sleep += info->overheat_sleep_ms;
+	} else {
+		// If we slept and it cooled, try a shorter sleep next time
+		if (info->overheat_slept) {
+			if (info->overheat_sleep_ms > BITMAIN_OVERHEAT_SLEEP_MS_MIN)
+				info->overheat_sleep_ms -= BITMAIN_OVERHEAT_SLEEP_MS_STEP;
+			info->overheat_slept = 0;
+		}
+
+	}
+
 	applog(LOG_DEBUG, "%s%d: %s() start",
 			  bitmain->drv->name, bitmain->device_id,
 			  __func__);
@@ -1787,6 +1839,24 @@ static struct api_data *bitmain_api_stats(struct cgpu_info *cgpu)
 					(float)(info->failed_search) : 0;
 	root = api_add_avg(root, "avg_failed", &avg, true);
 
+	root = api_add_int(root, "temp_hi", &(info->temp_hi), false);
+	root = api_add_bool(root, "overheat", &(info->overheat), true);
+	root = api_add_int(root, "overheat_temp", &(info->overheat_temp), true);
+	root = api_add_uint32(root, "overheat_count", &(info->overheat_count), true);
+	root = api_add_uint32(root, "overheat_sleep_ms", &(info->overheat_sleep_ms), true);
+	root = api_add_uint32(root, "overheat_sleeps", &(info->overheat_sleeps), true);
+	root = api_add_uint32(root, "overheat_slept", &(info->overheat_slept), true);
+	root = api_add_uint64(root, "overheat_total_sleep", &(info->overheat_total_sleep), true);
+	root = api_add_uint32(root, "overheat_recovers", &(info->overheat_recovers), true);
+
+	root = api_add_int(root, "opt_bitmain_temp", &opt_bitmain_temp, false);
+	root = api_add_int(root, "opt_bitmain_overheat", &opt_bitmain_overheat, false);
+	root = api_add_int(root, "opt_bitmain_fan_min", &opt_bitmain_fan_min, false);
+	root = api_add_int(root, "opt_bitmain_fan_max", &opt_bitmain_fan_max, false);
+	root = api_add_int(root, "opt_bitmain_freq_min", &opt_bitmain_freq_min, false);
+	root = api_add_int(root, "opt_bitmain_freq_max", &opt_bitmain_freq_max, false);
+	root = api_add_bool(root, "opt_bitmain_auto", &opt_bitmain_auto, false);
+
 	return root;
 }
 
diff --git a/driver-bitmain.h b/driver-bitmain.h
index 605fdd9..be066c1 100644
--- a/driver-bitmain.h
+++ b/driver-bitmain.h
@@ -73,6 +73,11 @@
 #define BITMAIN_SEND_STATUS_TIME   10 //s
 #define BITMAIN_SEND_FULL_SPACE    128
 
+#define BITMAIN_OVERHEAT_SLEEP_MS_MAX 10000
+#define BITMAIN_OVERHEAT_SLEEP_MS_MIN 200
+#define BITMAIN_OVERHEAT_SLEEP_MS_DEF 600
+#define BITMAIN_OVERHEAT_SLEEP_MS_STEP 200
+
 struct bitmain_txconfig_token {
 	uint8_t token_type;
 	uint8_t length;
@@ -187,6 +192,7 @@ struct bitmain_info {
 	int temp[BITMAIN_MAX_TEMP_NUM];
 
 	int temp_max;
+	int temp_hi;
 	int temp_avg;
 	int temp_history_count;
 	int temp_history_index;
@@ -219,6 +225,13 @@ struct bitmain_info {
 	bool reset;
 	bool overheat;
 	bool optimal;
+	int overheat_temp;
+	uint32_t overheat_count;
+	uint32_t overheat_sleep_ms;
+	uint32_t overheat_sleeps;
+	uint32_t overheat_slept;
+	uint64_t overheat_total_sleep;
+	uint32_t overheat_recovers;
 
 	// Work
 	K_LIST *work_list;