Merge pull request #569 from kanoi/master Ant S1 - overheat handling + API-README versions fix
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
diff --git a/API-README b/API-README
index 303a62b..a10017c 100644
--- a/API-README
+++ b/API-README
@@ -503,7 +503,7 @@ miner.php - an example web page to access the API
Feature Changelog for external applications using the API:
-API V3.3 (cgminer v4.1.1)
+API V3.3 (cgminer v4.2.0)
Added API commands:
'edevs' - Only enabled devices, for 'devs'
@@ -1674,7 +1674,8 @@ With cgminer 2.10.2 and later, miner.php includes an extension to
the custom pages that allows you to apply SQL style commands to
the data: where, group, and having
cgminer 3.4.2 and later also includes another option 'gen'
-cgminer 4.1.1 and later also includes 2 another options 'fmt' and 'bgen'
+cgminer 4.2.0 and later also includes another option 'fmt'
+cgminer 4.2.1 and later also includes another option 'bgen'
An example of an 'ext' section in a more complex custom summary page:
diff --git a/driver-bitmain.c b/driver-bitmain.c
index 9dc88b7..6ea9930 100644
--- a/driver-bitmain.c
+++ b/driver-bitmain.c
@@ -748,6 +748,7 @@ static inline void record_temp_fan(struct bitmain_info *info, struct bitmain_rxs
info->fan[i] = bm->fan[i] * BITMAIN_FAN_FACTOR;
}
info->temp_num = bm->temp_num;
+ info->temp_hi = 0;
for (i = 0; i < bm->temp_num; i++) {
info->temp[i] = bm->temp[i];
/*
@@ -757,9 +758,10 @@ static inline void record_temp_fan(struct bitmain_info *info, struct bitmain_rxs
}*/
*temp_avg += info->temp[i];
- if (info->temp[i] > info->temp_max) {
+ if (info->temp[i] > info->temp_max)
info->temp_max = info->temp[i];
- }
+ if (info->temp[i] > info->temp_hi)
+ info->temp_hi = info->temp[i];
}
if (bm->temp_num > 0) {
@@ -804,14 +806,21 @@ static void bitmain_update_temps(struct cgpu_info *bitmain, struct bitmain_info
info->temp_history_index = 0;
info->temp_sum = 0;
}
- if (unlikely(info->temp_max >= opt_bitmain_overheat)) {
- applog(LOG_WARNING, "%s%d: overheat! Idling",
- bitmain->drv->name, bitmain->device_id);
- info->overheat = true;
- } else if (info->overheat && info->temp_max <= opt_bitmain_temp) {
+ if (unlikely(info->temp_hi >= opt_bitmain_overheat)) {
+ if (!info->overheat) {
+ applog(LOG_WARNING, "%s%d: overheat! hi %dC limit %dC idling",
+ bitmain->drv->name, bitmain->device_id,
+ info->temp_hi, opt_bitmain_overheat);
+ info->overheat = true;
+ info->overheat_temp = info->temp_hi;
+ info->overheat_count++;
+ info->overheat_slept = 0;
+ }
+ } else if (info->overheat && info->temp_hi <= opt_bitmain_temp) {
applog(LOG_WARNING, "%s%d: cooled, restarting",
bitmain->drv->name, bitmain->device_id);
info->overheat = false;
+ info->overheat_recovers++;
}
}
@@ -1100,13 +1109,9 @@ static void *bitmain_get_results(void *userdata)
offset = 0;
}
- /* As the usb read returns after just 1ms, sleep long enough
- * to leave the interface idle for writes to occur, but do not
- * sleep if we have been receiving data as more may be coming. */
- //if (offset == 0)
- // cgsleep_ms_r(&ts_start, BITMAIN_READ_TIMEOUT);
+ // Short fixed 2ms pause before each read (presumably to leave the interface idle for writes)
+ cgsleep_ms(2);
- //cgsleep_prepare_r(&ts_start);
applog(LOG_DEBUG, "%s%d: %s() read",
bitmain->drv->name, bitmain->device_id, __func__);
ret = bitmain_read(bitmain, buf, rsize, BITMAIN_READ_TIMEOUT, C_BITMAIN_READ);
@@ -1496,6 +1501,48 @@ static bool bitmain_fill(struct cgpu_info *bitmain)
int timediff = 0;
K_ITEM *witem;
+ /*
+ * Overheat just means delaying the next work.
+ * Since the temperature reply is only found with a work reply,
+ * we can only sleep and hope it will cool down.
+ * TODO: of course it may be possible to read the temperature
+ * without sending work ...
+ */
+ if (info->overheat == true) {
+ if (info->overheat_sleep_ms == 0)
+ info->overheat_sleep_ms = BITMAIN_OVERHEAT_SLEEP_MS_DEF;
+
+ /*
+ * If we slept and we are still here, and the temp didn't drop,
+ * increment the sleep time to find a sleep time that causes a
+ * temperature drop
+ */
+ if (info->overheat_slept) {
+ if (info->overheat_temp > info->temp_hi)
+ info->overheat_temp = info->temp_hi;
+ else {
+ if (info->overheat_sleep_ms < BITMAIN_OVERHEAT_SLEEP_MS_MAX)
+ info->overheat_sleep_ms += BITMAIN_OVERHEAT_SLEEP_MS_STEP;
+ }
+ }
+
+ applog(LOG_DEBUG, "%s%d: %s() sleeping %"PRIu32" - overheated",
+ bitmain->drv->name, bitmain->device_id,
+ __func__, info->overheat_sleep_ms);
+ cgsleep_ms(info->overheat_sleep_ms);
+ info->overheat_sleeps++;
+ info->overheat_slept = info->overheat_sleep_ms;
+ info->overheat_total_sleep += info->overheat_sleep_ms;
+ } else {
+ // If we slept and it cooled then try less next time
+ if (info->overheat_slept) {
+ if (info->overheat_sleep_ms > BITMAIN_OVERHEAT_SLEEP_MS_MIN)
+ info->overheat_sleep_ms -= BITMAIN_OVERHEAT_SLEEP_MS_STEP;
+ info->overheat_slept = 0;
+ }
+
+ }
+
applog(LOG_DEBUG, "%s%d: %s() start",
bitmain->drv->name, bitmain->device_id,
__func__);
@@ -1787,6 +1834,24 @@ static struct api_data *bitmain_api_stats(struct cgpu_info *cgpu)
(float)(info->failed_search) : 0;
root = api_add_avg(root, "avg_failed", &avg, true);
+ root = api_add_int(root, "temp_hi", &(info->temp_hi), false);
+ root = api_add_bool(root, "overheat", &(info->overheat), true);
+ root = api_add_int(root, "overheat_temp", &(info->overheat_temp), true);
+ root = api_add_uint32(root, "overheat_count", &(info->overheat_count), true);
+ root = api_add_uint32(root, "overheat_sleep_ms", &(info->overheat_sleep_ms), true);
+ root = api_add_uint32(root, "overheat_sleeps", &(info->overheat_sleeps), true);
+ root = api_add_uint32(root, "overheat_slept", &(info->overheat_slept), true);
+ root = api_add_uint64(root, "overheat_total_sleep", &(info->overheat_total_sleep), true);
+ root = api_add_uint32(root, "overheat_recovers", &(info->overheat_recovers), true);
+
+ root = api_add_int(root, "opt_bitmain_temp", &opt_bitmain_temp, false);
+ root = api_add_int(root, "opt_bitmain_overheat", &opt_bitmain_overheat, false);
+ root = api_add_int(root, "opt_bitmain_fan_min", &opt_bitmain_fan_min, false);
+ root = api_add_int(root, "opt_bitmain_fan_max", &opt_bitmain_fan_max, false);
+ root = api_add_int(root, "opt_bitmain_freq_min", &opt_bitmain_freq_min, false);
+ root = api_add_int(root, "opt_bitmain_freq_max", &opt_bitmain_freq_max, false);
+ root = api_add_bool(root, "opt_bitmain_auto", &opt_bitmain_auto, false);
+
return root;
}
diff --git a/driver-bitmain.h b/driver-bitmain.h
index 605fdd9..be066c1 100644
--- a/driver-bitmain.h
+++ b/driver-bitmain.h
@@ -73,6 +73,11 @@
#define BITMAIN_SEND_STATUS_TIME 10 //s
#define BITMAIN_SEND_FULL_SPACE 128
+#define BITMAIN_OVERHEAT_SLEEP_MS_MAX 10000
+#define BITMAIN_OVERHEAT_SLEEP_MS_MIN 200
+#define BITMAIN_OVERHEAT_SLEEP_MS_DEF 600
+#define BITMAIN_OVERHEAT_SLEEP_MS_STEP 200
+
struct bitmain_txconfig_token {
uint8_t token_type;
uint8_t length;
@@ -187,6 +192,7 @@ struct bitmain_info {
int temp[BITMAIN_MAX_TEMP_NUM];
int temp_max;
+ int temp_hi;
int temp_avg;
int temp_history_count;
int temp_history_index;
@@ -219,6 +225,13 @@ struct bitmain_info {
bool reset;
bool overheat;
bool optimal;
+ int overheat_temp;
+ uint32_t overheat_count;
+ uint32_t overheat_sleep_ms;
+ uint32_t overheat_sleeps;
+ uint32_t overheat_slept;
+ uint64_t overheat_total_sleep;
+ uint32_t overheat_recovers;
// Work
K_LIST *work_list;