Add low hash threshold in sick/dead processing. Add check for fd in comms procedures.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
diff --git a/cgminer.c b/cgminer.c
index 8b7de73..ced6663 100644
--- a/cgminer.c
+++ b/cgminer.c
@@ -3648,7 +3648,8 @@ static inline bool should_roll(struct work *work)
* reject blocks as invalid. */
static inline bool can_roll(struct work *work)
{
- return (work->pool && work->rolltime && !work->clone && work->rolls < 7000);
+ return (work->pool && work->rolltime && !work->clone &&
+work->rolls < 7000 && !stale_work(work, false));
}
static void roll_work(struct work *work)
@@ -4404,9 +4405,16 @@ static void age_work(void)
/* Makes sure the hashmeter keeps going even if mining threads stall, updates
* the screen at regular intervals, and restarts threads if they appear to have
* died. */
+#define WATCHDOG_INTERVAL 3
+#define WATCHDOG_SICK_TIME 60
+#define WATCHDOG_DEAD_TIME 600
+#define WATCHDOG_SICK_COUNT (WATCHDOG_SICK_TIME/WATCHDOG_INTERVAL)
+#define WATCHDOG_DEAD_COUNT (WATCHDOG_DEAD_TIME/WATCHDOG_INTERVAL)
+#define WATCHDOG_LOW_HASH 1.0 /* consider < 1MH too low for any device */
+
static void *watchdog_thread(void __maybe_unused *userdata)
{
- const unsigned int interval = 3;
+ const unsigned int interval = WATCHDOG_INTERVAL;
struct timeval zero_tv;
pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
@@ -4510,11 +4518,24 @@ static void *watchdog_thread(void __maybe_unused *userdata)
if (thr->getwork || *denable == DEV_DISABLED)
continue;
- if (cgpu->status != LIFE_WELL && now.tv_sec - thr->last.tv_sec < 60) {
+ if (cgpu->rolling < WATCHDOG_LOW_HASH)
+ cgpu->low_count++;
+ else
+ cgpu->low_count = 0;
+
+ uint64_t hashtime = now.tv_sec - thr->last.tv_sec;
+ bool dev_time_well = hashtime < WATCHDOG_SICK_TIME;
+ bool dev_time_sick = hashtime > WATCHDOG_SICK_TIME;
+ bool dev_time_dead = hashtime > WATCHDOG_DEAD_TIME;
+ bool dev_count_well = cgpu->low_count < WATCHDOG_SICK_COUNT;
+ bool dev_count_sick = cgpu->low_count > WATCHDOG_SICK_COUNT;
+ bool dev_count_dead = cgpu->low_count > WATCHDOG_DEAD_COUNT;
+
+ if (cgpu->status != LIFE_WELL && dev_time_well && dev_count_well) {
applog(LOG_ERR, "%s: Recovered, declaring WELL!", dev_str);
cgpu->status = LIFE_WELL;
cgpu->device_last_well = time(NULL);
- } else if (now.tv_sec - thr->last.tv_sec > 60 && cgpu->status == LIFE_WELL) {
+ } else if (cgpu->status == LIFE_WELL && (dev_time_sick || dev_count_sick)) {
thr->rolling = cgpu->rolling = 0;
cgpu->status = LIFE_SICK;
applog(LOG_ERR, "%s: Idle for more than 60 seconds, declaring SICK!", dev_str);
@@ -4533,7 +4554,7 @@ static void *watchdog_thread(void __maybe_unused *userdata)
applog(LOG_ERR, "%s: Attempting to restart", dev_str);
reinit_device(cgpu);
}
- } else if (now.tv_sec - thr->last.tv_sec > 600 && cgpu->status == LIFE_SICK) {
+ } else if (cgpu->status == LIFE_SICK && (dev_time_dead || dev_count_dead)) {
cgpu->status = LIFE_DEAD;
applog(LOG_ERR, "%s: Not responded for more than 10 minutes, declaring DEAD!", dev_str);
gettimeofday(&thr->sick, NULL);
@@ -5473,3 +5494,4 @@ begin_bench:
return 0;
}
+
diff --git a/driver-bitforce.c b/driver-bitforce.c
index 6372944..0a47df5 100644
--- a/driver-bitforce.c
+++ b/driver-bitforce.c
@@ -144,13 +144,12 @@ void bitforce_init(struct cgpu_info *bitforce)
char pdevbuf[0x100];
char *s;
- applog(LOG_INFO, "BFL%i: Re-initalizing", bitforce->device_id);
+ applog(LOG_WARNING, "BFL%i: Re-initalizing", bitforce->device_id);
mutex_lock(&bitforce->device_mutex);
- if (fdDev) {
+ if (fdDev)
BFclose(fdDev);
- bitforce->device_fd = 0;
- }
+ bitforce->device_fd = 0;
fdDev = BFopen(devpath);
if (unlikely(fdDev == -1)) {
@@ -228,6 +227,9 @@ static bool bitforce_send_work(struct thr_info *thr, struct work *work)
unsigned char ob[61] = ">>>>>>>>12345678901234567890123456789012123456789012>>>>>>>>";
char *s;
+ if (!fdDev)
+ return false;
+
mutex_lock(&bitforce->device_mutex);
BFwrite(fdDev, "ZDX", 3);
BFgets(pdevbuf, sizeof(pdevbuf), fdDev);
@@ -277,6 +279,9 @@ static uint64_t bitforce_get_result(struct thr_info *thr, struct work *work)
char *pnoncebuf;
uint32_t nonce;
+ if (!fdDev)
+ return 0;
+
while (bitforce->wait_ms < BITFORCE_TIMEOUT_MS) {
mutex_lock(&bitforce->device_mutex);
BFwrite(fdDev, "ZFX", 3);
@@ -284,7 +289,6 @@ static uint64_t bitforce_get_result(struct thr_info *thr, struct work *work)
mutex_unlock(&bitforce->device_mutex);
if (unlikely(!pdevbuf[0])) {
applog(LOG_ERR, "BFL%i: Error reading (ZFX)", bitforce->device_id);
- mutex_unlock(&bitforce->device_mutex);
return 0;
}
if (pdevbuf[0] != 'B')
@@ -366,7 +370,7 @@ static uint64_t bitforce_scanhash(struct thr_info *thr, struct work *work, uint6
bitforce->wait_ms += WORK_CHECK_INTERVAL_MS;
if (work_restart[thr->id].restart) {
applog(LOG_DEBUG, "BFL%i: Work restart, discarding after %dms", bitforce->device_id, bitforce->wait_ms);
- return 1; //we have discarded all work; equivilent to 0 hashes done.
+ return 1; //we have discarded all work; equivalent to 0 hashes done.
}
}
} else {
@@ -420,3 +424,4 @@ struct device_api bitforce_api = {
.thread_enable = biforce_thread_enable
};
+
diff --git a/miner.h b/miner.h
index 9347d19..d32f6f0 100644
--- a/miner.h
+++ b/miner.h
@@ -326,6 +326,7 @@ struct cgpu_info {
int accepted;
int rejected;
int hw_errors;
+ unsigned int low_count;
double rolling;
double total_mhashes;
double utility;
@@ -796,3 +797,4 @@ extern void adl(void);
extern void app_restart(void);
#endif /* __MINER_H__ */
+