Only use one thread per device to determine if a GPU is sick or well, and reset the sick timestamp on every restart attempt so that restarts are attempted at most once per minute.
diff --git a/main.c b/main.c
index 974db8f..fb37d81 100644
--- a/main.c
+++ b/main.c
@@ -4128,8 +4128,14 @@ static void *watchdog_thread(void *userdata)
//for (i = 0; i < mining_threads; i++) {
for (i = 0; i < gpu_threads; i++) {
- struct thr_info *thr = &thr_info[i];
- int gpu = thr->cgpu->cpu_gpu;
+ struct thr_info *thr;
+ int gpu;
+
+ /* Use only one thread per device to determine if the GPU is healthy */
+ if (i >= nDevs)
+ break;
+ thr = &thr_info[i];
+ gpu = thr->cgpu->cpu_gpu;
/* Thread is waiting on getwork or disabled */
if (thr->getwork || !gpu_devices[gpu])
@@ -4150,6 +4156,7 @@ static void *watchdog_thread(void *userdata)
applog(LOG_ERR, "Thread %d not responding for more than 10 minutes, GPU %d declared DEAD!", i, gpu);
} else if (now.tv_sec - thr->sick.tv_sec > 60 && gpus[i].status == LIFE_SICK) {
/* Attempt to restart a GPU once every minute */
+ gettimeofday(&thr->sick, NULL);
reinit_device(thr->cgpu);
}
}