Commit 088ee2fa2939ce3c57a14dd8b985ae9e90a41a44

Con Kolivas 2011-08-23T10:47:47

Use only one thread per device to determine whether a GPU is sick or well, and reset the sick timestamp on each restart attempt so that restarts are attempted at most once per minute.

diff --git a/main.c b/main.c
index 974db8f..fb37d81 100644
--- a/main.c
+++ b/main.c
@@ -4128,8 +4128,14 @@ static void *watchdog_thread(void *userdata)
 
 		//for (i = 0; i < mining_threads; i++) {
 		for (i = 0; i < gpu_threads; i++) {
-			struct thr_info *thr = &thr_info[i];
-			int gpu = thr->cgpu->cpu_gpu;
+			struct thr_info *thr;
+			int gpu;
+
+			/* Use only one thread per device to determine if the GPU is healthy */
+			if (i >= nDevs)
+				break;
+			thr = &thr_info[i];
+			gpu = thr->cgpu->cpu_gpu;
 
 			/* Thread is waiting on getwork or disabled */
 			if (thr->getwork || !gpu_devices[gpu])
@@ -4150,6 +4156,7 @@ static void *watchdog_thread(void *userdata)
 				applog(LOG_ERR, "Thread %d not responding for more than 10 minutes, GPU %d declared DEAD!", i, gpu);
 			} else if (now.tv_sec - thr->sick.tv_sec > 60 && gpus[i].status == LIFE_SICK) {
 				/* Attempt to restart a GPU once every minute */
+				gettimeofday(&thr->sick, NULL);
 				reinit_device(thr->cgpu);
 			}
 		}