Delayed responses when testing pools that are down can keep the watchdog thread from reaching its device-testing code, leading to false detection of a GPU not checking in, and can substantially delay auto GPU/auto fan management, leading to overheating. Move pool watching to its own thread.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
diff --git a/cgminer.c b/cgminer.c
index d43bad6..dcbc670 100644
--- a/cgminer.c
+++ b/cgminer.c
@@ -142,6 +142,7 @@ struct thr_info *thr_info;
static int work_thr_id;
int longpoll_thr_id;
static int stage_thr_id;
+static int watchpool_thr_id;
static int watchdog_thr_id;
static int input_thr_id;
int gpur_thr_id;
@@ -1599,6 +1600,12 @@ void kill_work(void)
applog(LOG_INFO, "Received kill message");
if (opt_debug)
+ applog(LOG_DEBUG, "Killing off watchpool thread");
+ /* Kill the watchpool thread */
+ thr = &thr_info[watchpool_thr_id];
+ thr_info_cancel(thr);
+
+ if (opt_debug)
applog(LOG_DEBUG, "Killing off watchdog thread");
/* Kill the watchdog thread */
thr = &thr_info[watchdog_thr_id];
@@ -3533,13 +3540,49 @@ void reinit_device(struct cgpu_info *cgpu)
cgpu->api->reinit_device(cgpu);
}
+static struct timeval rotate_tv;
+
+static void *watchpool_thread(void __maybe_unused *userdata)
+{
+ pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
+
+ while (42) {
+ struct timeval now;
+ int i;
+
+ gettimeofday(&now, NULL);
+
+ for (i = 0; i < total_pools; i++) {
+ struct pool *pool = pools[i];
+
+ if (!pool->enabled)
+ continue;
+
+ /* Test pool is idle once every minute */
+ if (pool->idle && now.tv_sec - pool->tv_idle.tv_sec > 60) {
+ gettimeofday(&pool->tv_idle, NULL);
+ if (pool_active(pool, true) && pool_tclear(pool, &pool->idle))
+ pool_resus(pool);
+ }
+ }
+
+ if (pool_strategy == POOL_ROTATE && now.tv_sec - rotate_tv.tv_sec > 60 * opt_rotate_period) {
+ gettimeofday(&rotate_tv, NULL);
+ switch_pools(NULL);
+ }
+
+ sleep(10);
+ }
+ return NULL;
+}
+
+
/* Makes sure the hashmeter keeps going even if mining threads stall, updates
* the screen at regular intervals, and restarts threads if they appear to have
* died. */
static void *watchdog_thread(void __maybe_unused *userdata)
{
const unsigned int interval = 3;
- static struct timeval rotate_tv;
struct timeval zero_tv;
pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
@@ -3569,25 +3612,6 @@ static void *watchdog_thread(void __maybe_unused *userdata)
gettimeofday(&now, NULL);
- for (i = 0; i < total_pools; i++) {
- struct pool *pool = pools[i];
-
- if (!pool->enabled)
- continue;
-
- /* Test pool is idle once every minute */
- if (pool->idle && now.tv_sec - pool->tv_idle.tv_sec > 60) {
- gettimeofday(&pool->tv_idle, NULL);
- if (pool_active(pool, true) && pool_tclear(pool, &pool->idle))
- pool_resus(pool);
- }
- }
-
- if (pool_strategy == POOL_ROTATE && now.tv_sec - rotate_tv.tv_sec > 60 * opt_rotate_period) {
- gettimeofday(&rotate_tv, NULL);
- switch_pools(NULL);
- }
-
if (!sched_paused && !should_run()) {
applog(LOG_WARNING, "Pausing execution as per stop time %02d:%02d scheduled",
schedstop.tm.tm_hour, schedstop.tm.tm_min);
@@ -4248,7 +4272,7 @@ int main (int argc, char *argv[])
fork_monitor();
#endif // defined(unix)
- total_threads = mining_threads + 7;
+ total_threads = mining_threads + 8;
work_restart = calloc(total_threads, sizeof(*work_restart));
if (!work_restart)
quit(1, "Failed to calloc work_restart");
@@ -4277,7 +4301,7 @@ int main (int argc, char *argv[])
if (!thr->q)
quit(1, "Failed to tq_new");
- stage_thr_id = mining_threads + 3;
+ stage_thr_id = mining_threads + 2;
thr = &thr_info[stage_thr_id];
thr->q = tq_new();
if (!thr->q)
@@ -4400,14 +4424,22 @@ retry_pools:
if (use_curses)
enable_curses();
- watchdog_thr_id = mining_threads + 2;
+ watchpool_thr_id = mining_threads + 3;
+ thr = &thr_info[watchpool_thr_id];
+ /* start watchpool thread */
+ if (thr_info_create(thr, NULL, watchpool_thread, NULL))
+ quit(1, "watchpool thread create failed");
+ pthread_detach(thr->pth);
+
+ watchdog_thr_id = mining_threads + 4;
thr = &thr_info[watchdog_thr_id];
- /* start wakeup thread */
+ /* start watchdog thread */
if (thr_info_create(thr, NULL, watchdog_thread, NULL))
- quit(1, "wakeup thread create failed");
+ quit(1, "watchdog thread create failed");
+ pthread_detach(thr->pth);
/* Create reinit gpu thread */
- gpur_thr_id = mining_threads + 4;
+ gpur_thr_id = mining_threads + 5;
thr = &thr_info[gpur_thr_id];
thr->q = tq_new();
if (!thr->q)
@@ -4416,7 +4448,7 @@ retry_pools:
quit(1, "reinit_gpu thread create failed");
/* Create API socket thread */
- api_thr_id = mining_threads + 5;
+ api_thr_id = mining_threads + 6;
thr = &thr_info[api_thr_id];
if (thr_info_create(thr, NULL, api_thread, thr))
quit(1, "API thread create failed");
@@ -4425,7 +4457,7 @@ retry_pools:
/* Create curses input thread for keyboard input. Create this last so
* that we know all threads are created since this can call kill_work
* to try and shut down ll previous threads. */
- input_thr_id = mining_threads + 6;
+ input_thr_id = mining_threads + 7;
thr = &thr_info[input_thr_id];
if (thr_info_create(thr, NULL, input_thread, thr))
quit(1, "input thread create failed");