Make the fail-pause progressively longer after each consecutive failure, and reset it to the configured value once the network recovers.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
diff --git a/main.c b/main.c
index a72944d..2e6890d 100644
--- a/main.c
+++ b/main.c
@@ -181,6 +181,7 @@ static bool opt_realquiet = false;
static bool opt_loginput = false;
static int opt_retries = -1;
static int opt_fail_pause = 5;
+static int fail_pause = 5;
static int opt_log_interval = 5;
bool opt_log_output = false;
static bool opt_dynamic = true;
@@ -1775,9 +1776,11 @@ static void *get_work_thread(void *userdata)
/* pause, then restart work-request loop */
applog(LOG_DEBUG, "json_rpc_call failed on get work, retry after %d seconds",
- opt_fail_pause);
- sleep(opt_fail_pause);
+ fail_pause);
+ sleep(fail_pause);
+ fail_pause += opt_fail_pause;
}
+ fail_pause = opt_fail_pause;
if (opt_debug)
applog(LOG_DEBUG, "Pushing work to requesting thread");
@@ -1865,9 +1868,11 @@ static void *submit_work_thread(void *userdata)
/* pause, then restart work-request loop */
applog(LOG_INFO, "json_rpc_call failed on submit_work, retry after %d seconds",
- opt_fail_pause);
- sleep(opt_fail_pause);
+ fail_pause);
+ sleep(fail_pause);
+ fail_pause += opt_fail_pause;
}
+ fail_pause = opt_fail_pause;
out:
workio_cmd_free(wc);
return NULL;
@@ -3120,10 +3125,12 @@ out:
applog(LOG_ERR, "Failed %d times to get_work");
return ret;
}
- applog(LOG_DEBUG, "Retrying after %d seconds", opt_fail_pause);
- sleep(opt_fail_pause);
+ applog(LOG_DEBUG, "Retrying after %d seconds", fail_pause);
+ sleep(fail_pause);
+ fail_pause += opt_fail_pause;
goto retry;
}
+ fail_pause = opt_fail_pause;
work->thr_id = thr_id;
thread_reportin(thr);