Icarus: catch more USB errors and close/reopen the port
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
diff --git a/driver-icarus.c b/driver-icarus.c
index 4214c31..cc74df2 100644
--- a/driver-icarus.c
+++ b/driver-icarus.c
@@ -223,6 +223,11 @@ static void rev(unsigned char *s, size_t l)
#define icarus_open2(devpath, baud, purge) serial_open(devpath, baud, ICARUS_READ_FAULT_DECISECONDS, purge)
#define icarus_open(devpath, baud) icarus_open2(devpath, baud, false)
+#define ICA_GETS_ERROR -1
+#define ICA_GETS_OK 0
+#define ICA_GETS_RESTART 1
+#define ICA_GETS_TIMEOUT 2
+
static int icarus_gets(unsigned char *buf, int fd, struct timeval *tv_finish, struct thr_info *thr, int read_count)
{
ssize_t ret = 0;
@@ -233,12 +238,14 @@ static int icarus_gets(unsigned char *buf, int fd, struct timeval *tv_finish, st
// Read reply 1 byte at a time to get earliest tv_finish
while (true) {
ret = read(fd, buf, 1);
+ if (ret < 0)
+ return ICA_GETS_ERROR;
if (first)
gettimeofday(tv_finish, NULL);
if (ret >= read_amount)
- return 0;
+ return ICA_GETS_OK;
if (ret > 0) {
buf += ret;
@@ -254,16 +261,16 @@ static int icarus_gets(unsigned char *buf, int fd, struct timeval *tv_finish, st
"Icarus Read: No data in %.2f seconds",
(float)rc/(float)TIME_FACTOR);
}
- return 1;
+ return ICA_GETS_TIMEOUT;
}
- if (thr->work_restart) {
+ if (thr && thr->work_restart) {
if (opt_debug) {
applog(LOG_DEBUG,
"Icarus Read: Work restart at %.2f seconds",
(float)(rc)/(float)TIME_FACTOR);
}
- return 1;
+ return ICA_GETS_RESTART;
}
}
}
@@ -281,6 +288,13 @@ static int icarus_write(int fd, const void *buf, size_t bufLen)
#define icarus_close(fd) close(fd)
+static void do_icarus_close(struct thr_info *thr)
+{
+ struct cgpu_info *icarus = thr->cgpu;
+ icarus_close(icarus->device_fd);
+ icarus->device_fd = -1;
+}
+
static const char *timing_mode_str(enum timing_mode timing_mode)
{
switch(timing_mode) {
@@ -533,10 +547,7 @@ static bool icarus_detect_one(const char *devpath)
gettimeofday(&tv_start, NULL);
memset(nonce_bin, 0, sizeof(nonce_bin));
- struct thr_info dummy = {
- .work_restart = false,
- };
- icarus_gets(nonce_bin, fd, &tv_finish, &dummy, 1);
+ icarus_gets(nonce_bin, fd, &tv_finish, NULL, 1);
icarus_close(fd);
@@ -563,6 +574,7 @@ static bool icarus_detect_one(const char *devpath)
icarus = calloc(1, sizeof(struct cgpu_info));
icarus->api = &icarus_api;
icarus->device_path = strdup(devpath);
+ icarus->device_fd = -1;
icarus->threads = 1;
add_cgpu(icarus);
icarus_info = realloc(icarus_info, sizeof(struct ICARUS_INFO *) * (total_devices + 1));
@@ -607,6 +619,8 @@ static bool icarus_prepare(struct thr_info *thr)
struct timeval now;
+ icarus->device_fd = -1;
+
int fd = icarus_open(icarus->device_path, icarus_info[icarus->device_id]->baud);
if (unlikely(-1 == fd)) {
applog(LOG_ERR, "Failed to open Icarus on %s",
@@ -653,6 +667,17 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
elapsed.tv_sec = elapsed.tv_usec = 0;
icarus = thr->cgpu;
+ if (icarus->device_fd == -1)
+ if (!icarus_prepare(thr)) {
+ applog(LOG_ERR, "ICA%i: Comms error", icarus->device_id);
+ icarus->device_last_not_well = time(NULL);
+ icarus->device_not_well_reason = REASON_DEV_COMMS_ERROR;
+ icarus->dev_comms_error_count++;
+
+ // fail the device if the reopen attempt fails
+ return -1;
+ }
+
fd = icarus->device_fd;
memset(ob_bin, 0, sizeof(ob_bin));
@@ -664,8 +689,10 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
tcflush(fd, TCOFLUSH);
#endif
ret = icarus_write(fd, ob_bin, sizeof(ob_bin));
- if (ret)
- return -1; /* This should never happen */
+ if (ret) {
+ do_icarus_close(thr);
+ return 0; /* This should never happen */
+ }
gettimeofday(&tv_start, NULL);
@@ -682,12 +709,19 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
memset(nonce_bin, 0, sizeof(nonce_bin));
info = icarus_info[icarus->device_id];
ret = icarus_gets(nonce_bin, fd, &tv_finish, thr, info->read_count);
+ if (ret == ICA_GETS_ERROR) {
+ do_icarus_close(thr);
+ applog(LOG_ERR, "ICA%i: Comms error", icarus->device_id);
+ icarus->device_last_not_well = time(NULL);
+ icarus->device_not_well_reason = REASON_DEV_COMMS_ERROR;
+ icarus->dev_comms_error_count++;
+ return 0;
+ }
work->blk.nonce = 0xffffffff;
- memcpy((char *)&nonce, nonce_bin, sizeof(nonce_bin));
// aborted before becoming idle, get new work
- if (nonce == 0 && ret) {
+ if (ret == ICA_GETS_TIMEOUT || ret == ICA_GETS_RESTART) {
timersub(&tv_finish, &tv_start, &elapsed);
// ONLY up to just when it aborted
@@ -709,6 +743,8 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
return estimate_hashes;
}
+ memcpy((char *)&nonce, nonce_bin, sizeof(nonce_bin));
+
#if !defined (__BIG_ENDIAN__) && !defined(MIPSEB)
nonce = swab32(nonce);
#endif
@@ -717,6 +753,10 @@ static int64_t icarus_scanhash(struct thr_info *thr, struct work *work,
submit_nonce(thr, work, nonce);
was_hw_error = (curr_hw_errors > icarus->hw_errors);
+ // Force a USB close/reopen on any hw error
+ if (was_hw_error)
+ do_icarus_close(thr);
+
hash_count = (nonce & info->nonce_mask);
hash_count++;
hash_count *= info->fpga_count;
@@ -862,8 +902,7 @@ static struct api_data *icarus_api_stats(struct cgpu_info *cgpu)
static void icarus_shutdown(struct thr_info *thr)
{
- struct cgpu_info *icarus = thr->cgpu;
- icarus_close(icarus->device_fd);
+ do_icarus_close(thr);
}
struct device_api icarus_api = {