minion - record 0xff error history and reduce screen output
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
diff --git a/driver-minion.c b/driver-minion.c
index d191fb8..d5fce4e 100644
--- a/driver-minion.c
+++ b/driver-minion.c
@@ -598,11 +598,21 @@ typedef struct perf_item {
#define ALLOC_PERF_ITEMS 128
#define LIMIT_PERF_ITEMS 0
+// *** 0xff error history - uses max 20 and rolls over
+typedef struct xff_item { // one entry in the history of ioctl replies that came back as all 0xff
+ struct timeval when; // time the error was recorded; filled in with cgtime() when the entry is stored
+ const char *what; // short label for the failing read, e.g. "nonce", "fifo" or "unk"
+} XFF_ITEM;
+
+#define ALLOC_XFF_ITEMS 20 // passed as the third argument to k_new_list() when the 0xff history list is created
+#define LIMIT_XFF_ITEMS 20 // passed as the fourth argument to k_new_list(); presumably the item cap - TODO confirm against klist
+
#define DATA_WORK(_item) ((WORK_ITEM *)(_item->data))
#define DATA_TASK(_item) ((TASK_ITEM *)(_item->data))
#define DATA_RES(_item) ((RES_ITEM *)(_item->data))
#define DATA_HIST(_item) ((HIST_ITEM *)(_item->data))
#define DATA_PERF(_item) ((PERF_ITEM *)(_item->data))
+#define DATA_XFF(_item) ((XFF_ITEM *)(_item->data)) // view a list item's data pointer as an XFF_ITEM
// Set this to 1 to enable iostats processing
// N.B. it slows down mining
@@ -835,6 +845,10 @@ struct minion_info {
K_LIST *pfree_list;
K_STORE *p_list[MINION_CHIPS];
+ // 0xff history
+ K_LIST *xfree_list;
+ K_STORE *xff_list;
+
// Gets reset to zero each time it is used in reporting
int res_err_count[MINION_CHIPS];
@@ -1257,6 +1271,9 @@ static int __do_ioctl(struct cgpu_info *minioncgpu, struct minion_info *minionin
if (fail) {
char *what = "unk";
+ K_ITEM *xitem;
+ bool show = false;
+ double lastshow;
switch (obuf[1]) {
case READ_ADDR(MINION_RES_DATA):
what = "nonce";
@@ -1265,9 +1282,27 @@ static int __do_ioctl(struct cgpu_info *minioncgpu, struct minion_info *minionin
what = "fifo";
break;
}
- applog(LOG_ERR, "%s%d: ioctl %"PRIu64" %s returned all 0xff - resetting",
- minioncgpu->drv->name, minioncgpu->device_id,
- *ioseq, what);
+ K_WLOCK(minioninfo->xfree_list);
+ if (minioninfo->xfree_list->count > 0)
+ xitem = k_unlink_head(minioninfo->xfree_list);
+ else
+ xitem = k_unlink_tail(minioninfo->xff_list);
+ DATA_XFF(xitem)->what = what;
+ cgtime(&(DATA_XFF(xitem)->when));
+ if (!minioninfo->xff_list->head)
+ show = true;
+ else {
+ lastshow = tdiff(&(DATA_XFF(xitem)->when),
+ &(DATA_XFF(minioninfo->xff_list->head)->when));
+ show = (lastshow >= 5.0);
+ }
+ k_add_head(minioninfo->xff_list, xitem);
+ K_WUNLOCK(minioninfo->xfree_list);
+ if (show) {
+ applog(LOG_ERR, "%s%d: ioctl %"PRIu64" %s returned all 0xff - resetting",
+ minioncgpu->drv->name, minioncgpu->device_id,
+ *ioseq, what);
+ }
}
#if MINION_SHOW_IO
@@ -2334,6 +2369,10 @@ static void minion_detect(bool hotplug)
for (i = 0; i < (int)MINION_CHIPS; i++)
minioninfo->p_list[i] = k_new_store(minioninfo->pfree_list);
+ minioninfo->xfree_list = k_new_list("0xff", sizeof(XFF_ITEM),
+ ALLOC_XFF_ITEMS, LIMIT_XFF_ITEMS, true);
+ minioninfo->xff_list = k_new_store(minioninfo->xfree_list);
+
cgsem_init(&(minioninfo->task_ready));
cgsem_init(&(minioninfo->nonce_ready));
cgsem_init(&(minioninfo->scan_work));
@@ -2933,19 +2972,20 @@ static void *minion_spi_reply(void *userdata)
if (fifo_task.reply < (int)(fifo_task.osiz)) {
char *buf = bin2hex((unsigned char *)(&(fifo_task.rbuf[fifo_task.osiz - fifo_task.rsiz])),
(int)(fifo_task.rsiz));
- applog(LOG_ERR, "%s%i: Chip %d Bad fifo reply (%s) size %d, should be %d",
- minioncgpu->drv->name, minioncgpu->device_id,
- chip, buf,
- fifo_task.reply, (int)(fifo_task.osiz));
+ applog(LOG_DEBUG, "%s%i: Chip %d Bad fifo reply (%s) size %d, should be %d",
+ minioncgpu->drv->name, minioncgpu->device_id,
+ chip, buf,
+ fifo_task.reply, (int)(fifo_task.osiz));
free(buf);
minioninfo->spi_errors++;
minioninfo->fifo_spi_errors[chip]++;
minioninfo->res_err_count[chip]++;
} else {
if (fifo_task.reply > (int)(fifo_task.osiz)) {
- applog(LOG_ERR, "%s%i: Chip %d Unexpected fifo reply size %d, expected only %d",
- minioncgpu->drv->name, minioncgpu->device_id,
- chip, fifo_task.reply, (int)(fifo_task.osiz));
+ applog(LOG_DEBUG, "%s%i: Chip %d Unexpected fifo reply size %d, "
+ "expected only %d",
+ minioncgpu->drv->name, minioncgpu->device_id,
+ chip, fifo_task.reply, (int)(fifo_task.osiz));
}
res = FIFO_RES(fifo_task.rbuf, fifo_task.osiz - fifo_task.rsiz);
cmd = FIFO_CMD(fifo_task.rbuf, fifo_task.osiz - fifo_task.rsiz);
@@ -2953,9 +2993,11 @@ static void *minion_spi_reply(void *userdata)
if (res <= MINION_QUE_MAX && cmd <= MINION_QUE_MAX)
break;
- applog(LOG_ERR, "%s%i: Chip %d Bad fifo reply res %d (max is %d) cmd %d (max is %d)",
- minioncgpu->drv->name, minioncgpu->device_id,
- chip, (int)res, MINION_QUE_MAX, (int)cmd, MINION_QUE_MAX);
+ applog(LOG_DEBUG, "%s%i: Chip %d Bad fifo reply res %d (max is %d) "
+ "cmd %d (max is %d)",
+ minioncgpu->drv->name, minioncgpu->device_id,
+ chip, (int)res, MINION_QUE_MAX,
+ (int)cmd, MINION_QUE_MAX);
minioninfo->spi_errors++;
minioninfo->fifo_spi_errors[chip]++;
minioninfo->res_err_count[chip]++;