bab - roll work if possible to reduce CPU
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
diff --git a/driver-bab.c b/driver-bab.c
index 2d28f8e..1f7cdae 100644
--- a/driver-bab.c
+++ b/driver-bab.c
@@ -268,6 +268,7 @@ typedef struct witem {
struct work *work;
struct bab_work_send chip_input;
bool ci_setup;
+ bool rolled;
int nonces;
struct timeval work_start;
} WITEM;
@@ -465,6 +466,8 @@ struct bab_info {
uint8_t bad_fast[BAB_MAXCHIPS];
bool dead_msg[BAB_MAXCHIPS];
#endif
+ uint64_t work_unrolled;
+ uint64_t work_rolled;
// bab-options (in order)
uint8_t max_speed;
@@ -549,6 +552,13 @@ struct bab_info {
*/
#define BAB_BAD_DEAD (BAB_BAD_TO_MIN * 2)
+/*
+ * Maximum amount bab_queue_full() will roll work, if it is allowed to.
+ * Since work can sometimes (rarely) queue up with many chips,
+ * limit it to avoid it getting too much range in the pending work
+ */
+#define BAB_MAX_ROLLTIME 42
+
static void bab_ms3steps(uint32_t *p)
{
uint32_t a, b, c, d, e, f, g, h, new_e, new_a;
@@ -632,7 +642,10 @@ static void cleanup_older(struct cgpu_info *babcgpu, int chip, K_ITEM *witem)
k_unlink_item(babinfo->chip_work[chip], tail);
K_WUNLOCK(babinfo->chip_work[chip]);
- work_completed(babcgpu, DATAW(tail)->work);
+ if (DATAW(tail)->rolled)
+ free_work(DATAW(tail)->work);
+ else
+ work_completed(babcgpu, DATAW(tail)->work);
K_WLOCK(babinfo->chip_work[chip]);
k_add_head(babinfo->wfree_list, tail);
tail = babinfo->chip_work[chip]->tail;
@@ -643,7 +656,10 @@ static void cleanup_older(struct cgpu_info *babcgpu, int chip, K_ITEM *witem)
while (tail && tail != witem) {
k_unlink_item(babinfo->chip_work[chip], tail);
K_WUNLOCK(babinfo->chip_work[chip]);
- work_completed(babcgpu, DATAW(tail)->work);
+ if (DATAW(tail)->rolled)
+ free_work(DATAW(tail)->work);
+ else
+ work_completed(babcgpu, DATAW(tail)->work);
K_WLOCK(babinfo->chip_work[chip]);
k_add_head(babinfo->wfree_list, tail);
tail = babinfo->chip_work[chip]->tail;
@@ -2299,10 +2315,11 @@ static void bab_shutdown(struct thr_info *thr)
static bool bab_queue_full(struct cgpu_info *babcgpu)
{
struct bab_info *babinfo = (struct bab_info *)(babcgpu->device_data);
- struct work *work;
+ int roll, roll_limit = BAB_MAX_ROLLTIME;
+ struct work *work, *usework;
K_ITEM *item;
- int count;
- bool ret;
+ int count, need;
+ bool ret, rolled;
K_RLOCK(babinfo->available_work);
count = babinfo->available_work->count;
@@ -2311,18 +2328,39 @@ static bool bab_queue_full(struct cgpu_info *babcgpu)
if (count >= (babinfo->chips - babinfo->total_disabled))
ret = true;
else {
+ need = (babinfo->chips - babinfo->total_disabled) - count;
work = get_queued(babcgpu);
if (work) {
- K_WLOCK(babinfo->wfree_list);
- item = k_unlink_head_zero(babinfo->wfree_list);
- DATAW(item)->work = work;
- k_add_head(babinfo->available_work, item);
- K_WUNLOCK(babinfo->wfree_list);
- } else
+ if (roll_limit > work->drv_rolllimit)
+ roll_limit = work->drv_rolllimit;
+ roll = 0;
+ do {
+ if (roll == 0) {
+ usework = work;
+ babinfo->work_unrolled++;
+ rolled = false;
+ } else {
+ usework = copy_work_noffset(work, roll);
+ babinfo->work_rolled++;
+ rolled = true;
+ }
+
+ K_WLOCK(babinfo->wfree_list);
+ item = k_unlink_head_zero(babinfo->wfree_list);
+ DATAW(item)->work = usework;
+ DATAW(item)->rolled = rolled;
+ k_add_head(babinfo->available_work, item);
+ K_WUNLOCK(babinfo->wfree_list);
+ } while (--need > 0 && ++roll <= roll_limit);
+ } else {
// Avoid a hard loop when we can't get work fast enough
cgsleep_us(42);
+ }
- ret = false;
+ if (need > 0)
+ ret = false;
+ else
+ ret = true;
}
return ret;
@@ -2947,6 +2985,9 @@ static struct api_data *bab_api_stats(struct cgpu_info *babcgpu)
root = api_add_int(root, "Reply Wait", &(babinfo->reply_wait), true);
root = api_add_uint64(root, "Reply Waits", &(babinfo->reply_waits), true);
+ root = api_add_uint64(root, "Work Unrolled", &(babinfo->work_unrolled), true);
+ root = api_add_uint64(root, "Work Rolled", &(babinfo->work_rolled), true);
+
i = (int)(babinfo->max_speed);
root = api_add_int(root, bab_options[0], &i, true);
i = (int)(babinfo->def_speed);