Don't call clEnqueueWriteBuffer at all for the pad8 buffer, and pass the work details around for scrypt via dev_blk.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
diff --git a/cgminer.c b/cgminer.c
index a5529f2..14183ec 100644
--- a/cgminer.c
+++ b/cgminer.c
@@ -3983,11 +3983,15 @@ bool hashtest(const struct work *work)
bool test_nonce(struct work *work, uint32_t nonce)
{
+ uint32_t *work_nonce = (uint32_t *)(work->data + 64 + 12);
+
+ *work_nonce = htobe32(nonce);
+#if 0
work->data[64 + 12 + 0] = (nonce >> 0) & 0xff;
work->data[64 + 12 + 1] = (nonce >> 8) & 0xff;
work->data[64 + 12 + 2] = (nonce >> 16) & 0xff;
work->data[64 + 12 + 3] = (nonce >> 24) & 0xff;
-
+#endif
if (opt_scrypt)
return true;
diff --git a/driver-opencl.c b/driver-opencl.c
index 773bf8e..4ddf336 100644
--- a/driver-opencl.c
+++ b/driver-opencl.c
@@ -993,14 +993,14 @@ static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint t
}
#ifdef USE_SCRYPT
-static cl_int queue_scrypt_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
+static cl_int queue_scrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_unused cl_uint threads)
{
- cl_uint4 *midstate = (cl_uint4 *)blk->midstate;
+ cl_uint4 *midstate = (cl_uint4 *)blk->work->midstate;
cl_kernel *kernel = &clState->kernel;
unsigned int num = 0;
cl_int status = 0;
- int i;
+ clState->cldata = blk->work->data;
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
CL_SET_ARG(clState->CLbuffer0);
@@ -1349,13 +1349,6 @@ static bool opencl_thread_init(struct thr_info *thr)
return false;
}
-#ifdef USE_SCRYPT
- if (opt_scrypt) {
- if (clState->padbufsize > BUFFERSIZE)
- blank_res = realloc(blank_res, clState->padbufsize);
- status = clEnqueueWriteBuffer(clState->commandQueue, clState->padbuffer8, true, 0, clState->padbufsize, blank_res, 0, NULL,NULL);
- }
-#endif
status |= clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
BUFFERSIZE, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
@@ -1385,7 +1378,12 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work)
{
- precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
+#ifdef USE_SCRYPT
+ if (opt_scrypt)
+ work->blk.work = work;
+ else
+#endif
+ precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
return true;
}
@@ -1446,9 +1444,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
if (hashes > gpu->max_hashes)
gpu->max_hashes = hashes;
-#ifdef USE_SCRYPT
- clState->cldata = work->data;
-#endif
status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
diff --git a/findnonce.c b/findnonce.c
index ce282dc..d0e1917 100644
--- a/findnonce.c
+++ b/findnonce.c
@@ -45,7 +45,8 @@ const uint32_t SHA256_K[64] = {
d = d + h; \
h = h + (rotate(a, 30) ^ rotate(a, 19) ^ rotate(a, 10)) + ((a & b) | (c & (a | b)))
-void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
+void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data)
+{
cl_uint A, B, C, D, E, F, G, H;
A = state[0];
@@ -127,10 +128,6 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
blk->fiveA = blk->ctx_f + SHA256_K[5];
blk->sixA = blk->ctx_g + SHA256_K[6];
blk->sevenA = blk->ctx_h + SHA256_K[7];
-
-#ifdef USE_SCRYPT
- blk->midstate = (unsigned char *)state;
-#endif
}
#define P(t) (W[(t)&0xF] = W[(t-16)&0xF] + (rotate(W[(t-15)&0xF], 25) ^ rotate(W[(t-15)&0xF], 14) ^ (W[(t-15)&0xF] >> 3)) + W[(t-7)&0xF] + (rotate(W[(t-2)&0xF], 15) ^ rotate(W[(t-2)&0xF], 13) ^ (W[(t-2)&0xF] >> 10)))
diff --git a/miner.h b/miner.h
index da62dcb..5cc683c 100644
--- a/miner.h
+++ b/miner.h
@@ -671,7 +671,7 @@ typedef struct {
cl_uint zeroA, zeroB;
cl_uint oneA, twoA, threeA, fourA, fiveA, sixA, sevenA;
#ifdef USE_SCRYPT
- unsigned char *midstate;
+ struct work *work;
#endif
} dev_blk_ctx;
#else