Test the target in the actual scrypt kernel itself, saving further calculations.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
diff --git a/driver-opencl.c b/driver-opencl.c
index 3ec60dd..cd2c9ab 100644
--- a/driver-opencl.c
+++ b/driver-opencl.c
@@ -998,8 +998,10 @@ static cl_int queue_scrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_u
char *midstate = blk->work->midstate;
cl_kernel *kernel = &clState->kernel;
unsigned int num = 0;
+ cl_uint le_target;
cl_int status = 0;
+ le_target = ~swab32((uint32_t)blk->work->target[7]);
clState->cldata = blk->work->data;
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
@@ -1008,6 +1010,7 @@ static cl_int queue_scrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_u
CL_SET_ARG(clState->padbuffer8);
CL_SET_VARG(4, &midstate[0]);
CL_SET_VARG(4, &midstate[16]);
+ CL_SET_ARG(le_target);
return status;
}
diff --git a/scrypt120713.cl b/scrypt120713.cl
index f7c1a6c..95b006e 100644
--- a/scrypt120713.cl
+++ b/scrypt120713.cl
@@ -689,7 +689,9 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
#define NFLAG (0x7F)
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void search(__global const uint4 * restrict input, __global uint*restrict output, __global uint4*restrict padcache, const uint4 midstate0, const uint4 midstate16)
+__kernel void search(__global const uint4 * restrict input,
+__global uint*restrict output, __global uint4*restrict padcache,
+const uint4 midstate0, const uint4 midstate16, const uint target)
{
uint gid = get_global_id(0);
uint4 X[8];
@@ -722,7 +724,7 @@ __kernel void search(__global const uint4 * restrict input, __global uint*restri
SHA256_fixed(&tmp0,&tmp1);
SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U));
- if ((ostate1.w&0xFFFF) == 0)
+ if (!(ostate1.w&target))
output[FOUND] = output[NFLAG & gid] = gid;
}