Fix target testing in the scrypt kernel, as the previous check would have missed shares below target.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
diff --git a/driver-opencl.c b/driver-opencl.c
index 059a7ec..7b3f8b7 100644
--- a/driver-opencl.c
+++ b/driver-opencl.c
@@ -1053,7 +1053,7 @@ static cl_int queue_scrypt_kernel(_clState *clState, dev_blk_ctx *blk, __maybe_u
cl_uint le_target;
cl_int status = 0;
- le_target = ~swab32(*(cl_uint *)(blk->work->target + 28));
+ le_target = *(cl_uint *)(blk->work->target + 28);
clState->cldata = blk->work->data;
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
diff --git a/scrypt120713.cl b/scrypt120713.cl
index a273f02..d38f6a5 100644
--- a/scrypt120713.cl
+++ b/scrypt120713.cl
@@ -2,10 +2,7 @@
#define Ch(x,y,z) bitselect(z,y,x)
#define Maj(x,y,z) Ch((x^z),y,z)
-uint4 EndianSwap4(uint4 n)
-{
- return rotl(n&0x00FF00FF,24U)|rotl(n&0xFF00FF00,8U);
-}
+#define EndianSwap(n) (rotl(n&0x00FF00FF,24U)|rotl(n&0xFF00FF00,8U))
#define Tr2(x) (rotl(x, 30U) ^ rotl(x, 19U) ^ rotl(x, 10U))
#define Tr1(x) (rotl(x, 26U) ^ rotl(x, 21U) ^ rotl(x, 7U))
@@ -552,7 +549,7 @@ void shittify(uint4 B[8])
#pragma unroll
for(uint i=0; i<4; ++i)
- B[i] = EndianSwap4(tmp[i]);
+ B[i] = EndianSwap(tmp[i]);
tmp[0] = (uint4)(B[5].x,B[6].y,B[7].z,B[4].w);
tmp[1] = (uint4)(B[6].x,B[7].y,B[4].z,B[5].w);
@@ -561,7 +558,7 @@ void shittify(uint4 B[8])
#pragma unroll
for(uint i=0; i<4; ++i)
- B[i+4] = EndianSwap4(tmp[i]);
+ B[i+4] = EndianSwap(tmp[i]);
}
void unshittify(uint4 B[8])
@@ -574,7 +571,7 @@ void unshittify(uint4 B[8])
#pragma unroll
for(uint i=0; i<4; ++i)
- B[i] = EndianSwap4(tmp[i]);
+ B[i] = EndianSwap(tmp[i]);
tmp[0] = (uint4)(B[7].x,B[6].y,B[5].z,B[4].w);
tmp[1] = (uint4)(B[4].x,B[7].y,B[6].z,B[5].w);
@@ -583,7 +580,7 @@ void unshittify(uint4 B[8])
#pragma unroll
for(uint i=0; i<4; ++i)
- B[i+4] = EndianSwap4(tmp[i]);
+ B[i+4] = EndianSwap(tmp[i]);
}
void salsa(uint4 B[8])
@@ -723,8 +720,9 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
SHA256(&tmp0,&tmp1, X[4], X[5], X[6], X[7]);
SHA256_fixed(&tmp0,&tmp1);
SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U));
-
- if (!(ostate1.w & target))
+
+ bool found = (EndianSwap(ostate1.w) <= target);
+ if (found)
output[FOUND] = output[NFLAG & gid] = gid;
}