Move to 256-sized buffers and avoid the risk of overwrites by using only a 127 (0x7F) mask.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
diff --git a/findnonce.c b/findnonce.c
index 23c01e6..45b5910 100644
--- a/findnonce.c
+++ b/findnonce.c
@@ -181,14 +181,14 @@ static void *postcalc_hash(void *userdata)
pthread_detach(pthread_self());
cycle:
- while (entry < OUTBUFFERS) {
+ while (entry < FOUND) {
if (pcd->res[entry]) {
nonce = pcd->res[entry++];
break;
}
entry++;
}
- if (entry == OUTBUFFERS)
+ if (entry == FOUND)
goto out;
A = blk->cty_a; B = blk->cty_b;
@@ -231,7 +231,7 @@ cycle:
hw_errors++;
thr->cgpu->hw_errors++;
}
- if (entry < OUTBUFFERS)
+ if (entry < FOUND)
goto cycle;
out:
free(pcd);
diff --git a/findnonce.h b/findnonce.h
index b22afeb..93cd1fe 100644
--- a/findnonce.h
+++ b/findnonce.h
@@ -5,9 +5,10 @@
#define MAXTHREADS (0xFFFFFFFEULL)
/* Maximum worksize 4k to match page size */
-#define MAXBUFFERS (4095)
-#define BUFFERSIZE (sizeof(uint32_t) * (MAXBUFFERS + 1))
-#define OUTBUFFERS (0xFF)
+#define MAXBUFFERS (0xFF)
+#define BUFFERSIZE (sizeof(uint32_t) * MAXBUFFERS)
+#define FOUND (0x80)
+/* #define NFLAG (0x7F) Just for reference */
#ifdef HAVE_OPENCL
extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
diff --git a/main.c b/main.c
index e31718b..03fc3ca 100644
--- a/main.c
+++ b/main.c
@@ -3610,7 +3610,7 @@ static void *gpuminer_thread(void *userdata)
{ applog(LOG_ERR, "Error: clSetKernelArg of all params failed."); goto out; }
/* MAXBUFFERS entry is used as a flag to say nonces exist */
- if (res[MAXBUFFERS]) {
+ if (res[FOUND]) {
/* Clear the buffer again */
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
BUFFERSIZE, blank_res, 0, NULL, NULL);
diff --git a/phatk110816.cl b/phatk110816.cl
index 9b0b777..ebcf0bb 100644
--- a/phatk110816.cl
+++ b/phatk110816.cl
@@ -387,42 +387,42 @@ void search( const uint state0, const uint state1, const uint state2, const uint
u v = W[117] + W[108] + Vals[3] + Vals[7] + P2(124) + P1(124) + Ch((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64),Vals[1],Vals[2]);
u g = -(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64));
-
-#define NFLAG (0xFF)
+
+#define FOUND (0x80)
+#define NFLAG (0x7F)
#ifdef VECTORS4
if (v.x == g.x)
{
- output[MAXBUFFERS] = output[NFLAG & W[3].x] = W[3].x;
+ output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
}
if (v.y == g.y)
{
- output[MAXBUFFERS] = output[NFLAG & W[3].y] = W[3].y;
+ output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
}
if (v.z == g.z)
{
- output[MAXBUFFERS] = output[NFLAG & W[3].z] = W[3].z;
+ output[FOUND] = output[NFLAG & W[3].z] = W[3].z;
}
if (v.w == g.w)
{
- output[MAXBUFFERS] = output[NFLAG & W[3].w] = W[3].w;
+ output[FOUND] = output[NFLAG & W[3].w] = W[3].w;
}
#else
#ifdef VECTORS2
if (v.x == g.x)
{
- output[MAXBUFFERS] = output[NFLAG & W[3].x] = W[3].x;
+ output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
}
if (v.y == g.y)
{
- output[MAXBUFFERS] = output[NFLAG & W[3].y] = W[3].y;
+ output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
}
#else
if (v == g)
{
- output[MAXBUFFERS] = output[NFLAG & W[3]] = W[3];
+ output[FOUND] = output[NFLAG & W[3]] = W[3];
}
#endif
#endif
}
-
diff --git a/poclbm110816.cl b/poclbm110816.cl
index a16e4de..a222c6e 100644
--- a/poclbm110816.cl
+++ b/poclbm110816.cl
@@ -625,32 +625,32 @@ __kernel void search( const uint state0, const uint state1, const uint state2, c
Vals[7] = Vals[7] + Vals[3] + (rotr(Vals[0], 6) ^ rotr(Vals[0], 11) ^ rotr(Vals[0], 25)) + ch(Vals[0], Vals[1], Vals[2]) + K[60] + W[12];
-#define MAXBUFFERS (4095)
-#define NFLAG (0xFF)
+#define FOUND (0x80)
+#define NFLAG (0x7F)
#if defined(VECTORS4) || defined(VECTORS2)
if (Vals[7].x == -0x5be0cd19U)
{
- output[MAXBUFFERS] = output[NFLAG & nonce.x] = nonce.x;
+ output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
}
if (Vals[7].y == -0x5be0cd19U)
{
- output[MAXBUFFERS] = output[NFLAG & nonce.y] = nonce.y;
+ output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
}
#ifdef VECTORS4
if (Vals[7].z == -0x5be0cd19U)
{
- output[MAXBUFFERS] = output[NFLAG & nonce.z] = nonce.z;
+ output[FOUND] = output[NFLAG & nonce.z] = nonce.z;
}
if (Vals[7].w == -0x5be0cd19U)
{
- output[MAXBUFFERS] = output[NFLAG & nonce.w] = nonce.w;
+ output[FOUND] = output[NFLAG & nonce.w] = nonce.w;
}
#endif
#else
if (Vals[7] == -0x5be0cd19U)
{
- output[MAXBUFFERS] = output[NFLAG & nonce] = nonce;
+ output[FOUND] = output[NFLAG & nonce] = nonce;
}
#endif
-}
\ No newline at end of file
+}