Remove atomic ops from OpenCL kernels, given the rarity of more than one nonce on the same wavefront and the potentially increased RAM speed requirements of using the atomics.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
diff --git a/diablo120823.cl b/diablo120823.cl
index b287f4f..7b3738b 100644
--- a/diablo120823.cl
+++ b/diablo120823.cl
@@ -1243,12 +1243,7 @@ void search(
ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]);
#define FOUND (0x0F)
-
-#if defined(OCL1)
- #define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
- #define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#if defined(VECTORS4)
bool result = any(ZA[924] == 0x136032EDU);
diff --git a/diakgcn120823.cl b/diakgcn120823.cl
index b8e7686..b87fbde 100644
--- a/diakgcn120823.cl
+++ b/diakgcn120823.cl
@@ -572,12 +572,7 @@ __kernel
V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
#define FOUND (0x0F)
-
-#if defined(OCL1)
- #define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
- #define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#ifdef VECTORS4
if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) {
diff --git a/phatk120823.cl b/phatk120823.cl
index fb1ce3c..60f2870 100644
--- a/phatk120823.cl
+++ b/phatk120823.cl
@@ -388,12 +388,7 @@ void search( const uint state0, const uint state1, const uint state2, const uint
(-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64)));
#define FOUND (0x0F)
-
-#if defined(OCL1)
- #define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
- #define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#ifdef VECTORS4
bool result = W[117].x & W[117].y & W[117].z & W[117].w;
diff --git a/poclbm120823.cl b/poclbm120823.cl
index 9ae2ee9..d30f73f 100644
--- a/poclbm120823.cl
+++ b/poclbm120823.cl
@@ -1322,12 +1322,7 @@ Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
#define FOUND (0x0F)
-
-#if defined(OCL1)
- #define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
- #define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
#if defined(VECTORS2) || defined(VECTORS4)
if (any(Vals[2] == 0x136032edU)) {
diff --git a/scrypt120823.cl b/scrypt120823.cl
index 4b88458..e11ab19 100644
--- a/scrypt120823.cl
+++ b/scrypt120823.cl
@@ -683,12 +683,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
}
#define FOUND (0x0F)
-
-#if defined(OCL1)
- #define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
- #define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search(__global const uint4 * restrict input,