Commit 775a27281a34dba280ea1dfef069c8affa418ba4

ckolivas 2012-10-16T15:10:22

Remove atomic ops from opencl kernels given rarity of more than once nonce on the same wavefront and the potential increased ramspeed requirements to use the atomics.

diff --git a/diablo120823.cl b/diablo120823.cl
index b287f4f..7b3738b 100644
--- a/diablo120823.cl
+++ b/diablo120823.cl
@@ -1243,12 +1243,7 @@ void search(
     ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]);
     
 #define FOUND (0x0F)
-
-#if defined(OCL1)
-	#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
-	#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 #if defined(VECTORS4)
 	bool result = any(ZA[924] == 0x136032EDU);
diff --git a/diakgcn120823.cl b/diakgcn120823.cl
index b8e7686..b87fbde 100644
--- a/diakgcn120823.cl
+++ b/diakgcn120823.cl
@@ -572,12 +572,7 @@ __kernel
 	V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
 
 #define FOUND (0x0F)
-
-#if defined(OCL1)
-	#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
-	#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 #ifdef VECTORS4
 	if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) {
diff --git a/phatk120823.cl b/phatk120823.cl
index fb1ce3c..60f2870 100644
--- a/phatk120823.cl
+++ b/phatk120823.cl
@@ -388,12 +388,7 @@ void search(	const uint state0, const uint state1, const uint state2, const uint
 		(-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64))  + s1(64+59)+ ch(59+64)));
 
 #define FOUND (0x0F)
-
-#if defined(OCL1)
-	#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
-	#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 #ifdef VECTORS4
 	bool result = W[117].x & W[117].y & W[117].z & W[117].w;
diff --git a/poclbm120823.cl b/poclbm120823.cl
index 9ae2ee9..d30f73f 100644
--- a/poclbm120823.cl
+++ b/poclbm120823.cl
@@ -1322,12 +1322,7 @@ Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
 Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
 
 #define FOUND (0x0F)
-
-#if defined(OCL1)
-	#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
-	#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 #if defined(VECTORS2) || defined(VECTORS4)
 	if (any(Vals[2] == 0x136032edU)) {
diff --git a/scrypt120823.cl b/scrypt120823.cl
index 4b88458..e11ab19 100644
--- a/scrypt120823.cl
+++ b/scrypt120823.cl
@@ -683,12 +683,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
 }
 
 #define FOUND (0x0F)
-
-#if defined(OCL1)
-	#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
-#else
-	#define SETFOUND(Xnonce) output[atomic_add(&output[FOUND], 1)] = Xnonce
-#endif
+#define SETFOUND(Xnonce) output[output[FOUND]++] = Xnonce
 
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
 __kernel void search(__global const uint4 * restrict input,