Commit 4fbe5bed152a55e3f5baaa012e88c11d2f335bb8

Con Kolivas 2012-08-23T23:25:32

OpenCL 1.0 does not have native atomic_add and extremely slow support with atom_add so detect opencl1.0 and use a non-atomic workaround.

diff --git a/diablo120823.cl b/diablo120823.cl
index 4687c5b..f4055aa 100644
--- a/diablo120823.cl
+++ b/diablo120823.cl
@@ -1244,28 +1244,33 @@ void search(
     
 #define FOUND (0x0F)
 
+#if defined(OCL1)
+	#define SETFOUND(Xfound, Xnonce) do {	\
+		(Xfound) = output[FOUND];	\
+		output[FOUND] += 1;		\
+		output[Xfound] = Xnonce;	\
+	} while (0)
+#else
+	#define SETFOUND(Xfound, Xnonce) do {	\
+		Xfound = atomic_add(&output[FOUND], 1); \
+		output[Xfound] = Xnonce;	\
+	} while (0)
+#endif
+
 #if defined(VECTORS4)
 	bool result = any(ZA[924] == 0x136032EDU);
 
 	if (result) {
 		uint found;
 
-		if (ZA[924].x == 0x136032EDU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = Znonce.x;
-		}
-		if (ZA[924].y == 0x136032EDU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = Znonce.y;
-		}
-		if (ZA[924].z == 0x136032EDU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = Znonce.z;
-		}
-		if (ZA[924].w == 0x136032EDU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = Znonce.w;
-		}
+		if (ZA[924].x == 0x136032EDU)
+			SETFOUND(found, Znonce.x);
+		if (ZA[924].y == 0x136032EDU)
+			SETFOUND(found, Znonce.y);
+		if (ZA[924].z == 0x136032EDU)
+			SETFOUND(found, Znonce.z);
+		if (ZA[924].w == 0x136032EDU)
+			SETFOUND(found, Znonce.w);
 	}
 #elif defined(VECTORS2)
 	bool result = any(ZA[924] == 0x136032EDU);
@@ -1273,19 +1278,16 @@ void search(
 	if (result) {
 		uint found;
 
-		if (ZA[924].x == 0x136032EDU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = Znonce.x;
-		}
-		if (ZA[924].y == 0x136032EDU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = Znonce.y;
-		}
+		if (ZA[924].x == 0x136032EDU)
+			SETFOUND(found, Znonce.x);
+		if (ZA[924].y == 0x136032EDU)
+			SETFOUND(found, Znonce.y);
 	}
 #else
 	if (ZA[924] == 0x136032EDU) {
-		uint found = atomic_add(&output[FOUND], 1);
-		output[found] = Znonce;
+		uint found;
+
+		SETFOUND(found, Znonce);
 	}
 #endif
 }
diff --git a/diakgcn120823.cl b/diakgcn120823.cl
index d27674f..fb4b154 100644
--- a/diakgcn120823.cl
+++ b/diakgcn120823.cl
@@ -573,44 +573,46 @@ __kernel
 
 #define FOUND (0x0F)
 
+#if defined(OCL1)
+	#define SETFOUND(Xfound, Xnonce) do {	\
+		(Xfound) = output[FOUND];	\
+		output[FOUND] += 1;		\
+		output[Xfound] = Xnonce;	\
+	} while (0)
+#else
+	#define SETFOUND(Xfound, Xnonce) do {	\
+		Xfound = atomic_add(&output[FOUND], 1); \
+		output[Xfound] = Xnonce;	\
+	} while (0)
+#endif
+
 #ifdef VECTORS4
 	if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) {
 		uint found;
 
-		if (V[7].x == 0x136032edU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = nonce.x;
-		}
-		if (V[7].y == 0x136032edU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = nonce.y;
-		}
-		if (V[7].z == 0x136032edU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = nonce.z;
-		}
-		if (V[7].w == 0x136032edU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = nonce.w;
-		}
+		if (V[7].x == 0x136032edU)
+			SETFOUND(found, nonce.x);
+		if (V[7].y == 0x136032edU)
+			SETFOUND(found, nonce.y);
+		if (V[7].z == 0x136032edU)
+			SETFOUND(found, nonce.z);
+		if (V[7].w == 0x136032edU)
+			SETFOUND(found, nonce.w);
 	}
 #elif defined VECTORS2
 	if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU)) {
 		uint found;
 
-		if (V[7].x == 0x136032edU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = nonce.x;
-		}
-		if (V[7].y == 0x136032edU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = nonce.y;
-		}
+		if (V[7].x == 0x136032edU)
+			SETFOUND(found, nonce.x);
+		if (V[7].y == 0x136032edU)
+			SETFOUND(found, nonce.y);
 	}
 #else
 	if (V[7] == 0x136032edU) {
-		uint found = atomic_add(&output[FOUND], 1);
-		output[found] = nonce;
+		uint found;
+
+		SETFOUND(found, nonce);
 	}
 #endif
 }
diff --git a/ocl.c b/ocl.c
index 7bf606c..fe45782 100644
--- a/ocl.c
+++ b/ocl.c
@@ -659,6 +659,9 @@ build:
 	if (clState->goffset)
 		strcat(CompilerOptions, " -D GOFFSET");
 
+	if (!clState->hasOpenCL11plus)
+		strcat(CompilerOptions, " -D OCL1");
+
 	applog(LOG_DEBUG, "CompilerOptions: %s", CompilerOptions);
 	status = clBuildProgram(clState->program, 1, &devices[gpu], CompilerOptions , NULL, NULL);
 	free(CompilerOptions);
diff --git a/phatk120823.cl b/phatk120823.cl
index cf5eb09..4693cd8 100644
--- a/phatk120823.cl
+++ b/phatk120823.cl
@@ -389,46 +389,48 @@ void search(	const uint state0, const uint state1, const uint state2, const uint
 
 #define FOUND (0x0F)
 
+#if defined(OCL1)
+	#define SETFOUND(Xfound, Xnonce) do {	\
+		(Xfound) = output[FOUND];	\
+		output[FOUND] += 1;		\
+		output[Xfound] = Xnonce;	\
+	} while (0)
+#else
+	#define SETFOUND(Xfound, Xnonce) do {	\
+		Xfound = atomic_add(&output[FOUND], 1); \
+		output[Xfound] = Xnonce;	\
+	} while (0)
+#endif
+
 #ifdef VECTORS4
 	bool result = W[117].x & W[117].y & W[117].z & W[117].w;
 	if (!result) {
 		uint found;
 
-		if (!W[117].x) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = W[3].x;
-		}
-		if (!W[117].y) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = W[3].y;
-		}
-		if (!W[117].z) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = W[3].z;
-		}
-		if (!W[117].w) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = W[3].w;
-		}
+		if (!W[117].x)
+			SETFOUND(found, W[3].x);
+		if (!W[117].y)
+			SETFOUND(found, W[3].y);
+		if (!W[117].z)
+			SETFOUND(found, W[3].z);
+		if (!W[117].w)
+			SETFOUND(found, W[3].w);
 	}
 #elif defined VECTORS2
 	bool result = W[117].x & W[117].y;
 	if (!result) {
 		uint found;
 
-		if (!W[117].x) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = W[3].x;
-		}
-		if (!W[117].y) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = W[3].y;
-		}
+		if (!W[117].x)
+			SETFOUND(found, W[3].x);
+		if (!W[117].y)
+			SETFOUND(found, W[3].y);
 	}
 #else
 	if (!W[117]) {
-		uint found = atomic_add(&output[FOUND], 1);
-		output[found] = W[3];
+		uint found;
+
+		SETFOUND(found, W[3]);
 	}
 #endif
 }
diff --git a/poclbm120823.cl b/poclbm120823.cl
index a02413b..64bdb27 100644
--- a/poclbm120823.cl
+++ b/poclbm120823.cl
@@ -1323,34 +1323,40 @@ Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
 
 #define FOUND (0x0F)
 
+#if defined(OCL1)
+	#define SETFOUND(Xfound, Xnonce) do {	\
+		(Xfound) = output[FOUND];	\
+		output[FOUND] += 1;		\
+		output[Xfound] = Xnonce;	\
+	} while (0)
+#else
+	#define SETFOUND(Xfound, Xnonce) do {	\
+		Xfound = atomic_add(&output[FOUND], 1); \
+		output[Xfound] = Xnonce;	\
+	} while (0)
+#endif
+
 #if defined(VECTORS2) || defined(VECTORS4)
 
 	if (any(Vals[2] == 0x136032edU)) {
 		uint found;
 
-		if (Vals[2].x == 0x136032edU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = nonce.x;
-		}
-		if (Vals[2].y == 0x136032edU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = nonce.y;
-		}
+		if (Vals[2].x == 0x136032edU)
+			SETFOUND(found, nonce.x);
+		if (Vals[2].y == 0x136032edU)
+			SETFOUND(found, nonce.y);
 #if defined(VECTORS4)
-		if (Vals[2].z == 0x136032edU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = nonce.z;
-		}
-		if (Vals[2].w == 0x136032edU) {
-			found = atomic_add(&output[FOUND], 1);
-			output[found] = nonce.w;
-		}
+		if (Vals[2].z == 0x136032edU)
+			SETFOUND(found, nonce.z);
+		if (Vals[2].w == 0x136032edU)
+			SETFOUND(found, nonce.w);
 #endif
 	}
 #else
 	if (Vals[2] == 0x136032edU) {
-		uint found = atomic_add(&output[FOUND], 1);
-		output[found] = nonce;
+		uint found;
+
+		SETFOUND(found, nonce);
 	}
 #endif
 }
diff --git a/scrypt120823.cl b/scrypt120823.cl
index 7390d2c..bcc0b70 100644
--- a/scrypt120823.cl
+++ b/scrypt120823.cl
@@ -684,6 +684,19 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
 
 #define FOUND (0x0F)
 
+#if defined(OCL1)
+	#define SETFOUND(Xfound, Xnonce) do {	\
+		(Xfound) = output[FOUND];	\
+		output[FOUND] += 1;		\
+		output[Xfound] = Xnonce;	\
+	} while (0)
+#else
+	#define SETFOUND(Xfound, Xnonce) do {	\
+		Xfound = atomic_add(&output[FOUND], 1); \
+		output[Xfound] = Xnonce;	\
+	} while (0)
+#endif
+
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
 __kernel void search(__global const uint4 * restrict input,
 volatile __global uint*restrict output, __global uint4*restrict padcache,
@@ -722,8 +735,9 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
 
 	bool result = (EndianSwap(ostate1.w) <= target);
 	if (result) {
-		uint found = atomic_add(&output[FOUND], 1);
-		output[found] = gid;
+		uint found;
+
+		SETFOUND(found, gid);
 	}
 }