Commit 8f08a775add15bb47845d1f833218bfd03b5ce71

Con Kolivas 2012-02-22T14:10:04

Use any() in kernel output code and revert breakage of diakgcn kernel.

diff --git a/DiabloMiner120221.cl b/DiabloMiner120221.cl
index a151db3..4bc7394 100644
--- a/DiabloMiner120221.cl
+++ b/DiabloMiner120221.cl
@@ -1237,26 +1237,28 @@ __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search(
 #define NFLAG (0x7F)
 
 #if defined(VECTORS4)
-	ZA[924] ^= 0x136032EDU;
-	bool result = ZA[924].x & ZA[924].y & ZA[924].z & ZA[924].w;
-	if (!result) {
-		if (!ZA[924].x)
-			output[FOUND] = output[NFLAG & Znonce.x] =  Znonce.x;
-		if (!ZA[924].y)
-			output[FOUND] = output[NFLAG & Znonce.y] =  Znonce.y;
-		if (!ZA[924].z)
-			output[FOUND] = output[NFLAG & Znonce.z] =  Znonce.z;
-		if (!ZA[924].w)
-			output[FOUND] = output[NFLAG & Znonce.w] =  Znonce.w;
+	bool result = any(ZA[924] == 0x136032EDU);
+
+	if (result) {
+		output[FOUND] = FOUND;
+		if (ZA[924].x == 0x136032EDU)
+			output[NFLAG & Znonce.x] =  Znonce.x;
+		if (ZA[924].y == 0x136032EDU)
+			output[NFLAG & Znonce.y] =  Znonce.y;
+		if (ZA[924].z == 0x136032EDU)
+			output[NFLAG & Znonce.z] =  Znonce.z;
+		if (ZA[924].w == 0x136032EDU)
+			output[NFLAG & Znonce.w] =  Znonce.w;
 	}
 #elif defined(VECTORS2)
-	ZA[924] ^= 0x136032EDU;
-	bool result = ZA[924].x & ZA[924].y;
-	if (!result) {
-		if (!ZA[924].x)
-			output[FOUND] = output[NFLAG & Znonce.x] =  Znonce.x;
-		if (!ZA[924].y)
-			output[FOUND] = output[NFLAG & Znonce.y] =  Znonce.y;
+	bool result = any(ZA[924] == 0x136032EDU);
+
+	if (result) {
+		output[FOUND] = FOUND;
+		if (ZA[924].x == 0x136032EDU)
+			output[NFLAG & Znonce.x] =  Znonce.x;
+		if (ZA[924].y == 0x136032EDU)
+			output[NFLAG & Znonce.y] =  Znonce.y;
 	}
 #else
 	if (ZA[924] == 0x136032EDU)
diff --git a/device-gpu.c b/device-gpu.c
index 4aad0e2..f878140 100644
--- a/device-gpu.c
+++ b/device-gpu.c
@@ -748,7 +748,8 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk,
 	return status;
 }
 
-static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
+static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk,
+				   __maybe_unused cl_uint threads)
 {
 	cl_uint vwidth = clState->preferred_vwidth;
 	cl_kernel *kernel = &clState->kernel;
@@ -758,7 +759,7 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint 
 
 	nonces = alloca(sizeof(uint) * vwidth);
 	for (i = 0; i < vwidth; i++)
-		nonces[i] = blk->nonce + (i * threads);
+		nonces[i] = blk->nonce + i;
 	CL_SET_VARG(vwidth, nonces);
 
 	CL_SET_BLKARG(PreVal0);
diff --git a/diakgcn120216.cl b/diakgcn120216.cl
index 62effa4..a64d546 100644
--- a/diakgcn120216.cl
+++ b/diakgcn120216.cl
@@ -55,18 +55,30 @@ __kernel
 	u V[8];
 	u W[16];
 
-#ifdef GOFFSET
-	#ifdef VECTORS8
+#ifdef VECTORS8
+	#ifdef GOFFSET
 		const u nonce = ((uint)get_global_id(0) << 3) + (u)(0, 1, 2, 3, 4, 5, 6, 7);
-	#elif defined VECTORS4
+	#else
+		const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 3) + ((uint)get_local_id(0) << 3) + base;
+	#endif
+#elif defined VECTORS4
+	#ifdef GOFFSET
 		const u nonce = ((uint)get_global_id(0) << 2) + (u)(0, 1, 2, 3);
-	#elif defined VECTORS2
+	#else
+		const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 2) + ((uint)get_local_id(0) << 2) + base;
+	#endif
+#elif defined VECTORS2
+	#ifdef GOFFSET
 		const u nonce = ((uint)get_global_id(0) << 1) + (u)(0, 1);
 	#else
-		const u nonce = (uint)get_global_id(0);
+		const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 1) + ((uint)get_local_id(0) << 1) + base;
 	#endif
 #else
-	const u nonce = base + (uint)(get_global_id(0));
+	#ifdef GOFFSET
+		const u nonce = (uint)get_global_id(0);
+	#else
+		const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0)) + (uint)get_local_id(0) + base;
+	#endif
 #endif
 
 	V[0] = PreVal0 + nonce;
@@ -585,51 +597,54 @@ __kernel
 #ifdef VECTORS8
 	V[7] ^= 0x136032ed;
 
-	bool result = V[7].s0 & V[7].s1 & V[7].s2 & V[7].s3 & V[7].s4 & V[7].s5 & V[7].s6 & V[7].s7;
+	bool result = any(V[7] == 0);
 
-	if (!result) {
+	if (result) {
+		output[FOUND] = FOUND;
 		if (!V[7].s0)
-			output[FOUND] = output[NFLAG & nonce.s0] = nonce.s0;
+			output[NFLAG & nonce.s0] = nonce.s0;
 		if (!V[7].s1)
-			output[FOUND] = output[NFLAG & nonce.s1] = nonce.s1;
+			output[NFLAG & nonce.s1] = nonce.s1;
 		if (!V[7].s2)
-			output[FOUND] = output[NFLAG & nonce.s2] = nonce.s2;
+			output[NFLAG & nonce.s2] = nonce.s2;
 		if (!V[7].s3)
-			output[FOUND] = output[NFLAG & nonce.s3] = nonce.s3;
+			output[NFLAG & nonce.s3] = nonce.s3;
 		if (!V[7].s4)
-			output[FOUND] = output[NFLAG & nonce.s4] = nonce.s4;
+			output[NFLAG & nonce.s4] = nonce.s4;
 		if (!V[7].s5)
-			output[FOUND] = output[NFLAG & nonce.s5] = nonce.s5;
+			output[NFLAG & nonce.s5] = nonce.s5;
 		if (!V[7].s6)
-			output[FOUND] = output[NFLAG & nonce.s6] = nonce.s6;
+			output[NFLAG & nonce.s6] = nonce.s6;
 		if (!V[7].s7)
-			output[FOUND] = output[NFLAG & nonce.s7] = nonce.s7;
+			output[NFLAG & nonce.s7] = nonce.s7;
 	}
 #elif defined VECTORS4
 	V[7] ^= 0x136032ed;
 
-	bool result = V[7].x & V[7].y & V[7].z & V[7].w;
+	bool result = any(V[7] == 0);
 
-	if (!result) {
+	if (result) {
+		output[FOUND] = FOUND;
 		if (!V[7].x)
-			output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
+			output[NFLAG & nonce.x] = nonce.x;
 		if (!V[7].y)
-			output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
+			output[NFLAG & nonce.y] = nonce.y;
 		if (!V[7].z)
-			output[FOUND] = output[NFLAG & nonce.z] = nonce.z;
+			output[NFLAG & nonce.z] = nonce.z;
 		if (!V[7].w)
-			output[FOUND] = output[NFLAG & nonce.w] = nonce.w;
+			output[NFLAG & nonce.w] = nonce.w;
 	}
 #elif defined VECTORS2
 	V[7] ^= 0x136032ed;
 
-	bool result = V[7].x & V[7].y;
+	bool result = any(V[7] == 0);
 
-	if (!result) {
+	if (result) {
+		output[FOUND] = FOUND;
 		if (!V[7].x)
-			output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
+			output[NFLAG & nonce.x] = nonce.x;
 		if (!V[7].y)
-			output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
+			output[NFLAG & nonce.y] = nonce.y;
 	}
 #else
 	if (V[7] == 0x136032ed)
diff --git a/findnonce.c b/findnonce.c
index d11925b..98d7f0e 100644
--- a/findnonce.c
+++ b/findnonce.c
@@ -227,12 +227,11 @@ static void *postcalc_hash(void *userdata)
 
 	pthread_detach(pthread_self());
 
-	do {
-		if (pcd->res[entry]) {
+	for (entry = 0; entry < FOUND; entry++) {
+		if (pcd->res[entry])
 			send_nonce(pcd, pcd->res[entry]);
-			nonces++;
-		}
-	} while (++entry < FOUND);
+		nonces++;
+	}
 
 	free(pcd);
 
diff --git a/phatk120213.cl b/phatk120213.cl
index 7d1c320..5c89fb9 100644
--- a/phatk120213.cl
+++ b/phatk120213.cl
@@ -391,27 +391,29 @@ void search(	const uint state0, const uint state1, const uint state2, const uint
 #define NFLAG (0x7F)
 
 #ifdef VECTORS4
-	bool result = W[117].x & W[117].y & W[117].z & W[117].w;
-	if (!result) {
+	bool result = any(W[117] == 0);
+	if (result) {
+		output[FOUND] = FOUND;
 		if (!W[117].x)
-			output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
+			output[NFLAG & W[3].x] = W[3].x;
 		if (!W[117].y)
-			output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
+			output[NFLAG & W[3].y] = W[3].y;
 		if (!W[117].z)
-			output[FOUND] = output[NFLAG & W[3].z] = W[3].z;
+			output[NFLAG & W[3].z] = W[3].z;
 		if (!W[117].w)
-			output[FOUND] = output[NFLAG & W[3].w] = W[3].w;
+			output[NFLAG & W[3].w] = W[3].w;
 	}
 #elif defined VECTORS2
-	bool result = W[117].x & W[117].y;
-	if (!result) {
+	bool result = any(W[117] == 0);
+	if (result) {
+		output[FOUND] = FOUND;
 		if (!W[117].x)
-			output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
+			output[NFLAG & W[3].x] = W[3].x;
 		if (!W[117].y)
-			output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
+			output[NFLAG & W[3].y] = W[3].y;
 	}
 #else
-	if (!W[117])
+	if (W[117] == 0)
 		output[FOUND] = output[NFLAG & W[3]] = W[3];
 #endif
 }
diff --git a/poclbm120214.cl b/poclbm120214.cl
index 7e3ecff..0972fcc 100644
--- a/poclbm120214.cl
+++ b/poclbm120214.cl
@@ -1256,26 +1256,28 @@ Vals[7]+=ch(Vals[0],Vals[1],Vals[2]);
 #define NFLAG (0x7F)
 
 #if defined(VECTORS4)
-	Vals[7] ^= 0x136032ED;
-	bool result = Vals[7].x & Vals[7].y & Vals[7].z & Vals[7].w;
-	if (!result) {
-		if (!Vals[7].x)
-			output[FOUND] = output[NFLAG & nonce.x] =  nonce.x;
-		if (!Vals[7].y)
-			output[FOUND] = output[NFLAG & nonce.y] =  nonce.y;
-		if (!Vals[7].z)
-			output[FOUND] = output[NFLAG & nonce.z] =  nonce.z;
-		if (!Vals[7].w)
-			output[FOUND] = output[NFLAG & nonce.w] =  nonce.w;
+	bool result = any(Vals[7] == 0x136032ed);
+
+	if (result) {
+		output[FOUND] = FOUND;
+		if (Vals[7].x == 0x136032ed)
+			output[NFLAG & nonce.x] =  nonce.x;
+		if (Vals[7].y == 0x136032ed)
+			output[NFLAG & nonce.y] =  nonce.y;
+		if (Vals[7].z == 0x136032ed)
+			output[NFLAG & nonce.z] =  nonce.z;
+		if (Vals[7].w == 0x136032ed)
+			output[NFLAG & nonce.w] =  nonce.w;
 	}
 #elif defined(VECTORS2)
-	Vals[7] ^= 0x136032ED;
-	bool result = Vals[7].x & Vals[7].y;
-	if (!result) {
-		if (!Vals[7].x)
-			output[FOUND] = output[NFLAG & nonce.x] =  nonce.x;
-		if (!Vals[7].y)
-			output[FOUND] = output[NFLAG & nonce.y] =  nonce.y;
+	bool result = any(Vals[7] == 0x136032ed);
+
+	if (result) {
+		output[FOUND] = FOUND;
+		if (Vals[7].x == 0x136032ed)
+			output[NFLAG & nonce.x] =  nonce.x;
+		if (Vals[7].y == 0x136032ed)
+			output[NFLAG & nonce.y] =  nonce.y;
 	}
 #else
 	if (Vals[7] == 0x136032ED)