Commit fb077c6d59b7d274b2c52f210026ee7d7186c4eb

Con Kolivas 2012-02-24T13:27:15

Pass vectors * worksize to the kernels as a precomputed WORKVEC define to avoid one op.

diff --git a/diablo120223.cl b/diablo120223.cl
index a08e47c..a222ddd 100644
--- a/diablo120223.cl
+++ b/diablo120223.cl
@@ -63,9 +63,9 @@ __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search(
   z ZA[930];
 
 #ifdef VECTORS4
-	const z Znonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
+	const z Znonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKVEC);
 #elif defined VECTORS2
-	const z Znonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
+	const z Znonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKVEC);
 #else
 	const z Znonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
 #endif
diff --git a/diakgcn120223.cl b/diakgcn120223.cl
index 89421a2..9635ff5 100644
--- a/diakgcn120223.cl
+++ b/diakgcn120223.cl
@@ -54,13 +54,13 @@ __kernel
 	u W[16];
 
 #ifdef VECTORS8
-	const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 3) + ((uint)get_local_id(0) << 3) + base;
+	const u nonce = base + (uint)(get_local_id(0)) * 8u + (uint)(get_group_id(0)) * (WORKVEC);
 #elif defined VECTORS4
-	const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 2) + ((uint)get_local_id(0) << 2) + base;
+	const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKVEC);
 #elif defined VECTORS2
-	const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 1) + ((uint)get_local_id(0) << 1) + base;
+	const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKVEC);
 #else
-	const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0)) + (uint)get_local_id(0) + base;
+	const u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
 #endif
 
 	V[0] = PreVal0 + nonce;
diff --git a/ocl.c b/ocl.c
index e5eb6ec..ae210e2 100644
--- a/ocl.c
+++ b/ocl.c
@@ -505,8 +505,8 @@ build:
 	/* create a cl program executable for all the devices specified */
 	char *CompilerOptions = calloc(1, 256);
 
-	sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d",
-		(int)clState->wsize, clState->vwidth);
+	sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d -D WORKVEC=%d",
+		(int)clState->wsize, clState->vwidth, (int)clState->wsize * clState->vwidth);
 	applog(LOG_DEBUG, "Setting worksize to %d", clState->wsize);
 	if (clState->vwidth > 1)
 		applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->vwidth);
diff --git a/phatk120223.cl b/phatk120223.cl
index f6f2078..a1f4fc2 100644
--- a/phatk120223.cl
+++ b/phatk120223.cl
@@ -188,12 +188,12 @@ void search(	const uint state0, const uint state1, const uint state2, const uint
 
 #ifdef VECTORS4
 	//Less dependencies to get both the local id and group id and then add them
-	W[3] = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
+	W[3] = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKVEC);
 	uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U);
 	//Since only the 2 LSB is opposite between the nonces, we can save an instruction by flipping the 4 bits in W18 rather than the 1 bit in W3
 	W[18] = PreW18 + (u){r, r ^ 0x2004000U, r ^ 0x4008000U, r ^ 0x600C000U};
 #elif defined VECTORS2
-	W[3] = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
+	W[3] = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKVEC);
 	uint r = rot(W[3].x,25u)^rot(W[3].x,14u)^((W[3].x)>>3U);
 	W[18] = PreW18 + (u){r, r ^ 0x2004000U};
 #else
diff --git a/poclbm120223.cl b/poclbm120223.cl
index 4567fb8..47dbb5b 100644
--- a/poclbm120223.cl
+++ b/poclbm120223.cl
@@ -83,9 +83,9 @@ __kernel void search(const uint state0, const uint state1, const uint state2, co
 	u *Vals = &W[16]; // Now put at W[16] to be in same array
 
 #ifdef VECTORS4
-	const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
+	const u nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKVEC);
 #elif defined VECTORS2
-	const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
+	const u nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKVEC);
 #else
 	const u nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
 #endif