Commit 60f8ccb313508aa8c40b921afa4659f00a119024

Con Kolivas 2012-02-13T12:51:24

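Use the local and group IDs (instead of the global ID) to compute the nonce in the poclbm kernel as well, passing the base nonce in as a vector of preferred_vwidth consecutive values.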

diff --git a/device-gpu.c b/device-gpu.c
index 242cb51..cc79d1b 100644
--- a/device-gpu.c
+++ b/device-gpu.c
@@ -651,9 +651,11 @@ static _clState *clStates[MAX_GPUDEVICES];
 
 static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk)
 {
+	cl_uint vwidth = clState->preferred_vwidth;
 	cl_kernel *kernel = &clState->kernel;
+	unsigned int i, num = 0;
 	cl_int status = 0;
-	int num = 0;
+	uint *nonces;
 
 	CL_SET_BLKARG(ctx_a);
 	CL_SET_BLKARG(ctx_b);
@@ -663,13 +665,18 @@ static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk)
 	CL_SET_BLKARG(ctx_f);
 	CL_SET_BLKARG(ctx_g);
 	CL_SET_BLKARG(ctx_h);
+
 	CL_SET_BLKARG(cty_b);
 	CL_SET_BLKARG(cty_c);
 	CL_SET_BLKARG(cty_d);
 	CL_SET_BLKARG(cty_f);
 	CL_SET_BLKARG(cty_g);
 	CL_SET_BLKARG(cty_h);
-	CL_SET_BLKARG(nonce);
+
+	nonces = alloca(sizeof(uint) * vwidth);
+	for (i = 0; i < vwidth; i++)
+		nonces[i] = blk->nonce + i;
+	CL_SET_VARG(vwidth, nonces);
 
 	CL_SET_BLKARG(fW0);
 	CL_SET_BLKARG(fW1);
diff --git a/poclbm120210.cl b/poclbm120210.cl
index 6256908..2d8331f 100644
--- a/poclbm120210.cl
+++ b/poclbm120210.cl
@@ -71,7 +71,7 @@ __kernel void search(const uint state0, const uint state1, const uint state2, co
 						const uint state4, const uint state5, const uint state6, const uint state7,
 						const uint b1, const uint c1, const uint d1,
 						const uint f1, const uint g1, const uint h1,
-						const uint base,
+						const u base,
 						const uint fw0, const uint fw1, const uint fw2, const uint fw3, const uint fw15, const uint fw01r, const uint fcty_e, const uint fcty_e2,
 						__global uint * output)
 {
@@ -80,11 +80,11 @@ __kernel void search(const uint state0, const uint state1, const uint state2, co
 	u nonce;
 
 #ifdef VECTORS4
-	nonce = base + (get_global_id(0)<<2) + (uint4)(0, 1, 2, 3);
+	nonce = base + (uint)(get_local_id(0)) * 4u + (uint)(get_group_id(0)) * (WORKSIZE * 4u);
 #elif defined VECTORS2
-	nonce = base + (get_global_id(0)<<1) + (uint2)(0, 1);
+	nonce = base + (uint)(get_local_id(0)) * 2u + (uint)(get_group_id(0)) * (WORKSIZE * 2u);
 #else
-	nonce = base + get_global_id(0);
+	nonce = base + get_local_id(0) + get_group_id(0) * (WORKSIZE);
 #endif
 
 	W[20] = fcty_e +  nonce;