Commit a731de6e45f254675b4c9c64c3595f73f26535c0

Philip Kaufmann 2012-02-23T16:14:27

Revert to the legacy nonce creation in the kernel, without the vector offset, but keep the GOFFSET code removed

diff --git a/device-gpu.c b/device-gpu.c
index 233e59d..a526c70 100644
--- a/device-gpu.c
+++ b/device-gpu.c
@@ -835,7 +835,8 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk,
 	return status;
 }
 
-static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint threads)
+static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk,
+				   __maybe_unused cl_uint threads)
 {
 	cl_kernel *kernel = &clState->kernel;
 	cl_uint vwidth = clState->vwidth;
@@ -845,7 +846,7 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint 
 
 	nonces = alloca(sizeof(uint) * vwidth);
 	for (i = 0; i < vwidth; i++)
-		nonces[i] = blk->nonce + (i * threads);
+		nonces[i] = blk->nonce + i;
 	CL_SET_VARG(vwidth, nonces);
 
 	CL_SET_BLKARG(PreVal0);
diff --git a/diakgcn120222.cl b/diakgcn120222.cl
index fa98dbd..89421a2 100644
--- a/diakgcn120222.cl
+++ b/diakgcn120222.cl
@@ -53,7 +53,15 @@ __kernel
 	u V[8];
 	u W[16];
 
-	const u nonce = base + (uint)get_global_id(0);
+#ifdef VECTORS8
+	const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 3) + ((uint)get_local_id(0) << 3) + base;
+#elif defined VECTORS4
+	const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 2) + ((uint)get_local_id(0) << 2) + base;
+#elif defined VECTORS2
+	const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0) << 1) + ((uint)get_local_id(0) << 1) + base;
+#else
+	const u nonce = ((uint)get_group_id(0) * (uint)get_local_size(0)) + (uint)get_local_id(0) + base;
+#endif
 
 	V[0] = PreVal0 + nonce;
 	V[1] = B1;
@@ -108,7 +116,16 @@ __kernel
 
 //----------------------------------------------------------------------------------
 
+#ifdef VECTORS8
+	 W[0] = PreW18 + (u)(	rotr25(nonce.s0),             rotr25(nonce.s0) ^ 0x2004000U, rotr25(nonce.s0) ^ 0x4008000U, rotr25(nonce.s0) ^ 0x600c000U,
+				rotr25(nonce.s0) ^ 0x8010000U, rotr25(nonce.s0) ^ 0xa014000U, rotr25(nonce.s0) ^ 0xc018000U, rotr25(nonce.s0) ^ 0xe01c000U);
+#elif defined VECTORS4
+	 W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U, rotr25(nonce.x) ^ 0x4008000U, rotr25(nonce.x) ^ 0x600c000U);
+#elif defined VECTORS2
+	 W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U);
+#else
 	 W[0] = PreW18 + rotr25(nonce);
+#endif
 	 W[1] = PreW19 + nonce;
 	 W[2] = 0x80000000U + rotr15(W[0]);
 	 W[3] = rotr15(W[1]);