Merge pull request #158 from Diapolo/master diakgcn update
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
diff --git a/diakgcn120223.cl b/diakgcn120223.cl
index de9ce58..ad981a6 100644
--- a/diakgcn120223.cl
+++ b/diakgcn120223.cl
@@ -1,11 +1,9 @@
-// DiaKGCN 24-02-2012 - OpenCL kernel by Diapolo
+// DiaKGCN 16-03-2012 - OpenCL kernel by Diapolo
//
// Parts and / or ideas for this kernel are based upon the public-domain poclbm project, the phatk kernel by Phateus and the DiabloMiner kernel by DiabloD3.
// The kernel was rewritten by me (Diapolo) and is still public-domain!
-#ifdef VECTORS8
- typedef uint8 u;
-#elif defined VECTORS4
+#ifdef VECTORS4
typedef uint4 u;
#elif defined VECTORS2
typedef uint2 u;
@@ -53,9 +51,7 @@ __kernel
u V[8];
u W[16];
-#ifdef VECTORS8
- const u nonce = (uint)(get_local_id(0)) * 8U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base;
-#elif defined VECTORS4
+#ifdef VECTORS4
const u nonce = (uint)(get_local_id(0)) * 4U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base;
#elif defined VECTORS2
const u nonce = (uint)(get_local_id(0)) * 2U + (uint)(get_group_id(0)) * (uint)(WORKVEC) + base;
@@ -116,9 +112,7 @@ __kernel
//----------------------------------------------------------------------------------
-#ifdef VECTORS8
- W[0] = PreW18 + (u)(rotr25(nonce.s0), rotr25(nonce.s0) ^ 0x2004000U, rotr25(nonce.s0) ^ 0x4008000U, rotr25(nonce.s0) ^ 0x600c000U, rotr25(nonce.s0) ^ 0x8010000U, rotr25(nonce.s0) ^ 0xa014000U, rotr25(nonce.s0) ^ 0xc018000U, rotr25(nonce.s0) ^ 0xe01c000U);
-#elif defined VECTORS4
+#ifdef VECTORS4
W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U, rotr25(nonce.x) ^ 0x4008000U, rotr25(nonce.x) ^ 0x600c000U);
#elif defined VECTORS2
W[0] = PreW18 + (u)(rotr25(nonce.x), rotr25(nonce.x) ^ 0x2004000U);
@@ -141,8 +135,8 @@ __kernel
W[14] = W[7] + PreW32 + rotr15(W[12]);
W[15] = W[8] + W17 + rotr15(W[13]) + rotr25(W[0]);
- V[1] += 0x0fc19dc6U + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
- V[5] = 0x0fc19dc6U + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
+ V[1] += 0x0fc19dc6U + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + W[0];
+ V[5] = 0x0fc19dc6U + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + W[0] + rotr30(V[6]) + ma(V[7], V[0], V[6]);
V[0] += 0x240ca1ccU + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
V[4] = 0x240ca1ccU + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
@@ -571,59 +565,15 @@ __kernel
V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-
#define FOUND (0x80)
#define NFLAG (0x7F)
-#ifdef VECTORS8
- V[7] ^= 0x136032edU;
-
- bool result = V[7].s0 & V[7].s1 & V[7].s2 & V[7].s3 & V[7].s4 & V[7].s5 & V[7].s6 & V[7].s7;
-
- if (!result) {
- if (!V[7].s0)
- output[FOUND] = output[NFLAG & nonce.s0] = nonce.s0;
- if (!V[7].s1)
- output[FOUND] = output[NFLAG & nonce.s1] = nonce.s1;
- if (!V[7].s2)
- output[FOUND] = output[NFLAG & nonce.s2] = nonce.s2;
- if (!V[7].s3)
- output[FOUND] = output[NFLAG & nonce.s3] = nonce.s3;
- if (!V[7].s4)
- output[FOUND] = output[NFLAG & nonce.s4] = nonce.s4;
- if (!V[7].s5)
- output[FOUND] = output[NFLAG & nonce.s5] = nonce.s5;
- if (!V[7].s6)
- output[FOUND] = output[NFLAG & nonce.s6] = nonce.s6;
- if (!V[7].s7)
- output[FOUND] = output[NFLAG & nonce.s7] = nonce.s7;
- }
-#elif defined VECTORS4
- V[7] ^= 0x136032edU;
-
- bool result = V[7].x & V[7].y & V[7].z & V[7].w;
-
- if (!result) {
- if (!V[7].x)
- output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
- if (!V[7].y)
- output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
- if (!V[7].z)
- output[FOUND] = output[NFLAG & nonce.z] = nonce.z;
- if (!V[7].w)
- output[FOUND] = output[NFLAG & nonce.w] = nonce.w;
- }
+#ifdef VECTORS4
+ if ((V[7].x == 0x136032edU) | (V[7].y == 0x136032edU) | (V[7].z == 0x136032edU) | (V[7].w == 0x136032edU)) // OR, not XOR: XOR is false when an even number of lanes match, which would silently drop the hit
+ output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : ((V[7].y == 0x136032edU) ? nonce.y : ((V[7].z == 0x136032edU) ? nonce.z : nonce.w)); // stores the first matching lane's nonce; slot index is taken from nonce.x
#elif defined VECTORS2
- V[7] ^= 0x136032edU;
-
- bool result = V[7].x & V[7].y;
-
- if (!result) {
- if (!V[7].x)
- output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
- if (!V[7].y)
- output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
- }
+ if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU))
+ output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : nonce.y;
#else
if (V[7] == 0x136032edU)
output[FOUND] = output[NFLAG & nonce] = nonce;