Merge pull request #122 from Diapolo/master — diakgcn: fixed hw-errors when no vectors are used
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
diff --git a/diakgcn120216.cl b/diakgcn120216.cl
index a5df195..da3b2a2 100644
--- a/diakgcn120216.cl
+++ b/diakgcn120216.cl
@@ -342,14 +342,22 @@ __kernel
//----------------------------------------------------------------------------------
- W[0] = state0 + V[0];
- W[1] = state1 + V[1];
- W[2] = state2 + V[2];
- W[3] = state3 + V[3];
- W[4] = state4 + V[4];
- W[5] = state5 + V[5];
- W[6] = state6 + V[6];
- W[7] = state7 + V[7];
+ W[0] = state0 + V[0] + rotr25(state1 + V[1]);
+ W[1] = state1 + V[1] + 0x00a00000 + rotr25(state2 + V[2]);
+ W[2] = state2 + V[2] + rotr15(W[0]) + rotr25(state3 + V[3]);
+ W[3] = state3 + V[3] + rotr15(W[1]) + rotr25(state4 + V[4]);
+ W[4] = state4 + V[4] + rotr15(W[2]) + rotr25(state5 + V[5]);
+ W[5] = state5 + V[5] + rotr15(W[3]) + rotr25(state6 + V[6]);
+ W[6] = state6 + V[6] + 0x00000100 + rotr15(W[4]) + rotr25(state7 + V[7]);
+ W[7] = state7 + V[7] + W[0] + 0x11002000 + rotr15(W[5]);
+ W[8] = W[1] + 0x80000000 + rotr15(W[6]);
+ W[9] = W[2] + rotr15(W[7]);
+ W[10] = W[3] + rotr15(W[8]);
+ W[11] = W[4] + rotr15(W[9]);
+ W[12] = W[5] + rotr15(W[10]);
+ W[13] = W[6] + rotr15(W[11]);
+ W[14] = W[7] + 0x00400022 + rotr15(W[12]);
+ W[15] = W[8] + 0x00000100 + rotr15(W[13]) + rotr25(W[0]);
// 0x71374491 + 0x1f83d9ab + state1
const u state1AaddV1 = state1A + V[1];
@@ -422,25 +430,6 @@ __kernel
V[4] += 0xc19bf274 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
V[0] = 0xc19bf274 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
-//----------------------------------------------------------------------------------
-
- W[0] = W[0] + rotr25(W[1]);
- W[1] = W[1] + 0x00a00000 + rotr25(W[2]);
- W[2] = W[2] + rotr15(W[0]) + rotr25(W[3]);
- W[3] = W[3] + rotr15(W[1]) + rotr25(W[4]);
- W[4] = W[4] + rotr15(W[2]) + rotr25(W[5]);
- W[5] = W[5] + rotr15(W[3]) + rotr25(W[6]);
- W[6] = W[6] + 0x00000100 + rotr15(W[4]) + rotr25(W[7]);
- W[7] = W[7] + W[0] + 0x11002000 + rotr15(W[5]);
- W[8] = W[1] + 0x80000000 + rotr15(W[6]);
- W[9] = W[2] + rotr15(W[7]);
- W[10] = W[3] + rotr15(W[8]);
- W[11] = W[4] + rotr15(W[9]);
- W[12] = W[5] + rotr15(W[10]);
- W[13] = W[6] + rotr15(W[11]);
- W[14] = W[7] + 0x00400022 + rotr15(W[12]);
- W[15] = W[8] + 0x00000100 + rotr15(W[13]) + rotr25(W[0]);
-
V[3] += 0xe49b69c1 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
V[7] = 0xe49b69c1 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);