Commit a3103d9d5aeb96233a1f2a4452b0172531006a4b

Con Kolivas 2012-02-19T03:49:52

Merge pull request #122 from Diapolo/master: diakgcn - fixed no-vectors hw-errors

diff --git a/diakgcn120216.cl b/diakgcn120216.cl
index a5df195..da3b2a2 100644
--- a/diakgcn120216.cl
+++ b/diakgcn120216.cl
@@ -342,14 +342,22 @@ __kernel
 
 //----------------------------------------------------------------------------------
 
-	W[0] = state0 + V[0];
-	W[1] = state1 + V[1];
-	W[2] = state2 + V[2];
-	W[3] = state3 + V[3];
-	W[4] = state4 + V[4];
-	W[5] = state5 + V[5];
-	W[6] = state6 + V[6];
-	W[7] = state7 + V[7];
+	 W[0] = state0 + V[0] + rotr25(state1 + V[1]);
+	 W[1] = state1 + V[1] + 0x00a00000 + rotr25(state2 + V[2]);
+	 W[2] = state2 + V[2] + rotr15(W[0]) + rotr25(state3 + V[3]);
+	 W[3] = state3 + V[3] + rotr15(W[1]) + rotr25(state4 + V[4]);
+	 W[4] = state4 + V[4] + rotr15(W[2]) + rotr25(state5 + V[5]);
+	 W[5] = state5 + V[5] + rotr15(W[3]) + rotr25(state6 + V[6]);
+	 W[6] = state6 + V[6] + 0x00000100 + rotr15(W[4]) + rotr25(state7 + V[7]);	
+	 W[7] = state7 + V[7] + W[0] + 0x11002000 + rotr15(W[5]);
+	 W[8] = W[1] + 0x80000000 + rotr15(W[6]);	
+	 W[9] = W[2] + rotr15(W[7]);
+	W[10] = W[3] + rotr15(W[8]);
+	W[11] = W[4] + rotr15(W[9]);
+	W[12] = W[5] + rotr15(W[10]);
+	W[13] = W[6] + rotr15(W[11]);
+	W[14] = W[7] + 0x00400022 + rotr15(W[12]);
+	W[15] = W[8] + 0x00000100 + rotr15(W[13]) + rotr25(W[0]);
 
 	// 0x71374491 + 0x1f83d9ab + state1
 	const u state1AaddV1 = state1A + V[1];
@@ -422,25 +430,6 @@ __kernel
 	V[4] += 0xc19bf274 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
 	V[0] =  0xc19bf274 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
-//----------------------------------------------------------------------------------
-
-	 W[0] = W[0] + rotr25(W[1]);
-	 W[1] = W[1] + 0x00a00000 + rotr25(W[2]);
-	 W[2] = W[2] + rotr15(W[0]) + rotr25(W[3]);
-	 W[3] = W[3] + rotr15(W[1]) + rotr25(W[4]);
-	 W[4] = W[4] + rotr15(W[2]) + rotr25(W[5]);
-	 W[5] = W[5] + rotr15(W[3]) + rotr25(W[6]);
-	 W[6] = W[6] + 0x00000100 + rotr15(W[4]) + rotr25(W[7]);	
-	 W[7] = W[7] + W[0] + 0x11002000 + rotr15(W[5]);
-	 W[8] = W[1] + 0x80000000 + rotr15(W[6]);	
-	 W[9] = W[2] + rotr15(W[7]);
-	W[10] = W[3] + rotr15(W[8]);
-	W[11] = W[4] + rotr15(W[9]);
-	W[12] = W[5] + rotr15(W[10]);
-	W[13] = W[6] + rotr15(W[11]);
-	W[14] = W[7] + 0x00400022 + rotr15(W[12]);
-	W[15] = W[8] + 0x00000100 + rotr15(W[13]) + rotr25(W[0]);
-
 	V[3] += 0xe49b69c1 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
 	V[7] =  0xe49b69c1 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);