Import more prepared constants into poclbm kernel.

Conflicts:
	poclbm120213.cl
diff --git a/device-gpu.c b/device-gpu.c
index cc79d1b..ae05447 100644
--- a/device-gpu.c
+++ b/device-gpu.c
@@ -668,7 +668,8 @@ static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk)
CL_SET_BLKARG(cty_b);
CL_SET_BLKARG(cty_c);
- CL_SET_BLKARG(cty_d);
+
+
CL_SET_BLKARG(cty_f);
CL_SET_BLKARG(cty_g);
CL_SET_BLKARG(cty_h);
@@ -686,6 +687,11 @@ static cl_int queue_poclbm_kernel(_clState *clState, dev_blk_ctx *blk)
CL_SET_BLKARG(fW01r);
CL_SET_BLKARG(fcty_e);
CL_SET_BLKARG(fcty_e2);
+ CL_SET_BLKARG(D1A);
+ CL_SET_BLKARG(C1addK5);
+ CL_SET_BLKARG(B1addK6);
+ CL_SET_BLKARG(W16addK16);
+ CL_SET_BLKARG(W17addK17);
CL_SET_ARG(clState->outputBuffer);
diff --git a/poclbm120213.cl b/poclbm120213.cl
index 668e4f3..6d10316 100644
--- a/poclbm120213.cl
+++ b/poclbm120213.cl
@@ -69,10 +69,13 @@ __constant uint K[64] = {
__kernel void search(const uint state0, const uint state1, const uint state2, const uint state3,
const uint state4, const uint state5, const uint state6, const uint state7,
- const uint b1, const uint c1, const uint d1,
+ const uint b1, const uint c1,
const uint f1, const uint g1, const uint h1,
const u base,
- const uint fw0, const uint fw1, const uint fw2, const uint fw3, const uint fw15, const uint fw01r, const uint fcty_e, const uint fcty_e2,
+ const uint fw0, const uint fw1, const uint fw2, const uint fw3, const uint fw15, const uint fw01r,
+ const uint fcty_e, const uint fcty_e2,
+ const uint D1A, const uint C1addK5, const uint B1addK6,
+ const uint W16addK16, const uint W17addK17,
__global uint * output)
{
u W[24];
@@ -93,29 +96,26 @@ Vals[0]=Vals[4];
Vals[0]+=state0;
Vals[3]=(rotr(Vals[0],6)^rotr(Vals[0],11)^rotr(Vals[0],25));
-Vals[3]+=d1;
Vals[3]+=ch(Vals[0],b1,c1);
-Vals[3]+=0xB956C25B;
+Vals[3]+=D1A;
Vals[7]=Vals[3];
Vals[7]+=h1;
Vals[4]+=fcty_e2;
Vals[3]+=(rotr(Vals[4],2)^rotr(Vals[4],13)^rotr(Vals[4],22));
-Vals[2]=c1;
+Vals[2]=C1addK5;
Vals[2]+=(rotr(Vals[7],6)^rotr(Vals[7],11)^rotr(Vals[7],25));
Vals[2]+=ch(Vals[7],Vals[0],b1);
-Vals[2]+=K[5];
Vals[6]=Vals[2];
Vals[6]+=g1;
Vals[3]+=Ma2(g1,Vals[4],f1);
Vals[2]+=(rotr(Vals[3],2)^rotr(Vals[3],13)^rotr(Vals[3],22));
-Vals[1]=b1;
+Vals[1]=B1addK6;
Vals[1]+=(rotr(Vals[6],6)^rotr(Vals[6],11)^rotr(Vals[6],25));
Vals[1]+=ch(Vals[6],Vals[7],Vals[0]);
-Vals[1]+=K[6];
Vals[5]=Vals[1];
Vals[5]+=f1;
@@ -177,16 +177,14 @@ Vals[4]+=Vals[0];
Vals[0]+=(rotr(Vals[1],2)^rotr(Vals[1],13)^rotr(Vals[1],22));
Vals[7]+=(rotr(Vals[4],6)^rotr(Vals[4],11)^rotr(Vals[4],25));
Vals[7]+=ch(Vals[4],Vals[5],Vals[6]);
-Vals[7]+=K[16];
-Vals[7]+=fw0;
+Vals[7]+=W16addK16;
Vals[0]+=Ma(Vals[3],Vals[1],Vals[2]);
Vals[3]+=Vals[7];
Vals[7]+=(rotr(Vals[0],2)^rotr(Vals[0],13)^rotr(Vals[0],22));
Vals[7]+=Ma(Vals[2],Vals[0],Vals[1]);
Vals[6]+=(rotr(Vals[3],6)^rotr(Vals[3],11)^rotr(Vals[3],25));
Vals[6]+=ch(Vals[3],Vals[4],Vals[5]);
-Vals[6]+=K[17];
-Vals[6]+=fw1;
+Vals[6]+=W17addK17;
Vals[2]+=Vals[6];
Vals[6]+=(rotr(Vals[7],2)^rotr(Vals[7],13)^rotr(Vals[7],22));