Merge branch 'diakgcn' of https://github.com/Diapolo/cgminer into diakgcn
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
diff --git a/device-gpu.c b/device-gpu.c
index f7cbcc2..9091de5 100644
--- a/device-gpu.c
+++ b/device-gpu.c
@@ -740,7 +740,7 @@ static cl_int queue_phatk_kernel(_clState *clState, dev_blk_ctx *blk)
nonces = alloca(sizeof(uint) * vwidth);
for (i = 0; i < vwidth; i++)
nonces[i] = blk->nonce + i;
- status |= clSetKernelArg(*kernel, num++, vwidth * sizeof(uint), (void *)nonces);
+ CL_SET_VARG(vwidth, nonces);
CL_SET_BLKARG(W16);
CL_SET_BLKARG(W17);
diff --git a/diakgcn120208.cl b/diakgcn120208.cl
index 03456b1..5038838 100644
--- a/diakgcn120208.cl
+++ b/diakgcn120208.cl
@@ -57,7 +57,7 @@ __kernel
const uint state0, const uint state1, const uint state2, const uint state3,
const uint state4, const uint state5, const uint state6, const uint state7,
const uint state0A, const uint state0B,
- __global int * output)
+ __global uint * output)
{
u W[17];
u V[8];
diff --git a/findnonce.c b/findnonce.c
index 299452f..7d12be7 100644
--- a/findnonce.c
+++ b/findnonce.c
@@ -66,9 +66,6 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
blk->cty_a = A;
blk->cty_b = B;
blk->cty_c = C;
-
- blk->C1addK5 = C + SHA256_K[5];
-
blk->cty_d = D;
blk->D1A = D + 0xb956c25b;
@@ -93,12 +90,12 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
blk->W16 = blk->fW0 = data[0] + (rotr(data[1], 7) ^ rotr(data[1], 18) ^ (data[1] >> 3));
blk->W17 = blk->fW1 = data[1] + (rotr(data[2], 7) ^ rotr(data[2], 18) ^ (data[2] >> 3)) + 0x01100000;
- blk->PreVal4 = blk->fcty_e = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + 0xe9b5dba5;
+ blk->PreVal4 = blk->fcty_e = blk->ctx_e + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + 0xe9b5dba5;
blk->T1 = blk->fcty_e2 = (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
blk->PreVal4_2 = blk->PreVal4 + blk->T1;
- blk->PreVal0 = blk->PreVal4 + state[0];
+ blk->PreVal0 = blk->PreVal4 + blk->ctx_a;
blk->PreW31 = 0x00000280 + (rotr(blk->W16, 7) ^ rotr(blk->W16, 18) ^ (blk->W16 >> 3));
- blk->PreW32 = blk->W16 + ((rotr(blk->W17, 7) ^ rotr(blk->W17, 18) ^ (blk->W17 >> 3)));
+ blk->PreW32 = blk->W16 + (rotr(blk->W17, 7) ^ rotr(blk->W17, 18) ^ (blk->W17 >> 3));
blk->PreW18 = data[2] + (rotr(blk->W16, 17) ^ rotr(blk->W16, 19) ^ (blk->W16 >> 10));
blk->PreW19 = 0x11002000 + (rotr(blk->W17, 17) ^ rotr(blk->W17, 19) ^ (blk->W17 >> 10));
@@ -117,6 +114,7 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
blk->PreVal4addT1 = blk->PreVal4 + blk->T1;
blk->T1substate0 = blk->ctx_a - blk->T1;
+ blk->C1addK5 = blk->cty_c + SHA256_K[5];
blk->B1addK6 = blk->cty_b + SHA256_K[6];
blk->PreVal0addK7 = blk->PreVal0 + SHA256_K[7];
blk->W16addK16 = blk->W16 + SHA256_K[16];
diff --git a/ocl.c b/ocl.c
index 5311557..e0aafb1 100644
--- a/ocl.c
+++ b/ocl.c
@@ -302,6 +302,19 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
find = strstr(extensions, camo);
if (find)
clState->hasBitAlign = true;
+
+ /* Check for OpenCL >= 1.1 support, needed for global offset parameter usage. */
+ char * devoclver = malloc(1024);
+ const char * ocl10 = "OpenCL 1.0";
+
+ status = clGetDeviceInfo(devices[gpu], CL_DEVICE_VERSION, 1024, (void *)devoclver, NULL);
+ if (status != CL_SUCCESS) {
+ applog(LOG_ERR, "Error: Failed to clGetDeviceInfo when trying to get CL_DEVICE_VERSION");
+ return NULL;
+ }
+ find = strstr(devoclver, ocl10);
+ if !(find)
+ clState->hasOpenCL11plus = true;
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL);
if (status != CL_SUCCESS) {
diff --git a/ocl.h b/ocl.h
index d52bfce..ccab8b8 100644
--- a/ocl.h
+++ b/ocl.h
@@ -18,6 +18,7 @@ typedef struct {
cl_program program;
cl_mem outputBuffer;
bool hasBitAlign;
+ bool hasOpenCL11plus;
cl_uint preferred_vwidth;
size_t max_work_size;
size_t work_size;