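Set the correct data for cldata (point it at the current work's data and write its first 80 bytes to CLbuffer0 before the scrypt kernel arguments are set), and prepare for padbuffer8 fixes by recording the pad buffer size in clState.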
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
diff --git a/driver-opencl.c b/driver-opencl.c
index 2ebb54f..773bf8e 100644
--- a/driver-opencl.c
+++ b/driver-opencl.c
@@ -1001,6 +1001,8 @@ static cl_int queue_scrypt_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint t
cl_int status = 0;
int i;
+ status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
+
CL_SET_ARG(clState->CLbuffer0);
CL_SET_ARG(clState->outputBuffer);
CL_SET_ARG(clState->padbuffer8);
@@ -1309,7 +1311,7 @@ static bool opencl_thread_init(struct thr_info *thr)
struct cgpu_info *gpu = thr->cgpu;
struct opencl_thread_data *thrdata;
_clState *clState = clStates[thr_id];
- cl_int status;
+ cl_int status = 0;
thrdata = calloc(1, sizeof(*thrdata));
thr->cgpu_data = thrdata;
@@ -1348,10 +1350,13 @@ static bool opencl_thread_init(struct thr_info *thr)
}
#ifdef USE_SCRYPT
- if (opt_scrypt)
- status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, BUFFERSIZE, blank_res, 0, NULL,NULL);
+ if (opt_scrypt) {
+ if (clState->padbufsize > BUFFERSIZE)
+ blank_res = realloc(blank_res, clState->padbufsize);
+ status = clEnqueueWriteBuffer(clState->commandQueue, clState->padbuffer8, true, 0, clState->padbufsize, blank_res, 0, NULL,NULL);
+ }
#endif
- status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
+ status |= clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
BUFFERSIZE, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
@@ -1440,6 +1445,10 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
localThreads[0], gpu->intensity);
if (hashes > gpu->max_hashes)
gpu->max_hashes = hashes;
+
+#ifdef USE_SCRYPT
+ clState->cldata = work->data;
+#endif
status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
diff --git a/ocl.c b/ocl.c
index 341d5c8..675a31c 100644
--- a/ocl.c
+++ b/ocl.c
@@ -754,8 +754,9 @@ built:
size_t ipt = (1024 / clState->lookup_gap + (1024 % clState->lookup_gap > 0));
size_t bufsize = 128 * ipt * clState->thread_concurrency;
- clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
+ clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 80, NULL, &status);
clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
+ clState->padbufsize = bufsize;
}
#endif
clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, BUFFERSIZE, NULL, &status);
diff --git a/ocl.h b/ocl.h
index b15c889..56fa9b0 100644
--- a/ocl.h
+++ b/ocl.h
@@ -24,6 +24,8 @@ typedef struct {
cl_mem padbuffer8;
size_t lookup_gap;
size_t thread_concurrency;
+ size_t padbufsize;
+ void * cldata;
#endif
bool hasBitAlign;
bool hasOpenCL11plus;