Commit a9a0bba18b8f22eacd788e20a9af9539b87680dc

Con Kolivas 2012-07-16T11:53:18

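Set the correct data for cldata (point it at the current work's data and write its 80 bytes to CLbuffer0 before setting the scrypt kernel arguments) and prepare for pad8 fixes by recording padbuffer8's size in padbufsize.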

diff --git a/driver-opencl.c b/driver-opencl.c
index 2ebb54f..773bf8e 100644
--- a/driver-opencl.c
+++ b/driver-opencl.c
@@ -1001,6 +1001,8 @@ static cl_int queue_scrypt_kernel(_clState *clState, dev_blk_ctx *blk, cl_uint t
 	cl_int status = 0;
 	int i;
 
+	status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL,NULL);
+
 	CL_SET_ARG(clState->CLbuffer0);
 	CL_SET_ARG(clState->outputBuffer);
 	CL_SET_ARG(clState->padbuffer8);
@@ -1309,7 +1311,7 @@ static bool opencl_thread_init(struct thr_info *thr)
 	struct cgpu_info *gpu = thr->cgpu;
 	struct opencl_thread_data *thrdata;
 	_clState *clState = clStates[thr_id];
-	cl_int status;
+	cl_int status = 0;
 	thrdata = calloc(1, sizeof(*thrdata));
 	thr->cgpu_data = thrdata;
 
@@ -1348,10 +1350,13 @@ static bool opencl_thread_init(struct thr_info *thr)
 	}
 
 #ifdef USE_SCRYPT
-	if (opt_scrypt)
-		status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, BUFFERSIZE, blank_res, 0, NULL,NULL);
+	if (opt_scrypt) {
+		if (clState->padbufsize > BUFFERSIZE)
+			blank_res = realloc(blank_res, clState->padbufsize);
+		status = clEnqueueWriteBuffer(clState->commandQueue, clState->padbuffer8, true, 0, clState->padbufsize, blank_res, 0, NULL,NULL);
+	}
 #endif
-	status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
+	status |= clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
 			BUFFERSIZE, blank_res, 0, NULL, NULL);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
@@ -1440,6 +1445,10 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 			   localThreads[0], gpu->intensity);
 	if (hashes > gpu->max_hashes)
 		gpu->max_hashes = hashes;
+
+#ifdef USE_SCRYPT
+	clState->cldata = work->data;
+#endif
 	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
diff --git a/ocl.c b/ocl.c
index 341d5c8..675a31c 100644
--- a/ocl.c
+++ b/ocl.c
@@ -754,8 +754,9 @@ built:
 		size_t ipt = (1024 / clState->lookup_gap + (1024 % clState->lookup_gap > 0));
 		size_t bufsize = 128 * ipt * clState->thread_concurrency;
 
-		clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
+		clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 80, NULL, &status);
 		clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
+		clState->padbufsize = bufsize;
 	}
 #endif
 	clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, BUFFERSIZE, NULL, &status);
diff --git a/ocl.h b/ocl.h
index b15c889..56fa9b0 100644
--- a/ocl.h
+++ b/ocl.h
@@ -24,6 +24,8 @@ typedef struct {
 	cl_mem padbuffer8;
 	size_t lookup_gap;
 	size_t thread_concurrency;
+	size_t padbufsize;
+	void * cldata;
 #endif
 	bool hasBitAlign;
 	bool hasOpenCL11plus;