Commit 79fec01a46f71406bfe82acc5bcc37b9fe88e708

ckolivas 2011-06-22T12:27:57

Remove the input buffer and just pass args to the kernel as per plugin design.

diff --git a/cpu-miner.c b/cpu-miner.c
index e6d7d41..7fb9c46 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -738,12 +738,52 @@ enum {
 
 static _clState *clStates[16];
 
+/* queue kernel parameter */
+static inline int qkp(cl_kernel *kernel, void *param, int param_num)
+{
+	return clSetKernelArg(*kernel, param_num, sizeof(param), param);
+}
+
+static inline cl_int queue_kernel_parameters(dev_blk_ctx *blk, cl_kernel *kernel,
+	struct _cl_mem *output)
+{
+	cl_int status = 0;
+
+	status |= qkp(kernel, (void *)&blk->ctx_a, 0);
+	status |= qkp(kernel, (void *)&blk->ctx_b, 1);
+	status |= qkp(kernel, (void *)&blk->ctx_c, 2);
+	status |= qkp(kernel, (void *)&blk->ctx_d, 3);
+	status |= qkp(kernel, (void *)&blk->ctx_e, 4);
+	status |= qkp(kernel, (void *)&blk->ctx_f, 5);
+	status |= qkp(kernel, (void *)&blk->ctx_g, 6);
+	status |= qkp(kernel, (void *)&blk->ctx_h, 7);
+	status |= qkp(kernel, (void *)&blk->cty_b, 8);
+	status |= qkp(kernel, (void *)&blk->cty_c, 9);
+	status |= qkp(kernel, (void *)&blk->cty_d, 10);
+	status |= qkp(kernel, (void *)&blk->cty_f, 11);
+	status |= qkp(kernel, (void *)&blk->cty_g, 12);
+	status |= qkp(kernel, (void *)&blk->cty_h, 13);
+	status |= qkp(kernel, (void *)&blk->nonce, 14);
+	status |= qkp(kernel, (void *)&blk->fW0, 15);
+	status |= qkp(kernel, (void *)&blk->fW1, 16);
+	status |= qkp(kernel, (void *)&blk->fW2, 17);
+	status |= qkp(kernel, (void *)&blk->fW3, 18);
+	status |= qkp(kernel, (void *)&blk->fW15, 19);
+	status |= qkp(kernel, (void *)&blk->fW01r, 20);
+	status |= qkp(kernel, (void *)&blk->fcty_e, 21);
+	status |= qkp(kernel, (void *)&blk->fcty_e2, 22);
+	status |= qkp(kernel, (void *)output, 23);
+
+	return status;
+}
+
 static void *gpuminer_thread(void *userdata)
 {
 	struct thr_info *mythr = userdata;
 	struct timeval tv_start;
 	int thr_id = mythr->id;
 	uint32_t res[128], blank_res[128];
+	cl_kernel *kernel;
 
 	setpriority(PRIO_PROCESS, 0, 19);
 
@@ -755,14 +795,7 @@ static void *gpuminer_thread(void *userdata)
 	cl_int status;
 
 	_clState *clState = clStates[thr_id];
-
-	status = clSetKernelArg(clState->kernel, 0,  sizeof(cl_mem), (void *)&clState->inputBuffer);
-	if (unlikely(status != CL_SUCCESS))
-		{ applog(LOG_ERR, "Error: Setting kernel argument 1.\n"); goto out; }
-
-	status = clSetKernelArg(clState->kernel, 1,  sizeof(cl_mem), (void *)&clState->outputBuffer);
-	if (unlikely(status != CL_SUCCESS))
-		{ applog(LOG_ERR, "Error: Setting kernel argument 2.\n"); goto out; }
+	kernel = &clState->kernel;
 
 	status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
 			BUFFERSIZE, blank_res, 0, NULL, NULL);
@@ -791,10 +824,9 @@ static void *gpuminer_thread(void *userdata)
 
 			precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
 			work->blk.nonce = 0;
-			status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_FALSE, 0,
-				sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL);
+			status = queue_kernel_parameters(&work->blk, kernel, clState->outputBuffer);
 			if (unlikely(status != CL_SUCCESS))
-				{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
+				{ applog(LOG_ERR, "Error: clSetKernelArg failed."); exit (1); }
 
 			work_restart[thr_id].restart = 0;
 			need_work = false;
@@ -805,7 +837,7 @@ static void *gpuminer_thread(void *userdata)
 		}
 		clFinish(clState->commandQueue);
 
-		status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, NULL,
+		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
 				globalThreads, localThreads, 0,  NULL, NULL);
 		if (unlikely(status != CL_SUCCESS))
 			{ applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; }
@@ -844,12 +876,9 @@ static void *gpuminer_thread(void *userdata)
 				need_work = true;
 
 		clFinish(clState->commandQueue);
-
-		status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_FALSE, 0,
-				sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL);
+		status = qkp(kernel, (void *)&work->blk.nonce, 14);
 		if (unlikely(status != CL_SUCCESS))
-			{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
-
+			{ applog(LOG_ERR, "Error: clSetKernelArg failed."); goto out; }
 	}
 out:
 	tq_freeze(mythr->q);
diff --git a/ocl.c b/ocl.c
index 326f303..7cfb400 100644
--- a/ocl.c
+++ b/ocl.c
@@ -374,7 +374,7 @@ _clState *initCl(int gpu, char *name, size_t nameSize) {
 	}
 
 	/* get a kernel object handle for a kernel with the given name */
-	clState->kernel = clCreateKernel(clState->program, "oclminer", &status);
+	clState->kernel = clCreateKernel(clState->program, "search", &status);
 	if(status != CL_SUCCESS)
 	{
 		printf("Error: Creating Kernel from program. (clCreateKernel)\n");
@@ -391,12 +391,6 @@ _clState *initCl(int gpu, char *name, size_t nameSize) {
 		return NULL;
 	}
 
-	clState->inputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, sizeof(dev_blk_ctx), NULL, &status);
-	if(status != CL_SUCCESS) {
-		printf("Error: clCreateBuffer (inputBuffer)\n");
-		return NULL;
-	}
-
 	clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, sizeof(uint32_t) * 128, NULL, &status);
 	if(status != CL_SUCCESS) {
 		printf("Error: clCreateBuffer (outputBuffer)\n");
diff --git a/ocl.h b/ocl.h
index 68c1670..eacb33c 100644
--- a/ocl.h
+++ b/ocl.h
@@ -11,7 +11,6 @@ typedef struct {
 	cl_kernel kernel;
 	cl_command_queue commandQueue;
 	cl_program program;
-	cl_mem inputBuffer;
 	cl_mem outputBuffer;
 } _clState;
 
diff --git a/oclminer.cl b/oclminer.cl
index 54fd1a0..209e4b1 100644
--- a/oclminer.cl
+++ b/oclminer.cl
@@ -57,38 +57,21 @@ uint fcty_e;
 uint fcty_e2;
 } dev_blk_ctx;
 
-__kernel __attribute__((vec_type_hint(uint))) WGS void oclminer(
-	__constant dev_blk_ctx *ctx, __global uint *output)
+__kernel __attribute__((vec_type_hint(uint))) WGS void search(
+const uint state0, const uint state1, const uint state2, const uint state3,
+const uint state4, const uint state5, const uint state6, const uint state7,
+const uint B1, const uint C1, const uint D1,
+const uint F1, const uint G1, const uint H1,
+const uint base,
+const uint fW0, const uint fW1, const uint fW2, const uint fW3, const uint fW15, const uint fW01r, const uint fcty_e, const uint fcty_e2,
+__global uint *output)
 {
-  const uint fW0 = ctx->fW0;
-  const uint fW1 = ctx->fW1;
-  const uint fW2 = ctx->fW2;
-  const uint fW3 = ctx->fW3;
-  const uint fW15 = ctx->fW15;
-  const uint fW01r = ctx->fW01r;
-  const uint fcty_e = ctx->fcty_e;
-  const uint fcty_e2 = ctx->fcty_e2;
-  const uint state0 = ctx->ctx_a;
-  const uint state1 = ctx->ctx_b;
-  const uint state2 = ctx->ctx_c;
-  const uint state3 = ctx->ctx_d;
-  const uint state4 = ctx->ctx_e;
-  const uint state5 = ctx->ctx_f;
-  const uint state6 = ctx->ctx_g;
-  const uint state7 = ctx->ctx_h;
-  const uint B1 = ctx->cty_b;
-  const uint C1 = ctx->cty_c;
-  const uint D1 = ctx->cty_d;
-  const uint F1 = ctx->cty_f;
-  const uint G1 = ctx->cty_g;
-  const uint H1 = ctx->cty_h;
-
   uint A, B, C, D, E, F, G, H;
   uint W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15;
   uint it;
   const uint myid = get_global_id(0);
 
-  const uint tnonce = ctx->nonce + myid;
+  const uint tnonce = base + myid;
 
     W3 = 0 ^ tnonce;
     E = fcty_e +  W3;