Commit f117675ac208817aaf7a7432faa65a396ee9c4ac

Con Kolivas 2011-06-22T10:15:23

Optimise work loop to make cl calls asynchronous where possible.

diff --git a/cpu-miner.c b/cpu-miner.c
index cf611f9..e6d7d41 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -743,11 +743,11 @@ static void *gpuminer_thread(void *userdata)
 	struct thr_info *mythr = userdata;
 	struct timeval tv_start;
 	int thr_id = mythr->id;
-	uint32_t res[128];
+	uint32_t res[128], blank_res[128];
 
 	setpriority(PRIO_PROCESS, 0, 19);
 
-	memset(res, 0, BUFFERSIZE);
+	memset(blank_res, 0, BUFFERSIZE);
 
 	size_t globalThreads[1];
 	size_t localThreads[1];
@@ -765,26 +765,23 @@ static void *gpuminer_thread(void *userdata)
 		{ applog(LOG_ERR, "Error: Setting kernel argument 2.\n"); goto out; }
 
 	status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
-			BUFFERSIZE, res, 0, NULL, NULL);   
+			BUFFERSIZE, blank_res, 0, NULL, NULL);
 	if (unlikely(status != CL_SUCCESS))
 		{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
 
 	struct work *work = malloc(sizeof(struct work));
 	bool need_work = true;
 	unsigned int threads = 1 << 22;
-	unsigned int h0count = 0;
+
 	gettimeofday(&tv_start, NULL);
+	globalThreads[0] = threads;
+	localThreads[0] = 128;
 
 	while (1) {
 		struct timeval tv_end, diff;
 		int i;
 
 		if (need_work) {
-			work_restart[thr_id].restart = 0;
-
-			if (opt_debug)
-				applog(LOG_DEBUG, "getwork");
-
 			/* obtain new work from internal workio thread */
 			if (unlikely(!get_work(mythr, work))) {
 				applog(LOG_ERR, "work retrieval failed, exiting "
@@ -793,47 +790,48 @@ static void *gpuminer_thread(void *userdata)
 			}
 
 			precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
-
 			work->blk.nonce = 0;
+			status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_FALSE, 0,
+				sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL);
+			if (unlikely(status != CL_SUCCESS))
+				{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
+
+			work_restart[thr_id].restart = 0;
 			need_work = false;
-		}
-		globalThreads[0] = threads;
-		localThreads[0] = 128;
 
-		status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_TRUE, 0,
-				sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL);
-		if (unlikely(status != CL_SUCCESS))
-			{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
+			if (opt_debug)
+				applog(LOG_DEBUG, "getwork");
+
+		}
+		clFinish(clState->commandQueue);
 
-		status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, NULL, 
+		status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, NULL,
 				globalThreads, localThreads, 0,  NULL, NULL);
 		if (unlikely(status != CL_SUCCESS))
 			{ applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; }
 
-		status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, 
-				BUFFERSIZE, res, 0, NULL, NULL);   
-		if (unlikely(status != CL_SUCCESS))
-			{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
+		/* 127 is used as a flag to say nonces exist */
 		if (unlikely(res[127])) {
-			/* 127 is used as a flag to say nonces exist */
+			/* Clear the buffer again */
+			status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+					BUFFERSIZE, blank_res, 0, NULL, NULL);
+			if (unlikely(status != CL_SUCCESS))
+				{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
 			for (i = 0; i < 127; i++) {
 				if (res[i]) {
-					uint32_t start = res[i];
-					uint32_t my_g, my_nonce;
-
 					applog(LOG_INFO, "GPU Found something?");
-					my_g = postcalc_hash(mythr, &work->blk, work, start, start + 1026, &my_nonce, &h0count);
-					res[i] = 0;
+					postcalc_hash(mythr, &work->blk, work, res[i]);
 				} else
 					break;
 			}
-			/* Clear the buffer again */
-			status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
-					BUFFERSIZE, res, 0, NULL, NULL);   
-			if (unlikely(status != CL_SUCCESS))
-				{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
+			clFinish(clState->commandQueue);
 		}
 
+		status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+				BUFFERSIZE, res, 0, NULL, NULL);
+		if (unlikely(status != CL_SUCCESS))
+			{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
+
 		gettimeofday(&tv_end, NULL);
 		timeval_subtract(&diff, &tv_end, &tv_start);
 		hashmeter(thr_id, &diff, threads);
@@ -844,6 +842,14 @@ static void *gpuminer_thread(void *userdata)
 		if (unlikely(work->blk.nonce > MAXTHREADS - threads) ||
 			(work_restart[thr_id].restart))
 				need_work = true;
+
+		clFinish(clState->commandQueue);
+
+		status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_FALSE, 0,
+				sizeof(dev_blk_ctx), (void *)&work->blk, 0, NULL, NULL);
+		if (unlikely(status != CL_SUCCESS))
+			{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
+
 	}
 out:
 	tq_freeze(mythr->q);
diff --git a/findnonce.c b/findnonce.c
index 7956a0b..3c310fc 100644
--- a/findnonce.c
+++ b/findnonce.c
@@ -131,14 +131,13 @@ void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
   R(E, F, G, H, A, B, C, D, P(u+4), SHA256_K[u+4]); \
   R(D, E, F, G, H, A, B, C, P(u+5), SHA256_K[u+5])
 
-uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
-		       struct work *work, uint32_t start, uint32_t end, 
-		       uint32_t *best_nonce, unsigned int *h0count)
+void postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, struct work *work, uint32_t start)
 {
 	cl_uint A, B, C, D, E, F, G, H;
 	cl_uint W[16];
 	cl_uint nonce;
 	cl_uint best_g = ~0;
+	uint32_t end = start + 1026;
 
 	for (nonce = start; nonce != end; nonce+=1) {
 		A = blk->cty_a; B = blk->cty_b;
@@ -171,8 +170,6 @@ uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
 		FR(48); PFR(56);
 
 		if (unlikely(H == 0xA41F32E7)) {
-			(*h0count)++;
-
 			if (unlikely(submit_nonce(thr, work, nonce) == false)) {
 				applog(LOG_ERR, "Failed to submit work, exiting");
 				goto out;
@@ -181,14 +178,11 @@ uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
 			G += 0x1f83d9ab;
 			G = ByteReverse(G);
 
-			if (G < best_g) {
-				*best_nonce = nonce;
+			if (G < best_g)
 				best_g = G;
-			}
 		}
 	}
 out:
-	// if (unlikely(best_g == ~0)) applog(LOG_ERR, "No best_g found! Error in OpenCL code?");
-
-	return best_g;
+	if (unlikely(best_g == ~0))
+		applog(LOG_ERR, "No best_g found! Error in OpenCL code?");
 }
diff --git a/findnonce.h b/findnonce.h
index 704aaae..8c6b9af 100644
--- a/findnonce.h
+++ b/findnonce.h
@@ -19,6 +19,4 @@ typedef struct {
 } dev_blk_ctx;
 
 extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
-extern uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
-			      struct work *work, uint32_t start, uint32_t end,
-			      uint32_t *best_nonce, unsigned int *h0count);
+extern void postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, struct work *work, uint32_t start);