Commit 217be6ed98c907c3217c5eb587e15f53ff30e3d9

Con Kolivas 2012-11-25T00:37:46

Do away with the flaky free_work API in the driver code, which would often lose the work data in OpenCL, and instead simply flush the command queue before exiting the OpenCL scanhash.

diff --git a/cgminer.c b/cgminer.c
index 5654154..9b71b36 100644
--- a/cgminer.c
+++ b/cgminer.c
@@ -5535,8 +5535,6 @@ void *miner_thread(void *userdata)
 
 	while (1) {
 		mythr->work_restart = false;
-		if (api->free_work && likely(work->pool))
-			api->free_work(mythr, work);
 		get_work(work, mythr, thr_id);
 		cgpu->new_work = true;
 
diff --git a/driver-opencl.c b/driver-opencl.c
index 9dc03a9..2dd0454 100644
--- a/driver-opencl.c
+++ b/driver-opencl.c
@@ -1455,17 +1455,6 @@ static bool opencl_thread_init(struct thr_info *thr)
 	return true;
 }
 
-static void opencl_free_work(struct thr_info *thr, struct work *work)
-{
-	const int thr_id = thr->id;
-	struct opencl_thread_data *thrdata = thr->cgpu_data;
-	_clState *clState = clStates[thr_id];
-
-	clFinish(clState->commandQueue);
-
-	if (thrdata->res[FOUND])
-		thrdata->last_work = copy_work(work);
-}
 
 static bool opencl_prepare_work(struct thr_info __maybe_unused *thr, struct work *work)
 {
@@ -1495,9 +1484,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	size_t localThreads[1] = { clState->wsize };
 	int64_t hashes;
 
-	/* This finish flushes the readbuffer set with CL_FALSE later */
-	clFinish(clState->commandQueue);
-
 	/* Windows' timer resolution is only 15ms so oversample 5x */
 	if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 70000) {
 		struct timeval tv_gpuend;
@@ -1520,28 +1506,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	if (hashes > gpu->max_hashes)
 		gpu->max_hashes = hashes;
 
-	/* FOUND entry is used as a counter to say how many nonces exist */
-	if (thrdata->res[FOUND]) {
-		/* Clear the buffer again */
-		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
-				BUFFERSIZE, blank_res, 0, NULL, NULL);
-		if (unlikely(status != CL_SUCCESS)) {
-			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
-			return -1;
-		}
-		if (unlikely(thrdata->last_work)) {
-			applog(LOG_DEBUG, "GPU %d found something in last work?", gpu->device_id);
-			postcalc_hash_async(thr, thrdata->last_work, thrdata->res);
-			free_work(thrdata->last_work);
-			thrdata->last_work = NULL;
-		} else {
-			applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
-			postcalc_hash_async(thr, work, thrdata->res);
-		}
-		memset(thrdata->res, 0, BUFFERSIZE);
-		clFinish(clState->commandQueue);
-	}
-
 	status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
@@ -1574,6 +1538,32 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	 * than enough to prevent repeating work */
 	work->blk.nonce += gpu->max_hashes;
 
+	/* This finish flushes the readbuffer set with CL_FALSE in clEnqueueReadBuffer */
+	clFinish(clState->commandQueue);
+
+	/* FOUND entry is used as a counter to say how many nonces exist */
+	if (thrdata->res[FOUND]) {
+		/* Clear the buffer again */
+		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+				BUFFERSIZE, blank_res, 0, NULL, NULL);
+		if (unlikely(status != CL_SUCCESS)) {
+			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
+			return -1;
+		}
+		if (unlikely(thrdata->last_work)) {
+			applog(LOG_DEBUG, "GPU %d found something in last work?", gpu->device_id);
+			postcalc_hash_async(thr, thrdata->last_work, thrdata->res);
+			free_work(thrdata->last_work);
+			thrdata->last_work = NULL;
+		} else {
+			applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
+			postcalc_hash_async(thr, work, thrdata->res);
+		}
+		memset(thrdata->res, 0, BUFFERSIZE);
+		/* This finish flushes the writebuffer set with CL_FALSE in clEnqueueWriteBuffer */
+		clFinish(clState->commandQueue);
+	}
+
 	return hashes;
 }
 
@@ -1599,7 +1589,6 @@ struct device_api opencl_api = {
 	.get_statline = get_opencl_statline,
 	.thread_prepare = opencl_thread_prepare,
 	.thread_init = opencl_thread_init,
-	.free_work = opencl_free_work,
 	.prepare_work = opencl_prepare_work,
 	.scanhash = opencl_scanhash,
 	.thread_shutdown = opencl_thread_shutdown,
diff --git a/miner.h b/miner.h
index 0a97e4a..ba174a8 100644
--- a/miner.h
+++ b/miner.h
@@ -268,7 +268,6 @@ struct device_api {
 	bool (*thread_prepare)(struct thr_info*);
 	uint64_t (*can_limit_work)(struct thr_info*);
 	bool (*thread_init)(struct thr_info*);
-	void (*free_work)(struct thr_info*, struct work*);
 	bool (*prepare_work)(struct thr_info*, struct work*);
 	int64_t (*scanhash)(struct thr_info*, struct work*, int64_t);
 	void (*hw_error)(struct thr_info*);