Revert "Don't use asynchronous work with flushes as it decreases reliability and two threads per GPU achieves the same throughput."

This reverts commit 7223508f7e5e77ca9b5ab7ffbc96817b3bbf2461.

The reverted change was a bad idea; we need to work around the sync lineup instead.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
diff --git a/main.c b/main.c
index abde75a..84179ff 100644
--- a/main.c
+++ b/main.c
@@ -3030,12 +3030,34 @@ static void *gpuminer_thread(void *userdata)
struct timeval tv_gpustart, tv_gpuend;
suseconds_t gpu_us;
+ gettimeofday(&tv_gpustart, NULL);
+ timeval_subtract(&diff, &tv_gpustart, &tv_gpuend);
+ /* This finish flushes the readbuffer set with CL_FALSE later */
+ clFinish(clState->commandQueue);
+ gettimeofday(&tv_gpuend, NULL);
+ timeval_subtract(&diff, &tv_gpuend, &tv_gpustart);
+ gpu_us = diff.tv_sec * 1000000 + diff.tv_usec;
+ decay_time(&gpu_ms_average, gpu_us / 1000);
+ if (opt_dynamic) {
+ /* Try to not let the GPU be out for longer than 6ms, but
+ * increase intensity when the system is idle, unless
+ * dynamic is disabled. */
+ if (gpu_ms_average > 7) {
+ if (scan_intensity > -10)
+ scan_intensity--;
+ } else if (gpu_ms_average < 3) {
+ if (scan_intensity < 10)
+ scan_intensity++;
+ }
+ }
+ set_threads_hashes(vectors, &threads, &hashes, globalThreads, localThreads[0]);
+
if (diff.tv_sec > opt_scantime ||
work->blk.nonce >= MAXTHREADS - hashes ||
work_restart[thr_id].restart ||
stale_work(work)) {
/* Ignore any reads since we're getting new work and queue a clean buffer */
- status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
+ status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
BUFFERSIZE, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
@@ -3054,6 +3076,9 @@ static void *gpuminer_thread(void *userdata)
precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
work_restart[thr_id].restart = 0;
+
+ /* Flushes the writebuffer set with CL_FALSE above */
+ clFinish(clState->commandQueue);
}
status = queue_kernel_parameters(clState, &work->blk);
if (unlikely(status != CL_SUCCESS))
@@ -3062,7 +3087,7 @@ static void *gpuminer_thread(void *userdata)
/* MAXBUFFERS entry is used as a flag to say nonces exist */
if (res[MAXBUFFERS]) {
/* Clear the buffer again */
- status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
+ status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
BUFFERSIZE, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
@@ -3070,6 +3095,7 @@ static void *gpuminer_thread(void *userdata)
applog(LOG_DEBUG, "GPU %d found something?", gpu);
postcalc_hash_async(mythr, work, res);
memset(res, 0, BUFFERSIZE);
+ clFinish(clState->commandQueue);
}
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
@@ -3077,32 +3103,11 @@ static void *gpuminer_thread(void *userdata)
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; }
- gettimeofday(&tv_gpustart, NULL);
- timeval_subtract(&diff, &tv_gpustart, &tv_gpuend);
-
- status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
+ status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
BUFFERSIZE, res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
- gettimeofday(&tv_gpuend, NULL);
- timeval_subtract(&diff, &tv_gpuend, &tv_gpustart);
- gpu_us = diff.tv_sec * 1000000 + diff.tv_usec;
- decay_time(&gpu_ms_average, gpu_us / 1000);
- if (opt_dynamic) {
- /* Try to not let the GPU be out for longer than 6ms, but
- * increase intensity when the system is idle, unless
- * dynamic is disabled. */
- if (gpu_ms_average > 7) {
- if (scan_intensity > -10)
- scan_intensity--;
- } else if (gpu_ms_average < 3) {
- if (scan_intensity < 10)
- scan_intensity++;
- }
- }
- set_threads_hashes(vectors, &threads, &hashes, globalThreads, localThreads[0]);
-
gettimeofday(&tv_end, NULL);
timeval_subtract(&diff, &tv_end, &tv_start);
hashes_done += hashes;