Don't use asynchronous work with flushes as it decreases reliability and two threads per GPU achieves the same throughput.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
diff --git a/main.c b/main.c
index 84179ff..abde75a 100644
--- a/main.c
+++ b/main.c
@@ -3030,34 +3030,12 @@ static void *gpuminer_thread(void *userdata)
struct timeval tv_gpustart, tv_gpuend;
suseconds_t gpu_us;
- gettimeofday(&tv_gpustart, NULL);
- timeval_subtract(&diff, &tv_gpustart, &tv_gpuend);
- /* This finish flushes the readbuffer set with CL_FALSE later */
- clFinish(clState->commandQueue);
- gettimeofday(&tv_gpuend, NULL);
- timeval_subtract(&diff, &tv_gpuend, &tv_gpustart);
- gpu_us = diff.tv_sec * 1000000 + diff.tv_usec;
- decay_time(&gpu_ms_average, gpu_us / 1000);
- if (opt_dynamic) {
- /* Try to not let the GPU be out for longer than 6ms, but
- * increase intensity when the system is idle, unless
- * dynamic is disabled. */
- if (gpu_ms_average > 7) {
- if (scan_intensity > -10)
- scan_intensity--;
- } else if (gpu_ms_average < 3) {
- if (scan_intensity < 10)
- scan_intensity++;
- }
- }
- set_threads_hashes(vectors, &threads, &hashes, globalThreads, localThreads[0]);
-
if (diff.tv_sec > opt_scantime ||
work->blk.nonce >= MAXTHREADS - hashes ||
work_restart[thr_id].restart ||
stale_work(work)) {
/* Ignore any reads since we're getting new work and queue a clean buffer */
- status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+ status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
BUFFERSIZE, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
@@ -3076,9 +3054,6 @@ static void *gpuminer_thread(void *userdata)
precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
work_restart[thr_id].restart = 0;
-
- /* Flushes the writebuffer set with CL_FALSE above */
- clFinish(clState->commandQueue);
}
status = queue_kernel_parameters(clState, &work->blk);
if (unlikely(status != CL_SUCCESS))
@@ -3087,7 +3062,7 @@ static void *gpuminer_thread(void *userdata)
/* MAXBUFFERS entry is used as a flag to say nonces exist */
if (res[MAXBUFFERS]) {
/* Clear the buffer again */
- status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+ status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
BUFFERSIZE, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
@@ -3095,7 +3070,6 @@ static void *gpuminer_thread(void *userdata)
applog(LOG_DEBUG, "GPU %d found something?", gpu);
postcalc_hash_async(mythr, work, res);
memset(res, 0, BUFFERSIZE);
- clFinish(clState->commandQueue);
}
status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
@@ -3103,11 +3077,32 @@ static void *gpuminer_thread(void *userdata)
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; }
- status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+ gettimeofday(&tv_gpustart, NULL);
+ timeval_subtract(&diff, &tv_gpustart, &tv_gpuend);
+
+ status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
BUFFERSIZE, res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
+ gettimeofday(&tv_gpuend, NULL);
+ timeval_subtract(&diff, &tv_gpuend, &tv_gpustart);
+ gpu_us = diff.tv_sec * 1000000 + diff.tv_usec;
+ decay_time(&gpu_ms_average, gpu_us / 1000);
+ if (opt_dynamic) {
+ /* Try to not let the GPU be out for longer than 6ms, but
+ * increase intensity when the system is idle, unless
+ * dynamic is disabled. */
+ if (gpu_ms_average > 7) {
+ if (scan_intensity > -10)
+ scan_intensity--;
+ } else if (gpu_ms_average < 3) {
+ if (scan_intensity < 10)
+ scan_intensity++;
+ }
+ }
+ set_threads_hashes(vectors, &threads, &hashes, globalThreads, localThreads[0]);
+
gettimeofday(&tv_end, NULL);
timeval_subtract(&diff, &tv_end, &tv_start);
hashes_done += hashes;