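The non-blocking read of the result buffer needs to be flushed before the kernel is enqueued again. Further optimise the mining loop by removing the need_work bool and checking the new-work conditions directly at the top of the loop.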
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
diff --git a/cpu-miner.c b/cpu-miner.c
index 1ea2fd0..8870272 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -871,7 +871,7 @@ static inline int gpu_from_thr_id(int thr_id)
static void *gpuminer_thread(void *userdata)
{
struct thr_info *mythr = userdata;
- struct timeval tv_start;
+ struct timeval tv_start, diff;
int thr_id = mythr->id;
uint32_t res[128], blank_res[128];
cl_kernel *kernel;
@@ -892,7 +892,6 @@ static void *gpuminer_thread(void *userdata)
{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
struct work *work = malloc(sizeof(struct work));
- bool need_work = true;
unsigned const int threads = 1 << (15 + scan_intensity);
unsigned const int vectors = clState->preferred_vwidth;
unsigned const int hashes = threads * vectors;
@@ -901,14 +900,16 @@ static void *gpuminer_thread(void *userdata)
gettimeofday(&tv_start, NULL);
globalThreads[0] = threads;
localThreads[0] = clState->work_size;
+ work_restart[thr_id].restart = 1;
+ diff.tv_sec = 0;
while (1) {
- struct timeval tv_end, diff, tv_workstart;
+ struct timeval tv_end, tv_workstart;
unsigned int i;
+ /* This finish flushes the readbuffer set with CL_FALSE later */
clFinish(clState->commandQueue);
-
- if (need_work) {
+ if (diff.tv_sec > opt_scantime || work->blk.nonce > MAXTHREADS - hashes || work_restart[thr_id].restart) {
gettimeofday(&tv_workstart, NULL);
/* obtain new work from internal workio thread */
if (unlikely(!get_work(work))) {
@@ -924,22 +925,15 @@ static void *gpuminer_thread(void *userdata)
{ applog(LOG_ERR, "Error: clSetKernelArg of all params failed."); goto out; }
work_restart[thr_id].restart = 0;
- need_work = false;
if (opt_debug)
applog(LOG_DEBUG, "getwork");
-
} else {
status = clSetKernelArg(*kernel, 14, sizeof(uint), (void *)&work->blk.nonce);
if (unlikely(status != CL_SUCCESS))
{ applog(LOG_ERR, "Error: clSetKernelArg of nonce failed."); goto out; }
}
- status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
- globalThreads, localThreads, 0, NULL, NULL);
- if (unlikely(status != CL_SUCCESS))
- { applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; }
-
/* 127 is used as a flag to say nonces exist */
if (unlikely(res[127])) {
/* Clear the buffer again */
@@ -957,6 +951,11 @@ static void *gpuminer_thread(void *userdata)
clFinish(clState->commandQueue);
}
+ status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
+ globalThreads, localThreads, 0, NULL, NULL);
+ if (unlikely(status != CL_SUCCESS))
+ { applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; }
+
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
BUFFERSIZE, res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS))
@@ -973,11 +972,6 @@ static void *gpuminer_thread(void *userdata)
}
timeval_subtract(&diff, &tv_end, &tv_workstart);
-
- if (diff.tv_sec > opt_scantime ||
- work->blk.nonce > MAXTHREADS - hashes ||
- work_restart[thr_id].restart)
- need_work = true;
}
out:
tq_freeze(mythr->q);