Do the dynamic timing in the OpenCL code within a single pass through scanhash, to make sure that only OpenCL execution time contributes to the measured intervals.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
diff --git a/driver-opencl.c b/driver-opencl.c
index 78908bf..5ca659c 100644
--- a/driver-opencl.c
+++ b/driver-opencl.c
@@ -1460,6 +1460,10 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
const int thr_id = thr->id;
struct opencl_thread_data *thrdata = thr->cgpu_data;
_clState *clState = clStates[thr_id];
+ struct cgpu_info *gpu = thr->cgpu;
+
+ if (gpu->dynamic)
+ return;
clFinish(clState->commandQueue);
if (thrdata->res[FOUND]) {
@@ -1491,7 +1495,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
const cl_kernel *kernel = &clState->kernel;
const int dynamic_us = opt_dynamic_interval * 1000;
struct timeval tv_gpuend;
- cl_bool blocking;
cl_int status;
size_t globalThreads[1];
@@ -1499,57 +1502,19 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
unsigned int threads;
int64_t hashes;
- if (gpu->dynamic)
- blocking = CL_TRUE;
- else
- blocking = CL_FALSE;
-
/* This finish flushes the readbuffer set with CL_FALSE later */
- if (!blocking)
+ if (!gpu->dynamic)
clFinish(clState->commandQueue);
- if (gpu->dynamic) {
- double gpu_us;
-
- /* Windows returns the same time for gettimeofday due to its
- * 15ms timer resolution, so we must average the result over
- * at least 5 values that are actually different to get an
- * accurate result */
- gpu->intervals++;
- gettimeofday(&tv_gpuend, NULL);
- gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpumid);
- if (gpu_us > 0 && ++gpu->hit > 4) {
- gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
- gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;
-
- /* Try to not let the GPU be out for longer than
- * opt_dynamic_interval in ms, but increase
- * intensity when the system is idle in dynamic mode */
- if (gpu->gpu_us_average > dynamic_us) {
- if (gpu->intensity > MIN_INTENSITY)
- --gpu->intensity;
- } else if (gpu->gpu_us_average < dynamic_us / 2) {
- if (gpu->intensity < MAX_INTENSITY)
- ++gpu->intensity;
- }
- gpu->intervals = gpu->hit = 0;
- }
- }
set_threads_hashes(clState->vwidth, &threads, &hashes, globalThreads,
localThreads[0], gpu->intensity);
if (hashes > gpu->max_hashes)
gpu->max_hashes = hashes;
- status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
- if (unlikely(status != CL_SUCCESS)) {
- applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
- return -1;
- }
-
/* MAXBUFFERS entry is used as a flag to say nonces exist */
if (thrdata->res[FOUND]) {
/* Clear the buffer again */
- status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
+ status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
BUFFERSIZE, blank_res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
@@ -1564,8 +1529,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
postcalc_hash_async(thr, work, thrdata->res);
}
memset(thrdata->res, 0, BUFFERSIZE);
- if (!blocking)
- clFinish(clState->commandQueue);
+ clFinish(clState->commandQueue);
}
gettimeofday(&gpu->tv_gpumid, NULL);
@@ -1574,6 +1538,12 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
gpu->tv_gpustart.tv_usec = gpu->tv_gpumid.tv_usec;
}
+ status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
+ if (unlikely(status != CL_SUCCESS)) {
+ applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
+ return -1;
+ }
+
if (clState->goffset) {
size_t global_work_offset[1];
@@ -1588,13 +1558,42 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
return -1;
}
- status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, blocking, 0,
+ status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
BUFFERSIZE, thrdata->res, 0, NULL, NULL);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error: clEnqueueReadBuffer failed error %d. (clEnqueueReadBuffer)", status);
return -1;
}
+ if (gpu->dynamic) {
+ double gpu_us;
+
+ clFinish(clState->commandQueue);
+ /* Windows returns the same time for gettimeofday due to its
+ * 15ms timer resolution, so we must average the result over
+ * at least 5 values that are actually different to get an
+ * accurate result */
+ gpu->intervals++;
+ gettimeofday(&tv_gpuend, NULL);
+ gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpumid);
+ if (gpu_us > 0 && ++gpu->hit > 4) {
+ gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
+ gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;
+
+ /* Try to not let the GPU be out for longer than
+ * opt_dynamic_interval in ms, but increase
+ * intensity when the system is idle in dynamic mode */
+ if (gpu->gpu_us_average > dynamic_us) {
+ if (gpu->intensity > MIN_INTENSITY)
+ --gpu->intensity;
+ } else if (gpu->gpu_us_average < dynamic_us / 2) {
+ if (gpu->intensity < MAX_INTENSITY)
+ ++gpu->intensity;
+ }
+ gpu->intervals = gpu->hit = 0;
+ }
+ }
+
/* The amount of work scanned can fluctuate when intensity changes
* and since we do this one cycle behind, we increment the work more
* than enough to prevent repeating work */