Dramatically simplify the dynamic intensity calculation by oversampling: time many consecutive runs through the OpenCL kernel, until the measured span is likely long enough to be resolved accurately despite the coarse timer resolution on Windows, and use the average.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
diff --git a/driver-opencl.c b/driver-opencl.c
index 7912a38..8bd876b 100644
--- a/driver-opencl.c
+++ b/driver-opencl.c
@@ -1463,10 +1463,8 @@ static void opencl_free_work(struct thr_info *thr, struct work *work)
const int thr_id = thr->id;
struct opencl_thread_data *thrdata = thr->cgpu_data;
_clState *clState = clStates[thr_id];
- struct cgpu_info *gpu = thr->cgpu;
- if (!gpu->dynamic)
- clFinish(clState->commandQueue);
+ clFinish(clState->commandQueue);
if (thrdata->res[FOUND]) {
thrdata->last_work = &thrdata->_last_work;
@@ -1496,7 +1494,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
_clState *clState = clStates[thr_id];
const cl_kernel *kernel = &clState->kernel;
const int dynamic_us = opt_dynamic_interval * 1000;
- struct timeval tv_gpuend;
cl_int status;
size_t globalThreads[1];
@@ -1504,8 +1501,25 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
int64_t hashes;
/* This finish flushes the readbuffer set with CL_FALSE later */
- if (!gpu->dynamic)
- clFinish(clState->commandQueue);
+ clFinish(clState->commandQueue);
+
+ /* Windows' timer resolution is only 15ms so oversample 5x */
+ if (gpu->dynamic && (++gpu->intervals * dynamic_us) > 75) {
+ struct timeval tv_gpuend;
+ double gpu_us;
+
+ gettimeofday(&tv_gpuend, NULL);
+ gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
+ if (gpu_us > dynamic_us) {
+ if (gpu->intensity > MIN_INTENSITY)
+ --gpu->intensity;
+ } else if (gpu_us < dynamic_us / 2) {
+ if (gpu->intensity < MAX_INTENSITY)
+ ++gpu->intensity;
+ }
+ memcpy(&(gpu->tv_gpustart), &tv_gpuend, sizeof(struct timeval));
+ gpu->intervals = 0;
+ }
set_threads_hashes(clState->vwidth, &hashes, globalThreads, localThreads[0], &gpu->intensity);
if (hashes > gpu->max_hashes)
@@ -1532,18 +1546,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
clFinish(clState->commandQueue);
}
- if (gpu->dynamic) {
- gettimeofday(&gpu->tv_gpumid, NULL);
- if (gpu->new_work) {
- gpu->new_work = false;
- gpu->intervals = gpu->hit = 0;
- }
- if (!gpu->intervals) {
- gpu->tv_gpustart.tv_sec = gpu->tv_gpumid.tv_sec;
- gpu->tv_gpustart.tv_usec = gpu->tv_gpumid.tv_usec;
- }
- }
-
status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]);
if (unlikely(status != CL_SUCCESS)) {
applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
@@ -1571,39 +1573,6 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
return -1;
}
- if (gpu->dynamic) {
- double gpu_us;
-
- clFinish(clState->commandQueue);
- /* Windows returns the same time for gettimeofday due to its
- * 15ms timer resolution, so we must average the result over
- * at least 5 values that are actually different to get an
- * accurate result */
- gpu->intervals++;
- gettimeofday(&tv_gpuend, NULL);
- gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpumid);
- if (gpu_us > 0 && ++gpu->hit > 4) {
- gpu_us = us_tdiff(&tv_gpuend, &gpu->tv_gpustart) / gpu->intervals;
- /* Very rarely we may get an overflow so put an upper
- * limit on the detected time */
- if (unlikely(gpu->gpu_us_average > 0 && gpu_us > gpu->gpu_us_average * 4))
- gpu_us = gpu->gpu_us_average * 4;
- gpu->gpu_us_average = (gpu->gpu_us_average + gpu_us * 0.63) / 1.63;
-
- /* Try to not let the GPU be out for longer than
- * opt_dynamic_interval in ms, but increase
- * intensity when the system is idle in dynamic mode */
- if (gpu->gpu_us_average > dynamic_us) {
- if (gpu->intensity > MIN_INTENSITY)
- --gpu->intensity;
- } else if (gpu->gpu_us_average < dynamic_us / 2) {
- if (gpu->intensity < MAX_INTENSITY)
- ++gpu->intensity;
- }
- gpu->intervals = gpu->hit = 0;
- }
- }
-
/* The amount of work scanned can fluctuate when intensity changes
* and since we do this one cycle behind, we increment the work more
* than enough to prevent repeating work */
diff --git a/miner.h b/miner.h
index 5e98244..22618b2 100644
--- a/miner.h
+++ b/miner.h
@@ -401,9 +401,7 @@ struct cgpu_info {
size_t shaders;
#endif
struct timeval tv_gpustart;
- struct timeval tv_gpumid;
- double gpu_us_average;
- int intervals, hit;
+ int intervals;
#endif
bool new_work;