Find the nearest power of 2 maximum alloc size for the scrypt buffer that can successfully be allocated and is large enough to accommodate the thread concurrency chosen, thus mapping it to an intensity.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
diff --git a/ocl.c b/ocl.c
index f726444..ba8cde2 100644
--- a/ocl.c
+++ b/ocl.c
@@ -472,17 +472,35 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
#ifdef USE_SCRYPT
if (opt_scrypt) {
+ cl_ulong ma = gpus[gpu].max_alloc, mt;
+ int pow2 = 0;
+
if (!gpus[gpu].lookup_gap) {
applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
gpus[gpu].lookup_gap = 2;
}
if (!gpus[gpu].thread_concurrency) {
- gpus[gpu].thread_concurrency = gpus[gpu].max_alloc / 32768 / gpus[gpu].lookup_gap;
+ gpus[gpu].thread_concurrency = ma / 32768 / gpus[gpu].lookup_gap;
if (gpus[gpu].shaders && gpus[gpu].thread_concurrency > gpus[gpu].shaders)
gpus[gpu].thread_concurrency -= gpus[gpu].thread_concurrency % gpus[gpu].shaders;
applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %u",gpu, gpus[gpu].thread_concurrency);
}
+
+ /* If we have memory to spare, try to find a power of 2 value
+ * >= required amount to map nicely to an intensity */
+ mt = gpus[gpu].thread_concurrency * 32768 * gpus[gpu].lookup_gap;
+ if (ma > mt) {
+ while (ma >>= 1)
+ pow2++;
+ ma = 1;
+ while (--pow2 && ma < mt)
+ ma <<= 1;
+ if (ma >= mt) {
+ gpus[gpu].max_alloc = ma;
+ applog(LOG_DEBUG, "Max alloc decreased to %lu", gpus[gpu].max_alloc);
+ }
+ }
}
#endif
@@ -776,8 +794,8 @@ built:
size_t ipt = (1024 / gpus[gpu].lookup_gap + (1024 % gpus[gpu].lookup_gap > 0));
size_t bufsize = 128 * ipt * gpus[gpu].thread_concurrency;
- /* Always allocate the largest possible buffer allowed, even if we're not initially requiring it
- * based on thread_concurrency, giving us some headroom for intensity levels. */
+ /* Use the max alloc value which has been rounded to a power of
+ * 2 greater >= required amount earlier */
if (bufsize > gpus[gpu].max_alloc) {
applog(LOG_WARNING, "Maximum buffer memory device %d supports says %u, your scrypt settings come to %u",
gpu, gpus[gpu].max_alloc, bufsize);