Commit ea10b08dcecdd0852dee6903331adbcd0a038ac7

Con Kolivas 2012-07-25T22:02:14

Find the nearest power-of-2 maximum alloc size for the scrypt buffer that can successfully be allocated and is large enough to accommodate the chosen thread concurrency, thus mapping it to an intensity.

diff --git a/ocl.c b/ocl.c
index f726444..ba8cde2 100644
--- a/ocl.c
+++ b/ocl.c
@@ -472,17 +472,35 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 
 #ifdef USE_SCRYPT
 	if (opt_scrypt) {
+		cl_ulong ma = gpus[gpu].max_alloc, mt;
+		int pow2 = 0;
+
 		if (!gpus[gpu].lookup_gap) {
 			applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
 			gpus[gpu].lookup_gap = 2;
 		}
 		if (!gpus[gpu].thread_concurrency) {
-			gpus[gpu].thread_concurrency = gpus[gpu].max_alloc / 32768 / gpus[gpu].lookup_gap;
+			gpus[gpu].thread_concurrency = ma / 32768 / gpus[gpu].lookup_gap;
 			if (gpus[gpu].shaders && gpus[gpu].thread_concurrency > gpus[gpu].shaders)
 				gpus[gpu].thread_concurrency -= gpus[gpu].thread_concurrency % gpus[gpu].shaders;
 				
 			applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %u",gpu,  gpus[gpu].thread_concurrency);
 		}
+
+		/* If we have memory to spare, try to find a power of 2 value
+		 * >= required amount to map nicely to an intensity */
+		mt = gpus[gpu].thread_concurrency * 32768 * gpus[gpu].lookup_gap;
+		if (ma > mt) {
+			while (ma >>= 1)
+				pow2++;
+			ma = 1;
+			while (--pow2 && ma < mt)
+				ma <<= 1;
+			if (ma >= mt) {
+				gpus[gpu].max_alloc = ma;
+				applog(LOG_DEBUG, "Max alloc decreased to %lu", gpus[gpu].max_alloc);
+			}
+		}
 	}
 #endif
 
@@ -776,8 +794,8 @@ built:
 		size_t ipt = (1024 / gpus[gpu].lookup_gap + (1024 % gpus[gpu].lookup_gap > 0));
 		size_t bufsize = 128 * ipt * gpus[gpu].thread_concurrency;
 
-		/* Always allocate the largest possible buffer allowed, even if we're not initially requiring it
-		 * based on thread_concurrency, giving us some headroom for intensity levels. */
+		/* Use the max alloc value, which was rounded earlier to a power
+		 * of 2 >= the required amount */
 		if (bufsize > gpus[gpu].max_alloc) {
 			applog(LOG_WARNING, "Maximum buffer memory device %d supports says %u, your scrypt settings come to %u",
 			       gpu, gpus[gpu].max_alloc, bufsize);