Commit d8f81c18eed045cd084f97464cdc8fa0be5833e2

Con Kolivas 2012-07-23T17:51:57

Use the detected maximum allocatable memory on a GPU to determine the optimal scrypt settings when the lookup_gap and thread_concurrency parameters are not given.

diff --git a/ocl.c b/ocl.c
index 880aaf7..bd8fd6d 100644
--- a/ocl.c
+++ b/ocl.c
@@ -472,10 +472,14 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 
 #ifdef USE_SCRYPT
 	if (opt_scrypt) {
-		if (!gpus[gpu].lookup_gap)
+		if (!gpus[gpu].lookup_gap) {
+			applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
 			gpus[gpu].lookup_gap = 2;
-		if (!gpus[gpu].thread_concurrency)
-			gpus[gpu].thread_concurrency = 2048;
+		}
+		if (!gpus[gpu].thread_concurrency) {
+			gpus[gpu].thread_concurrency = gpus[gpu].max_alloc / 32768 / gpus[gpu].lookup_gap;
+			applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %u",gpu,  gpus[gpu].thread_concurrency);
+		}
 	}
 #endif
 
@@ -769,9 +773,11 @@ built:
 		size_t ipt = (1024 / gpus[gpu].lookup_gap + (1024 % gpus[gpu].lookup_gap > 0));
 		size_t bufsize = 128 * ipt * gpus[gpu].thread_concurrency;
 
-		if (bufsize % 256)
-			bufsize += (256 - bufsize % 256);
 		applog(LOG_DEBUG, "Creating scrypt buffer sized %d", bufsize);
+		if (bufsize > gpus[gpu].max_alloc) {
+			applog(LOG_WARNING, "Maximum buffer memory device %d supports says %u, your scrypt settings come to %u",
+			       gpu, gpus[gpu].max_alloc, bufsize);
+		}
 		clState->padbufsize = bufsize;
 		clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
 		if (status != CL_SUCCESS) {