Commit 39f7d2fa74567773549df6a04358b05b994176cc

Con Kolivas 2012-07-21T17:31:06

Allow lookup gap and thread concurrency to be passed per device and store details in kernel binary filename.

diff --git a/cgminer.c b/cgminer.c
index f2ea382..b37814d 100644
--- a/cgminer.c
+++ b/cgminer.c
@@ -854,6 +854,11 @@ static struct opt_table opt_config_table[] = {
 		     set_gpu_vddc, NULL, NULL,
 		     "Set the GPU voltage in Volts - one value for all or separate by commas for per card"),
 #endif
+#ifdef USE_SCRYPT
+	OPT_WITH_ARG("--lookup-gap",
+		     set_lookup_gap, NULL, NULL,
+		     "Set GPU lookup gap for scrypt mining, comma separated"),
+#endif
 	OPT_WITH_ARG("--intensity|-I",
 		     set_intensity, NULL, NULL,
 		     "Intensity of GPU scanning (d or " _MIN_INTENSITY_STR " -> " _MAX_INTENSITY_STR ", default: d to maintain desktop interactivity)"),
@@ -999,6 +1004,11 @@ static struct opt_table opt_config_table[] = {
 			opt_hidden
 #endif
 	),
+#ifdef USE_SCRYPT
+	OPT_WITH_ARG("--thread-concurrency",
+		     set_thread_concurrency, NULL, NULL,
+		     "Set GPU thread concurrency for scrypt mining, comma separated"),
+#endif
 	OPT_WITH_ARG("--url|-o",
 		     set_url, NULL, NULL,
 		     "URL for bitcoin JSON-RPC server"),
diff --git a/driver-opencl.c b/driver-opencl.c
index e44faef..059a7ec 100644
--- a/driver-opencl.c
+++ b/driver-opencl.c
@@ -127,6 +127,58 @@ char *set_worksize(char *arg)
 	return NULL;
 }
 
+#ifdef USE_SCRYPT
+/* Parse comma separated per-GPU lookup gaps; a lone value applies to all GPUs.
+ * Returns NULL on success or a static error string on bad input. */
+char *set_lookup_gap(char *arg)
+{
+	int i, val, device = 0;
+	char *nextptr;
+
+	nextptr = strtok(arg, ",");
+	if (nextptr == NULL)
+		return "Invalid parameters for set lookup gap";
+	while (nextptr != NULL) {
+		val = atoi(nextptr);
+		/* Refuse negative gaps and entries beyond the end of gpus[] */
+		if (val < 0 || device >= MAX_GPUDEVICES)
+			return "Invalid parameters for set lookup gap";
+		gpus[device++].lookup_gap = val;
+		nextptr = strtok(NULL, ",");
+	}
+	/* A single value is the setting for every device */
+	for (i = 1; device == 1 && i < MAX_GPUDEVICES; i++)
+		gpus[i].lookup_gap = gpus[0].lookup_gap;
+
+	return NULL;
+}
+
+/* Parse comma separated per-GPU thread concurrency values; a lone value
+ * applies to all GPUs. Returns NULL on success or a static error string. */
+char *set_thread_concurrency(char *arg)
+{
+	int i, val, device = 0;
+	char *nextptr;
+
+	nextptr = strtok(arg, ",");
+	if (nextptr == NULL)
+		return "Invalid parameters for set thread concurrency";
+	while (nextptr != NULL) {
+		val = atoi(nextptr);
+		/* Refuse negative values and entries beyond the end of gpus[] */
+		if (val < 0 || device >= MAX_GPUDEVICES)
+			return "Invalid parameters for set thread concurrency";
+		gpus[device++].thread_concurrency = val;
+		nextptr = strtok(NULL, ",");
+	}
+	/* A single value is the setting for every device */
+	for (i = 1; device == 1 && i < MAX_GPUDEVICES; i++)
+		gpus[i].thread_concurrency = gpus[0].thread_concurrency;
+
+	return NULL;
+}
+#endif
+
 static enum cl_kernels select_kernel(char *arg)
 {
 	if (!strcmp(arg, "diablo"))
diff --git a/driver-opencl.h b/driver-opencl.h
index 600bd85..f09571b 100644
--- a/driver-opencl.h
+++ b/driver-opencl.h
@@ -18,6 +18,10 @@ extern char *set_temp_target(char *arg);
 extern char *set_intensity(char *arg);
 extern char *set_vector(char *arg);
 extern char *set_worksize(char *arg);
+#ifdef USE_SCRYPT
+extern char *set_lookup_gap(char *arg);
+extern char *set_thread_concurrency(char *arg);
+#endif
 extern char *set_kernel(char *arg);
 void manage_gpu(void);
 extern void pause_dynamic_threads(int gpu);
diff --git a/miner.h b/miner.h
index 5cc683c..65c8fa1 100644
--- a/miner.h
+++ b/miner.h
@@ -360,6 +360,10 @@ struct cgpu_info {
 	size_t work_size;
 	enum cl_kernels kernel;
 
+#ifdef USE_SCRYPT
+	int lookup_gap;
+	int thread_concurrency;
+#endif
 	struct timeval tv_gpustart;;
 	struct timeval tv_gpuend;
 	double gpu_us_average;
diff --git a/ocl.c b/ocl.c
index 8f70a39..4f21b2b 100644
--- a/ocl.c
+++ b/ocl.c
@@ -367,6 +367,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	 * compiler to ensure we only load a binary that matches what would
 	 * have otherwise created. The filename is:
 	 * name + kernelname +/- g(offset) + v + vectors + w + work_size + l + sizeof(long) + .bin
+	 * For scrypt the filename is:
+	 * name + kernelname +/- g(offset) + lg + lookup_gap + tc + thread_concurrency + w + work_size + l + sizeof(long) + .bin
 	 */
 	char binaryfilename[255];
 	char filename[255];
@@ -461,6 +463,15 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
 	gpus[gpu].work_size = clState->wsize;
 
+#ifdef USE_SCRYPT
+	if (opt_scrypt) {
+		if (!gpus[gpu].lookup_gap)
+			gpus[gpu].lookup_gap = 2;
+		if (!gpus[gpu].thread_concurrency)
+			gpus[gpu].thread_concurrency = 2048;
+	}
+#endif
+
 	FILE *binaryfile;
 	size_t *binary_sizes;
 	char **binaries;
@@ -485,24 +496,19 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		return NULL;
 	}
 
-#ifdef USE_SCRYPT
-	if (opt_scrypt) {
-		clState->lookup_gap = 1;
-		clState->thread_concurrency = 6144;
-	}
-#endif
-
 	strcat(binaryfilename, name);
 	if (clState->goffset)
 		strcat(binaryfilename, "g");
-	strcat(binaryfilename, "v");
-	sprintf(numbuf, "%d", clState->vwidth);
-	strcat(binaryfilename, numbuf);
-	strcat(binaryfilename, "w");
-	sprintf(numbuf, "%d", (int)clState->wsize);
+	if (opt_scrypt) {
+		sprintf(numbuf, "lg%dtc%d", gpus[gpu].lookup_gap, gpus[gpu].thread_concurrency);
+		strcat(binaryfilename, numbuf);
+	} else {
+		sprintf(numbuf, "v%d", clState->vwidth);
+		strcat(binaryfilename, numbuf);
+	}
+	sprintf(numbuf, "w%d", (int)clState->wsize);
 	strcat(binaryfilename, numbuf);
-	strcat(binaryfilename, "l");
-	sprintf(numbuf, "%d", (int)sizeof(long));
+	sprintf(numbuf, "l%d", (int)sizeof(long));
 	strcat(binaryfilename, numbuf);
 	strcat(binaryfilename, ".bin");
 
@@ -566,7 +572,7 @@ build:
 #ifdef USE_SCRYPT
 	if (opt_scrypt)
 		sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
-			(int)clState->lookup_gap, (int)clState->thread_concurrency, (int)clState->wsize);
+			gpus[gpu].lookup_gap, gpus[gpu].thread_concurrency, (int)clState->wsize);
 	else
 #endif
 	{
@@ -753,8 +759,8 @@ built:
 
 #ifdef USE_SCRYPT
 	if (opt_scrypt) {
-		size_t ipt = (1024 / clState->lookup_gap + (1024 % clState->lookup_gap > 0));
-		size_t bufsize = 128 * ipt * clState->thread_concurrency;
+		size_t ipt = (1024 / gpus[gpu].lookup_gap + (1024 % gpus[gpu].lookup_gap > 0));
+		size_t bufsize = 128 * ipt * gpus[gpu].thread_concurrency;
 
 		clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 80, NULL, &status);
 		clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
diff --git a/ocl.h b/ocl.h
index 56fa9b0..984e7d6 100644
--- a/ocl.h
+++ b/ocl.h
@@ -22,8 +22,6 @@ typedef struct {
 #ifdef USE_SCRYPT
 	cl_mem CLbuffer0;
 	cl_mem padbuffer8;
-	size_t lookup_gap;
-	size_t thread_concurrency;
 	size_t padbufsize;
 	void * cldata;
 #endif