Commit 656b485d808d7ae22ca8df15dbdc7acbc818473c

Con Kolivas 2011-06-25T18:58:59

Make the worksize and vector width configurable.

diff --git a/cpu-miner.c b/cpu-miner.c
index 804c1c7..a1aa090 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -120,6 +120,8 @@ static bool opt_quiet = false;
 static int opt_retries = 10;
 static int opt_fail_pause = 30;
 static int opt_log_interval = 5;
+int opt_vectors;
+int opt_worksize;
 int opt_scantime = 60;
 static json_t *opt_config;
 static const bool opt_time = true;
@@ -231,6 +233,12 @@ static struct option_help options_help[] = {
 	  "(-u USERNAME) Username for bitcoin JSON-RPC server "
 	  "(default: " DEF_RPC_USERNAME ")" },
 
+	{ "vectors N",
+	  "(-v N) Override detected optimal vector width (default: detected, 1,2 or 4)" },
+
+	{ "worksize N",
+	  "(-w N) Override detected optimal worksize (default: detected)" },
+
 	{ "pass PASSWORD",
 	  "(-p PASSWORD) Password for bitcoin JSON-RPC server "
 	  "(default: " DEF_RPC_PASSWORD ")" },
@@ -257,6 +265,8 @@ static struct option options[] = {
 #endif
 	{ "url", 1, NULL, 1001 },
 	{ "user", 1, NULL, 'u' },
+	{ "vectors", 1, NULL, 'v' },
+	{ "worksize", 1, NULL, 'w' },
 	{ "userpass", 1, NULL, 1002 },
 };
 
@@ -902,7 +912,7 @@ static void *gpuminer_thread(void *userdata)
 
 	gettimeofday(&tv_start, NULL);
 	globalThreads[0] = threads;
-	localThreads[0] = clState->max_work_size / vectors;
+	localThreads[0] = clState->work_size;
 
 	while (1) {
 		struct timeval tv_end, diff, tv_workstart;
@@ -1164,6 +1174,20 @@ static void parse_arg (int key, char *arg)
 		free(rpc_user);
 		rpc_user = strdup(arg);
 		break;
+	case 'v':
+		v = atoi(arg);
+		if (v != 1 && v != 2 && v != 4)
+			show_usage();
+
+		opt_vectors = v;
+		break;
+	case 'w':
+		v = atoi(arg);
+		if (v < 1 || v > 9999)	/* sanity check */
+			show_usage();
+
+		opt_worksize = v;
+		break;
 	case 1001:			/* --url */
 		if (strncmp(arg, "http://", 7) &&
 		    strncmp(arg, "https://", 8))
diff --git a/ocl.c b/ocl.c
index 3163dd0..f3ba972 100644
--- a/ocl.c
+++ b/ocl.c
@@ -14,6 +14,9 @@
 #include "findnonce.h"
 #include "ocl.h"
 
+extern int opt_vectors;
+extern int opt_worksize;
+
 char *file_contents(const char *filename, int *length)
 {
 	FILE *f = fopen(filename, "r");
@@ -309,6 +312,13 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	char *source = file_contents(filename, &pl);
 	size_t sourceSize[] = {(size_t)pl};
 
+	if (opt_vectors)
+		clState->preferred_vwidth = opt_vectors;
+	if (opt_worksize && opt_worksize <= clState->max_work_size)
+		clState->work_size = opt_worksize;
+	else
+		clState->work_size = clState->max_work_size / clState->preferred_vwidth;
+
 	/* Patch the source file with the preferred_vwidth */
 	if (clState->preferred_vwidth > 1) {
 		char *find = strstr(source, "VECTORSX");
@@ -342,7 +352,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		applog(LOG_DEBUG, "cl_amd_media_ops not found, will not BFI_INT patch");
 
 	applog(LOG_INFO, "Initialising kernel with%s BFI_INT patching, %d vectors and worksize %d",
-	       hasBitAlign ? "" : "out", clState->preferred_vwidth, clState->max_work_size / clState->preferred_vwidth);
+	       hasBitAlign ? "" : "out", clState->preferred_vwidth, clState->work_size);
 
 	clState->program = clCreateProgramWithSource(clState->context, 1, (const char **)&source, sourceSize, &status);
 	if(status != CL_SUCCESS) 
diff --git a/ocl.h b/ocl.h
index 311fee1..a50c0b6 100644
--- a/ocl.h
+++ b/ocl.h
@@ -14,6 +14,7 @@ typedef struct {
 	cl_mem outputBuffer;
 	cl_uint preferred_vwidth;
 	size_t max_work_size;
+	size_t work_size;
 } _clState;
 
 extern char *file_contents(const char *filename, int *length);