Commit 709c4cd8e1cd56e644e3842a3dfeec61f5b7d3b9

Con Kolivas 2012-02-23T20:24:32

Use the diablo kernel for Tahiti on all future SDKs, and set the preferred vector width to 1 for the poclbm kernel only.

diff --git a/ocl.c b/ocl.c
index 1a355ef..e5eb6ec 100644
--- a/ocl.c
+++ b/ocl.c
@@ -332,32 +332,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	}
 	applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size);
 
-	/* For some reason 2 vectors is still better even if the card says
-	 * otherwise, and many cards lie about their max so use 256 as max
-	 * unless explicitly set on the command line. 79x0 cards perform
-	 * better without vectors */
-	if (preferred_vwidth > 1) {
-		if (strstr(name, "Tahiti"))
-			preferred_vwidth = 1;
-		else
-			preferred_vwidth = 2;
-	}
-
-	if (gpus[gpu].vwidth)
-		clState->vwidth = gpus[gpu].vwidth;
-	else {
-		clState->vwidth = preferred_vwidth;
-		gpus[gpu].vwidth = preferred_vwidth;
-	}
-
-	if (gpus[gpu].work_size && gpus[gpu].work_size <= clState->max_work_size)
-		clState->wsize = gpus[gpu].work_size;
-	else if (strstr(name, "Tahiti"))
-		clState->wsize = 64;
-	else
-		clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
-	gpus[gpu].work_size = clState->wsize;
-
 	/* Create binary filename based on parameters passed to opencl
 	 * compiler to ensure we only load a binary that matches what would
 	 * have otherwise created. The filename is:
@@ -378,9 +352,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 				applog(LOG_INFO, "Selecting diablo kernel");
 				clState->chosen_kernel = KL_DIABLO;
 			}
-		} else if (strstr(vbuff, "898.1")) { // Windows 64 bit 12.2 driver
-			applog(LOG_INFO, "Selecting diablo kernel");
-			clState->chosen_kernel = KL_DIABLO;
+		} else if (strstr(vbuff, "898.1") || // Windows 64 bit 12.2 driver
+			   strstr(name, "Tahiti")) { // All non SDK 2.6 79x0
+				applog(LOG_INFO, "Selecting diablo kernel");
+				clState->chosen_kernel = KL_DIABLO;
 		} else if (clState->hasBitAlign) {
 			applog(LOG_INFO, "Selecting phatk kernel");
 			clState->chosen_kernel = KL_PHATK;
@@ -393,10 +368,18 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 	} else
 		clState->chosen_kernel = gpus[gpu].kernel;
 
+	/* For some reason 2 vectors is still better even if the card says
+	 * otherwise, and many cards lie about their max so use 256 as max
+	 * unless explicitly set on the command line. */
+	if (preferred_vwidth > 2)
+		preferred_vwidth = 2;
+
 	switch (clState->chosen_kernel) {
 		case KL_POCLBM:
 			strcpy(filename, POCLBM_KERNNAME".cl");
 			strcpy(binaryfilename, POCLBM_KERNNAME);
+			/* This kernel prefers to not use vectors */
+			preferred_vwidth = 1;
 			break;
 		case KL_PHATK:
 			strcpy(filename, PHATK_KERNNAME".cl");
@@ -413,6 +396,21 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 			break;
 	}
 
+	if (gpus[gpu].vwidth)
+		clState->vwidth = gpus[gpu].vwidth;
+	else {
+		clState->vwidth = preferred_vwidth;
+		gpus[gpu].vwidth = preferred_vwidth;
+	}
+
+	if (gpus[gpu].work_size && gpus[gpu].work_size <= clState->max_work_size)
+		clState->wsize = gpus[gpu].work_size;
+	else if (strstr(name, "Tahiti"))
+		clState->wsize = 64;
+	else
+		clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
+	gpus[gpu].work_size = clState->wsize;
+
 	FILE *binaryfile;
 	size_t *binary_sizes;
 	char **binaries;