Use the diablo kernel for Tahiti on all future SDKs, and set the preferred vector width to 1 for the poclbm kernel only.
diff --git a/ocl.c b/ocl.c
index 1a355ef..e5eb6ec 100644
--- a/ocl.c
+++ b/ocl.c
@@ -332,32 +332,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
}
applog(LOG_DEBUG, "Max work group size reported %d", clState->max_work_size);
- /* For some reason 2 vectors is still better even if the card says
- * otherwise, and many cards lie about their max so use 256 as max
- * unless explicitly set on the command line. 79x0 cards perform
- * better without vectors */
- if (preferred_vwidth > 1) {
- if (strstr(name, "Tahiti"))
- preferred_vwidth = 1;
- else
- preferred_vwidth = 2;
- }
-
- if (gpus[gpu].vwidth)
- clState->vwidth = gpus[gpu].vwidth;
- else {
- clState->vwidth = preferred_vwidth;
- gpus[gpu].vwidth = preferred_vwidth;
- }
-
- if (gpus[gpu].work_size && gpus[gpu].work_size <= clState->max_work_size)
- clState->wsize = gpus[gpu].work_size;
- else if (strstr(name, "Tahiti"))
- clState->wsize = 64;
- else
- clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
- gpus[gpu].work_size = clState->wsize;
-
/* Create binary filename based on parameters passed to opencl
* compiler to ensure we only load a binary that matches what would
* have otherwise created. The filename is:
@@ -378,9 +352,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
applog(LOG_INFO, "Selecting diablo kernel");
clState->chosen_kernel = KL_DIABLO;
}
- } else if (strstr(vbuff, "898.1")) { // Windows 64 bit 12.2 driver
- applog(LOG_INFO, "Selecting diablo kernel");
- clState->chosen_kernel = KL_DIABLO;
+ } else if (strstr(vbuff, "898.1") || // Windows 64 bit 12.2 driver
+ strstr(name, "Tahiti")) { // All non SDK 2.6 79x0
+ applog(LOG_INFO, "Selecting diablo kernel");
+ clState->chosen_kernel = KL_DIABLO;
} else if (clState->hasBitAlign) {
applog(LOG_INFO, "Selecting phatk kernel");
clState->chosen_kernel = KL_PHATK;
@@ -393,10 +368,18 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
} else
clState->chosen_kernel = gpus[gpu].kernel;
+ /* For some reason 2 vectors is still better even if the card reports a
+ * wider preferred vector width, so cap the preferred width at 2. Many cards also
+ * lie about their max work size, so 256 is used as max unless explicitly set on the command line. */
+ if (preferred_vwidth > 2)
+ preferred_vwidth = 2;
+
switch (clState->chosen_kernel) {
case KL_POCLBM:
strcpy(filename, POCLBM_KERNNAME".cl");
strcpy(binaryfilename, POCLBM_KERNNAME);
+ /* This kernel prefers to not use vectors */
+ preferred_vwidth = 1;
break;
case KL_PHATK:
strcpy(filename, PHATK_KERNNAME".cl");
@@ -413,6 +396,21 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
break;
}
+ if (gpus[gpu].vwidth)
+ clState->vwidth = gpus[gpu].vwidth;
+ else {
+ clState->vwidth = preferred_vwidth;
+ gpus[gpu].vwidth = preferred_vwidth;
+ }
+
+ if (gpus[gpu].work_size && gpus[gpu].work_size <= clState->max_work_size)
+ clState->wsize = gpus[gpu].work_size;
+ else if (strstr(name, "Tahiti"))
+ clState->wsize = 64;
+ else
+ clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
+ gpus[gpu].work_size = clState->wsize;
+
FILE *binaryfile;
size_t *binary_sizes;
char **binaries;