Instead of applying the BFI_INT patching hack to every device that reports cl_amd_media_ops, apply it only to a whitelist of devices that need it. This should allow GCN-architecture GPUs (Radeon HD 79xx cards) to work properly.

diff --git a/ocl.c b/ocl.c
index c0d0351..31b4951 100644
--- a/ocl.c
+++ b/ocl.c
@@ -185,7 +185,7 @@ void patch_opcodes(char *w, unsigned remaining)
_clState *initCl(unsigned int gpu, char *name, size_t nameSize)
{
- int patchbfi = 0;
+ bool patchbfi = false;
cl_int status = 0;
unsigned int i;
@@ -253,7 +253,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
applog(LOG_INFO, "List of devices:");
unsigned int i;
- for(i=0; i < numDevices; i++) {
+ for (i = 0; i < numDevices; i++) {
char pbuff[100];
status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
if (status != CL_SUCCESS) {
@@ -302,7 +302,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
}
find = strstr(extensions, camo);
if (find)
- clState->hasBitAlign = patchbfi = 1;
+ clState->hasBitAlign = true;
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&clState->preferred_vwidth, NULL);
if (status != CL_SUCCESS) {
@@ -439,6 +439,11 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
if (opt_debug)
applog(LOG_DEBUG, "Loaded binary image %s", binaryfilename);
+ /* We don't need to patch this already loaded image, but need to
+ * set the flag for status later */
+ if (clState->hasBitAlign)
+ patchbfi = true;
+
free(binaries[gpu]);
goto built;
}
@@ -473,21 +478,32 @@ build:
if (clState->hasBitAlign) {
strcat(CompilerOptions, " -DBITALIGN");
if (opt_debug)
- applog(LOG_DEBUG, "cl_amd_media_ops found, patched source with BITALIGN");
+ applog(LOG_DEBUG, "cl_amd_media_ops found, setting BITALIGN");
+ if (strstr(name, "Cedar") ||
+ strstr(name, "Redwood") ||
+ strstr(name, "Juniper") ||
+ strstr(name, "Cypress" ) ||
+ strstr(name, "Hemlock" ) ||
+ strstr(name, "Caicos" ) ||
+ strstr(name, "Turks" ) ||
+ strstr(name, "Barts" ) ||
+ strstr(name, "Cayman" ) ||
+ strstr(name, "Antilles" ) ||
+ strstr(name, "Wrestler" ) ||
+ strstr(name, "Zacate" ) ||
+ strstr(name, "WinterPark" ) ||
+ strstr(name, "BeaverCreek" ))
+ patchbfi = true;
} else if (opt_debug)
- applog(LOG_DEBUG, "cl_amd_media_ops not found, will not BITALIGN patch");
+ applog(LOG_DEBUG, "cl_amd_media_ops not found, will not set BITALIGN");
if (patchbfi) {
strcat(CompilerOptions, " -DBFI_INT");
if (opt_debug)
- applog(LOG_DEBUG, "cl_amd_media_ops found, patched source with BFI_INT");
+ applog(LOG_DEBUG, "BFI_INT patch requiring device found, patched source with BFI_INT");
} else if (opt_debug)
- applog(LOG_DEBUG, "cl_amd_media_ops not found, will not BFI_INT patch");
+ applog(LOG_DEBUG, "BFI_INT patch requiring device not found, will not BFI_INT patch");
- //int n = 1000;
- //while(n--)
- // printf("%s", CompilerOptions);
- //return 1;
status = clBuildProgram(clState->program, 1, &devices[gpu], CompilerOptions , NULL, NULL);
if (status != CL_SUCCESS) {
@@ -521,7 +537,8 @@ build:
return NULL;
}
- /* Patch the kernel if the hardware supports BFI_INT */
+ /* Patch the kernel binary if the hardware supports BFI_INT but the
+ * instruction has to be hacked in */
if (patchbfi) {
unsigned remaining = binary_sizes[gpu];
char *w = binaries[gpu];
@@ -595,7 +612,7 @@ built:
free(binaries);
free(binary_sizes);
- applog(LOG_INFO, "Initialising kernel %s with%s BFI_INT patching, %d vectors and worksize %d",
+ applog(LOG_INFO, "Initialising kernel %s with%s BFI_INT, %d vectors and worksize %d",
filename, patchbfi ? "" : "out", clState->preferred_vwidth, clState->work_size);
/* create a cl program executable for all the devices specified */
diff --git a/ocl.h b/ocl.h
index 3c2a5ce..22c0d83 100644
--- a/ocl.h
+++ b/ocl.h
@@ -1,6 +1,9 @@
#ifndef __OCL_H__
#define __OCL_H__
+
#include "config.h"
+
+#include <stdbool.h>
#ifdef HAVE_OPENCL
#ifdef __APPLE_CC__
#include <OpenCL/opencl.h>
@@ -14,7 +17,7 @@ typedef struct {
cl_command_queue commandQueue;
cl_program program;
cl_mem outputBuffer;
- int hasBitAlign;
+ bool hasBitAlign;
cl_uint preferred_vwidth;
size_t max_work_size;
size_t work_size;