The long-standing generation of a zero-sized binary appears to be due to the OpenCL library putting the binary in a RANDOM SLOT among 4 possible binary locations. After building from source, iterate over each of the slots until the real binary is found, and use that one.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
diff --git a/ocl.c b/ocl.c
index 5acb07f..fa7e5ab 100644
--- a/ocl.c
+++ b/ocl.c
@@ -389,18 +389,21 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
int pl;
char *source = file_contents(filename, &pl);
size_t sourceSize[] = {(size_t)pl};
+ cl_uint slot, cpnd;
+
+ slot = cpnd = 0;
if (!source)
return NULL;
- binary_sizes = (size_t *)malloc(sizeof(size_t)*numDevices);
+ binary_sizes = calloc(sizeof(size_t) * MAX_GPUDEVICES * 4, 1);
if (unlikely(!binary_sizes)) {
- applog(LOG_ERR, "Unable to malloc binary_sizes");
+ applog(LOG_ERR, "Unable to calloc binary_sizes");
return NULL;
}
- binaries = (char **)malloc(sizeof(char *)*numDevices);
+ binaries = calloc(sizeof(char *) * MAX_GPUDEVICES * 4, 1);
if (unlikely(!binaries)) {
- applog(LOG_ERR, "Unable to malloc binaries");
+ applog(LOG_ERR, "Unable to calloc binaries");
return NULL;
}
@@ -433,26 +436,26 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
if (!binary_stat.st_size)
goto build;
- binary_sizes[gpu] = binary_stat.st_size;
- binaries[gpu] = (char *)malloc(binary_sizes[gpu]);
- if (unlikely(!binaries[gpu])) {
- applog(LOG_ERR, "Unable to malloc binaries");
+ binary_sizes[slot] = binary_stat.st_size;
+ binaries[slot] = (char *)calloc(binary_sizes[slot], 1);
+ if (unlikely(!binaries[slot])) {
+ applog(LOG_ERR, "Unable to calloc binaries");
fclose(binaryfile);
return NULL;
}
- if (fread(binaries[gpu], 1, binary_sizes[gpu], binaryfile) != binary_sizes[gpu]) {
- applog(LOG_ERR, "Unable to fread binaries[gpu]");
+ if (fread(binaries[slot], 1, binary_sizes[slot], binaryfile) != binary_sizes[slot]) {
+ applog(LOG_ERR, "Unable to fread binaries");
fclose(binaryfile);
- free(binaries[gpu]);
+ free(binaries[slot]);
goto build;
}
- clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[gpu], (const unsigned char **)&binaries[gpu], &status, NULL);
+ clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[slot], (const unsigned char **)binaries, &status, NULL);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error: Loading Binary into cl_program (clCreateProgramWithBinary)");
fclose(binaryfile);
- free(binaries[gpu]);
+ free(binaries[slot]);
goto build;
}
@@ -538,30 +541,43 @@ build:
prog_built = true;
- status = clGetProgramInfo( clState->program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*numDevices, binary_sizes, NULL );
+ status = clGetProgramInfo(clState->program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &cpnd, NULL);
if (unlikely(status != CL_SUCCESS)) {
- applog(LOG_ERR, "Error: Getting program info CL_PROGRAM_BINARY_SIZES. (clGetPlatformInfo)");
+ applog(LOG_ERR, "Error: Getting program info CL_PROGRAM_NUM_DEVICES. (clGetProgramInfo)");
return NULL;
}
+ status = clGetProgramInfo(clState->program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*cpnd, binary_sizes, NULL);
+ if (unlikely(status != CL_SUCCESS)) {
+ applog(LOG_ERR, "Error: Getting program info CL_PROGRAM_BINARY_SIZES. (clGetProgramInfo)");
+ return NULL;
+ }
+
+ /* The actual compiled binary ends up in a RANDOM slot! Grr, so we have
+ * to iterate over all the binary slots and find where the real program
+ * is. What the heck is this!? */
+ for (slot = 0; slot < cpnd; slot++)
+ if (binary_sizes[slot])
+ break;
+
/* copy over all of the generated binaries. */
- applog(LOG_DEBUG, "binary size %d : %d", gpu, binary_sizes[gpu]);
- if (!binary_sizes[gpu]) {
+ applog(LOG_DEBUG, "Binary size for gpu %d found in binary slot %d: %d", gpu, slot, binary_sizes[slot]);
+ if (!binary_sizes[slot]) {
applog(LOG_ERR, "OpenCL compiler generated a zero sized binary, may need to reboot!");
return NULL;
}
- binaries[gpu] = (char *)malloc( sizeof(char)*binary_sizes[gpu]);
- status = clGetProgramInfo( clState->program, CL_PROGRAM_BINARIES, sizeof(char *)*numDevices, binaries, NULL );
+ binaries[slot] = calloc(sizeof(char) * binary_sizes[slot], 1);
+ status = clGetProgramInfo(clState->program, CL_PROGRAM_BINARIES, sizeof(char *) * cpnd, binaries, NULL );
if (unlikely(status != CL_SUCCESS)) {
- applog(LOG_ERR, "Error: Getting program info. (clGetPlatformInfo)");
+ applog(LOG_ERR, "Error: Getting program info. CL_PROGRAM_BINARIES (clGetProgramInfo)");
return NULL;
}
/* Patch the kernel if the hardware supports BFI_INT but it needs to
* be hacked in */
if (patchbfi) {
- unsigned remaining = binary_sizes[gpu];
- char *w = binaries[gpu];
+ unsigned remaining = binary_sizes[slot];
+ char *w = binaries[slot];
unsigned int start, length;
/* Find 2nd incidence of .text, and copy the program's
@@ -577,7 +593,7 @@ build:
}
memcpy(&start, w + 285, 4);
memcpy(&length, w + 289, 4);
- w = binaries[gpu]; remaining = binary_sizes[gpu];
+ w = binaries[slot]; remaining = binary_sizes[slot];
if (!advance(&w, &remaining, "ELF"))
goto build;
w++; remaining--;
@@ -597,7 +613,7 @@ build:
return NULL;
}
- clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[gpu], (const unsigned char **)&binaries[gpu], &status, NULL);
+ clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[slot], (const unsigned char **)&binaries[slot], &status, NULL);
if (status != CL_SUCCESS) {
applog(LOG_ERR, "Error: Loading Binary into cl_program (clCreateProgramWithBinary)");
return NULL;
@@ -621,15 +637,15 @@ build:
/* Not a fatal problem, just means we build it again next time */
applog(LOG_DEBUG, "Unable to create file %s", binaryfilename);
} else {
- if (unlikely(fwrite(binaries[gpu], 1, binary_sizes[gpu], binaryfile) != binary_sizes[gpu])) {
+ if (unlikely(fwrite(binaries[slot], 1, binary_sizes[slot], binaryfile) != binary_sizes[slot])) {
applog(LOG_ERR, "Unable to fwrite to binaryfile");
return NULL;
}
fclose(binaryfile);
}
built:
- if (binaries[gpu])
- free(binaries[gpu]);
+ if (binaries[slot])
+ free(binaries[slot]);
free(binaries);
free(binary_sizes);