Commit 91e5cef3a58927131ce2aa603e47ba85c8e10a2f

Con Kolivas 2011-06-22T00:13:46

Actually get first BFI_INT patch working.

diff --git a/cpu-miner.c b/cpu-miner.c
index 4e8c8cd..16c6a9e 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -814,7 +814,6 @@ static void *gpuminer_thread(void *userdata)
 				BUFFERSIZE, res, 0, NULL, NULL);   
 		if (unlikely(status != CL_SUCCESS))
 			{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
-
 		for (i = 0; i < 128; i++) {
 			int found = false;
 
diff --git a/ocl.c b/ocl.c
index 264adc4..02e0d60 100644
--- a/ocl.c
+++ b/ocl.c
@@ -125,29 +125,29 @@ void patch_opcodes(char *w, unsigned remaining)
 		int s2_rel = (*opcode >> (32 + 9)) & 0x1;
 		int pred_sel = (*opcode >> 29) & 0x3;
 		if (!clamp && !dest_rel && !s2_neg && !s2_rel && !pred_sel) {
-		if (alu_inst == OP3_INST_BFE_INT) {
-			count_bfe_int++;
-		} else if (alu_inst == OP3_INST_BFE_UINT) {
-			count_bfe_uint++;
-		} else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) {
-			count_byte_align++;
-			// patch this instruction to BFI_INT
-			*opcode &= 0xfffc1fffffffffffUL;
-			*opcode |= OP3_INST_BFI_INT << (32 + 13);
-			patched++;
-		}
+			if (alu_inst == OP3_INST_BFE_INT) {
+				count_bfe_int++;
+			} else if (alu_inst == OP3_INST_BFE_UINT) {
+				count_bfe_uint++;
+			} else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) {
+				count_byte_align++;
+				// patch this instruction to BFI_INT
+				*opcode &= 0xfffc1fffffffffffUL;
+				*opcode |= OP3_INST_BFI_INT << (32 + 13);
+				patched++;
+			}
 		}
 		if (remaining <= 8) {
-		break;
+			break;
 		}
 		opcode++;
 		remaining -= 8;
 	}
 	if (opt_debug) {
-		printf("Potential OP3 instructions identified: "
-			"%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN\n",
+		applog(LOG_DEBUG, "Potential OP3 instructions identified: "
+			"%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN",
 			count_bfe_int, count_bfe_uint, count_byte_align);
-		printf("Patched a total of %i BFI_INT instructions\n", patched);
+		applog(LOG_DEBUG, "Patched a total of %i BFI_INT instructions", patched);
 	}
 }
 
@@ -316,31 +316,34 @@ _clState *initCl(int gpu, char *name, size_t nameSize) {
 	}
 	err = clGetProgramInfo( clState->program, CL_PROGRAM_BINARIES, sizeof(char *)*nDevices, binaries, NULL );
 
-#if 0
 	for (i = 0; i < nDevices; i++) {
 		if (!binaries[i])
 			continue;
 
 		unsigned remaining = binary_sizes[i];
 		char *w = binaries[i];
-		const int ati_cal_markers = 17;
-		int j;
-		for (j = 0; j < ati_cal_markers; j++) {
-			if (opt_debug)
-			printf("At %p (%u rem. bytes), searching ATI CAL marker %i\n",
-				w, remaining, j);
-			advance(&w, &remaining, "ATI CAL");
-			if (remaining < 1)
-			fprintf(stderr, "Only %u rem. bytes\n", remaining), exit(1);
-			w++; remaining--;
-		}
-		if (remaining < 11)
-			fprintf(stderr, "Only %u rem. bytes\n", remaining), exit(1);
-		w += 11; remaining -= 11;
-		patch_opcodes(w, remaining);
-		exit (0);
+		unsigned int start, length;
+
+		/* Find 2nd incidence of .text, and copy the program's
+		 * position and length at a fixed offset from that. Then go
+		 * back and find the 2nd incidence of \x7fELF (rewind by one
+		 * from ELF) and then patch the opcodes */
+		advance(&w, &remaining, ".text");
+		w++; remaining--;
+		advance(&w, &remaining, ".text");
+		memcpy(&start, w + 285, 4);
+		memcpy(&length, w + 289, 4);
+		w = binaries[i]; remaining = binary_sizes[i];
+		advance(&w, &remaining, "ELF");
+		w++; remaining--;
+		advance(&w, &remaining, "ELF");
+		w--; remaining++;
+		w += start; remaining -= start;
+		if (opt_debug)
+			printf("At %p (%u rem. bytes), to begin patching\n",
+				w, remaining);
+		patch_opcodes(w, length);
 	}
-#endif
 	status = clReleaseProgram(clState->program);
 	if(status != CL_SUCCESS)
 	{
diff --git a/oclminer.cl b/oclminer.cl
index d0650ac..40550ca 100644
--- a/oclminer.cl
+++ b/oclminer.cl
@@ -1,5 +1,7 @@
 typedef uint z;
 
+#define BITALIGN
+
 #ifdef BITALIGN
 #pragma OPENCL EXTENSION cl_amd_media_ops : enable
 #define rotr(a, b) amd_bitalign((z)a, (z)a, (z)b)