Commit 2270b4e053fe669f300481480ef5033951fd0352

ckolivas 2012-02-10T14:33:40

Implement diablo kernel support and try to make it work.

diff --git a/DiabloMiner120210.cl b/DiabloMiner120210.cl
index 3025961..3a5bcb8 100644
--- a/DiabloMiner120210.cl
+++ b/DiabloMiner120210.cl
@@ -25,7 +25,7 @@ typedef uint z;
 #define Zrotr(a, b) rotate((z)a, (z)b)
 #endif
 
-#if BFIINT
+#if BFI_INT
 #define ZCh(a, b, c) amd_bytealign(a, b, c)
 #define ZMa(a, b, c) amd_bytealign((c ^ a), (b), (a))
 #else
@@ -60,24 +60,8 @@ __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search(
   z ZG[4];
   z ZH[4];
 
-  #ifdef USEBASE
-  uint noncebase = base + get_global_id(0);
-  #else
-  uint noncebase = get_global_id(0);
-  #endif
+  z Znonce = base + get_global_id(0);
 
-  #ifdef DOLOOPS
-  noncebase *= LOOPS;
-  #endif
-
-  z Znonce = noncebase;
-  uintzz nonce = (uintzz)0;
-
-  #ifdef DOLOOPS
-  uintzz loopout = 0;
-
-  for(int i = 0; i < LOOPS; i++) {
-  #endif
     ZA[0] = PreVal4_plus_state0 + Znonce;
     ZB[0] = PreVal4_plus_T1 + Znonce;
 
@@ -539,30 +523,33 @@ __kernel __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) void search(
     ZF[2] = ZA[1] + ZH[0] + 0x8cc70208U + ZR15(ZH[1]) + ZR25(ZA[3]) + ZA[2] + ZC[2] + ZCh(ZE[2], ZB[2], ZF[1]) + ZR26(ZE[2]);
     ZG[2] = ZG[1] + ZF[1] + ZR26(ZF[2]) + ZCh(ZF[2], ZE[2], ZB[2]) + ZR15(ZD[2]) + ZH[2] + ZR25(ZH[3]) + ZA[3];
 
-    bool Zio = any(ZG[2] == (z)0x136032EDU);
-
-    bool io = false;
-    io = (Zio) ? Zio : io;
-
-    nonce = Znonce;
-
-  #ifdef DOLOOPS
-    loopout = (io) ? nonce : loopout;
-
-    Znonce += (z)1;
-  }
+#define FOUND (0x80)
+#define NFLAG (0x7F)
 
-  nonce = loopout;
-
-  bool io = any(nonce > (uintzz)0);
-  #endif
-
-  #ifdef VSTORE
-  if(io) { vstorezz(nonce, 0, output); }
-  #else
-  if(io) { output[0] = (uintzz)nonce; }
-  #endif
+#if defined(VECTORS4)
+	ZG[2] ^= 0x136032EDU;
+	bool result = ZG[2].x & ZG[2].y & ZG[2].z & ZG[2].w;
+	if (!result) {
+		if (!ZG[2].x)
+			output[FOUND] = output[NFLAG & Znonce.x] =  Znonce.x;
+		if (!ZG[2].y)
+			output[FOUND] = output[NFLAG & Znonce.y] =  Znonce.y;
+		if (!ZG[2].z)
+			output[FOUND] = output[NFLAG & Znonce.z] =  Znonce.z;
+		if (!ZG[2].w)
+			output[FOUND] = output[NFLAG & Znonce.w] =  Znonce.w;
+	}
+#elif defined(VECTORS2)
+	ZG[2] ^= 0x136032EDU;
+	bool result = ZG[2].x & ZG[2].y;
+	if (!result) {
+		if (!ZG[2].x)
+			output[FOUND] = output[NFLAG & Znonce.x] =  Znonce.x;
+		if (!ZG[2].y)
+			output[FOUND] = output[NFLAG & Znonce.y] =  Znonce.y;
+	}
+#else
+	if (ZG[2] == 0x136032EDU)
+		output[FOUND] = output[NFLAG & Znonce] =  Znonce;
+#endif
 }
-
-// vim: set ft=c
-
diff --git a/configure.ac b/configure.ac
index b8a26b3..517559e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -296,6 +296,7 @@ AC_DEFINE_UNQUOTED([CGMINER_PREFIX], ["$prefix/bin"], [Path to cgminer install])
 AC_DEFINE_UNQUOTED([PHATK_KERNNAME], ["phatk120203"], [Filename for phatk kernel])
 AC_DEFINE_UNQUOTED([POCLBM_KERNNAME], ["poclbm120203"], [Filename for poclbm kernel])
 AC_DEFINE_UNQUOTED([DIAKGCN_KERNNAME], ["diakgcn120208"], [Filename for diakgcn kernel])
+AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["DiabloMiner120210"], [Filename for diablo kernel])
 
 
 AC_SUBST(OPENCL_LIBS)
diff --git a/device-gpu.c b/device-gpu.c
index 7b219bc..f0d7f87 100644
--- a/device-gpu.c
+++ b/device-gpu.c
@@ -806,6 +806,46 @@ static cl_int queue_diakgcn_kernel(_clState *clState, dev_blk_ctx *blk)
 	return status;
 }
 
+static cl_int queue_diablo_kernel(_clState *clState, dev_blk_ctx *blk)
+{
+	cl_kernel *kernel = &clState->kernel;
+	cl_int status = 0;
+	int num = 0;
+
+	CL_SET_BLKARG(nonce);
+	CL_SET_BLKARG(PreVal0);
+	CL_SET_BLKARG(PreVal4_2);
+	CL_SET_BLKARG(PreW18);
+	CL_SET_BLKARG(PreW19);
+	CL_SET_BLKARG(W16);
+	CL_SET_BLKARG(W17);
+	CL_SET_BLKARG(PreW31);
+	CL_SET_BLKARG(PreW32);
+
+	CL_SET_BLKARG(cty_d);
+	CL_SET_BLKARG(cty_b);
+	CL_SET_BLKARG(cty_c);
+	CL_SET_BLKARG(cty_h);
+	CL_SET_BLKARG(cty_f);
+	CL_SET_BLKARG(cty_g);
+
+	CL_SET_BLKARG(C1addK5);
+	CL_SET_BLKARG(B1addK6);
+
+	CL_SET_BLKARG(ctx_a);
+	CL_SET_BLKARG(ctx_b);
+	CL_SET_BLKARG(ctx_c);
+	CL_SET_BLKARG(ctx_d);
+	CL_SET_BLKARG(ctx_e);
+	CL_SET_BLKARG(ctx_f);
+	CL_SET_BLKARG(ctx_g);
+	CL_SET_BLKARG(ctx_h);
+
+	CL_SET_ARG(clState->outputBuffer);
+
+	return status;
+}
+
 static void set_threads_hashes(unsigned int vectors, unsigned int *threads,
 			       unsigned int *hashes, size_t *globalThreads,
 			       unsigned int minthreads, int intensity)
@@ -957,12 +997,17 @@ static void opencl_detect()
 		return;
 
 	if (opt_kernel) {
-		if (strcmp(opt_kernel, "poclbm") && strcmp(opt_kernel, "phatk") && strcmp(opt_kernel, "diakgcn"))
-			quit(1, "Invalid kernel name specified - must be poclbm, phatk or diakgcn");
+		if (strcmp(opt_kernel, "poclbm") &&
+		    strcmp(opt_kernel, "phatk") &&
+		    strcmp(opt_kernel, "diakgcn") &&
+		    strcmp(opt_kernel, "diablo"))
+			quit(1, "Invalid kernel name specified - must be poclbm, phatk, diakgcn or diablo");
 		if (!strcmp(opt_kernel, "diakgcn"))
 			chosen_kernel = KL_DIAKGCN;
 		else if (!strcmp(opt_kernel, "poclbm"))
 			chosen_kernel = KL_POCLBM;
+		else if (!strcmp(opt_kernel, "diablo"))
+			chosen_kernel = KL_DIABLO;
 		else
 			chosen_kernel = KL_PHATK;
 	} else
@@ -1100,6 +1145,9 @@ static bool opencl_thread_init(struct thr_info *thr)
 		case KL_DIAKGCN:
 			thrdata->queue_kernel_parameters = &queue_diakgcn_kernel;
 			break;
+		case KL_DIABLO:
+			thrdata->queue_kernel_parameters = &queue_diablo_kernel;
+			break;
 	}
 
 	thrdata->res = calloc(BUFFERSIZE, 1);
@@ -1184,7 +1232,6 @@ static uint64_t opencl_scanhash(struct thr_info *thr, struct work *work,
 	}
 	set_threads_hashes(clState->preferred_vwidth, &threads, &hashes, globalThreads,
 			   localThreads[0], gpu->intensity);
-
 	status = thrdata->queue_kernel_parameters(clState, &work->blk);
 	if (unlikely(status != CL_SUCCESS)) {
 		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
diff --git a/miner.h b/miner.h
index 0c11a32..42c0a16 100644
--- a/miner.h
+++ b/miner.h
@@ -680,6 +680,7 @@ enum cl_kernels {
 	KL_POCLBM,
 	KL_PHATK,
 	KL_DIAKGCN,
+	KL_DIABLO,
 };
 
 extern void get_datestamp(char *, struct timeval *);
diff --git a/ocl.c b/ocl.c
index 3f9e29c..677d6ca 100644
--- a/ocl.c
+++ b/ocl.c
@@ -362,7 +362,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 
 	if (chosen_kernel == KL_NONE) {
 		if (strstr(name, "Tahiti"))
-			clState->chosen_kernel = KL_DIAKGCN;
+			clState->chosen_kernel = KL_DIABLO;
 		else if (!clState->hasBitAlign)
 			clState->chosen_kernel = KL_POCLBM;
 		else
@@ -371,6 +371,10 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
 		clState->chosen_kernel = chosen_kernel;
 
 	switch (clState->chosen_kernel) {
+		case KL_DIABLO:
+			strcpy(filename, DIABLO_KERNNAME".cl");
+			strcpy(binaryfilename, DIABLO_KERNNAME);
+			break;
 		case KL_DIAKGCN:
 			strcpy(filename, DIAKGCN_KERNNAME".cl");
 			strcpy(binaryfilename, DIAKGCN_KERNNAME);