Clean up use of macros in poclbm and use bitselect everywhere possible.
diff --git a/poclbm120222.cl b/poclbm120222.cl
index 8fb0090..a0a964c 100644
--- a/poclbm120222.cl
+++ b/poclbm120222.cl
@@ -36,7 +36,10 @@ __constant uint K[64] = {
#ifdef BITALIGN
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
#define rotr(x, y) amd_bitalign((u)x, (u)x, (u)y)
- #ifdef BFI_INT
+#else
+ #define rotr(x, y) rotate((u)x, (u)(32 - y))
+#endif
+#ifdef BFI_INT
// Well, slight problem... It turns out BFI_INT isn't actually exposed to
// OpenCL (or CAL IL for that matter) in any way. However, there is
// a similar instruction, BYTE_ALIGN_INT, which is exposed to OpenCL via
@@ -49,23 +52,19 @@ __constant uint K[64] = {
// Ma can also be implemented in terms of BFI_INT...
#define Ma(x, y, z) amd_bytealign( (z^x), (y), (x) )
- #else // BFI_INT
- // Later SDKs optimise this to BFI INT without patching and GCN
- // actually fails if manually patched with BFI_INT
+
+ // AMD's KernelAnalyzer throws errors compiling the kernel if we use
+ // amd_bytealign on constants with vectors enabled, so Ma2 uses bitselect
+ // instead. (It is used 4 times and is likely optimized out by the compiler.)
+ #define Ma2(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x)
+#else // BFI_INT
+ // GCN actually fails if manually patched with BFI_INT, so use bitselect here
#define ch(x, y, z) bitselect((u)z, (u)y, (u)x)
#define Ma(x, y, z) bitselect((u)x, (u)y, (u)z ^ (u)x)
-#endif
-#else // BITALIGN
- #define ch(x, y, z) (z ^ (x & (y ^ z)))
- #define Ma(x, y, z) ((x & z) | (y & (x | z)))
- #define rotr(x, y) rotate((u)x, (u)(32 - y))
+ #define Ma2(x, y, z) Ma(x, y, z)
#endif
-// AMD's KernelAnalyzer throws errors compiling the kernel if we use
-// amd_bytealign on constants with vectors enabled, so we use this to avoid
-// problems. (this is used 4 times, and likely optimized out by the compiler.)
-#define Ma2(x, y, z) ((y & z) | (x & (y | z)))
__kernel
__attribute__((vec_type_hint(u)))