Enable the cl_amd_media_ops extension only when the BITALIGN flag is defined, and add plain bitwise fallbacks for Ch/Ma so that devices without bitselect support can still build the kernel.
diff --git a/diakgcn120208.cl b/diakgcn120208.cl
index 5038838..be84cb4 100644
--- a/diakgcn120208.cl
+++ b/diakgcn120208.cl
@@ -13,19 +13,24 @@
typedef uint u;
#endif
-#ifdef BFI_INT
+#ifdef BITALIGN
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
- #define Ch(x, y, z) amd_bytealign(x, y, z)
- #define Ma(x, y, z) amd_bytealign(z ^ x, y, x)
-#else
- #define Ch(x, y, z) bitselect(z, y, x)
- #if defined(VECTORS2) || defined(VECTORS4) || defined(VECTORS8)
- // GCN - VEC2 or VEC4
- #define Ma(z, x, y) bitselect(z, y, z ^ x)
+ #ifdef BFI_INT
+ #define Ch(x, y, z) amd_bytealign(x, y, z)
+ #define Ma(x, y, z) amd_bytealign(z ^ x, y, x)
#else
- // GCN - no VEC
- #define Ma(z, x, y) Ch(z ^ x, y, x)
+ #define Ch(x, y, z) bitselect(z, y, x)
+ #if defined(VECTORS2) || defined(VECTORS4) || defined(VECTORS8)
+ // GCN - VEC2 or VEC4
+ #define Ma(z, x, y) bitselect(z, y, z ^ x)
+ #else
+ // GCN - no VEC
+ #define Ma(z, x, y) Ch(z ^ x, y, x)
+ #endif
#endif
+#else // BITALIGN not defined: fall back to plain bitwise operators (no bitselect / amd_bytealign)
+ #define Ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) // per bit: y where x is set, otherwise z; args parenthesized so expression arguments keep their grouping
+ #define Ma(x, y, z) (((x) & (z)) | ((y) & ((x) | (z)))) // per bit: majority value of x, y and z; args parenthesized for the same reason
#endif
#ifdef GOFFSET