Remove the rounding-up of the scrypt padbuffer, which was ineffective and counter-productive on devices with lots of RAM, limiting thread concurrencies and intensities.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
diff --git a/ocl.c b/ocl.c
index 5c30df0..8fcee93 100644
--- a/ocl.c
+++ b/ocl.c
@@ -475,9 +475,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
#ifdef USE_SCRYPT
if (opt_scrypt) {
- cl_ulong ma = cgpu->max_alloc, mt;
- int pow2 = 0;
-
if (!cgpu->opt_lg) {
applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
cgpu->lookup_gap = 2;
@@ -485,31 +482,15 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
cgpu->lookup_gap = cgpu->opt_lg;
if (!cgpu->opt_tc) {
- cgpu->thread_concurrency = ma / 32768 / cgpu->lookup_gap;
+ cgpu->thread_concurrency = cgpu->max_alloc / 32768 / cgpu->lookup_gap;
if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders) {
cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
if (cgpu->thread_concurrency > cgpu->shaders * 5)
cgpu->thread_concurrency = cgpu->shaders * 5;
}
-
applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %u",gpu, cgpu->thread_concurrency);
} else
cgpu->thread_concurrency = cgpu->opt_tc;
-
- /* If we have memory to spare, try to find a power of 2 value
- * >= required amount to map nicely to an intensity */
- mt = cgpu->thread_concurrency * 32768 * cgpu->lookup_gap;
- if (ma > mt) {
- while (ma >>= 1)
- pow2++;
- ma = 1;
- while (--pow2 && ma < mt)
- ma <<= 1;
- if (ma >= mt) {
- cgpu->max_alloc = ma;
- applog(LOG_DEBUG, "Max alloc decreased to %lu", cgpu->max_alloc);
- }
- }
}
#endif