Commit 71ab39dbbe784b4a89e26b4f90dc44ef0e393440

Philip Kaufmann 2012-02-23T09:20:51

Explicitly type the constants in the diakgcn kernel as uint (by appending a `U` suffix to the hex literals), to be in line with the poclbm kernel.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
diff --git a/diakgcn120222.cl b/diakgcn120222.cl
index f361403..71f6345 100644
--- a/diakgcn120222.cl
+++ b/diakgcn120222.cl
@@ -1,4 +1,4 @@
-// DiaKGCN 22-02-2012 - OpenCL kernel by Diapolo
+// DiaKGCN 23-02-2012 - OpenCL kernel by Diapolo
 //
 // Parts and / or ideas for this kernel are based upon the public-domain poclbm project, the phatk kernel by Phateus and the DiabloMiner kernel by DiabloD3.
 // The kernel was rewritten by me (Diapolo) and is still public-domain!
@@ -76,29 +76,29 @@ __kernel
 	V[4] += PreVal0addK7 + nonce + ch(V[5], V[6], V[7]) + rotr26(V[5]);
 	V[0] =  PreVal0addK7 + nonce + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
-	V[3] += 0xd807aa98 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0xd807aa98 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
+	V[3] += 0xd807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
+	V[7] =  0xd807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
 
-	V[2] += 0x12835b01 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0x12835b01 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
+	V[2] += 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
+	V[6] =  0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
 
-	V[1] += 0x243185be + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x243185be + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
+	V[1] += 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
+	V[5] =  0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
 
-	V[0] += 0x550c7dc3 + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x550c7dc3 + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
+	V[0] += 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
+	V[4] =  0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
 
-	V[7] += 0x72be5d74 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x72be5d74 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
+	V[7] += 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
+	V[3] =  0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
 
-	V[6] += 0x80deb1fe + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x80deb1fe + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
+	V[6] += 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
+	V[2] =  0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
 
-	V[5] += 0x9bdc06a7 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x9bdc06a7 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
+	V[5] += 0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
+	V[1] =  0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
 
-	V[4] += 0xc19bf3f4 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0xc19bf3f4 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
+	V[4] += 0xc19bf3f4U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
+	V[0] =  0xc19bf3f4U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
 	V[3] += W16addK16 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
 	V[7] =  W16addK16 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
@@ -110,9 +110,9 @@ __kernel
 
 	 W[0] = PreW18 + rotr25(nonce);
 	 W[1] = PreW19 + nonce;
-	 W[2] = 0x80000000 + rotr15(W[0]);
+	 W[2] = 0x80000000U + rotr15(W[0]);
 	 W[3] = rotr15(W[1]);
-	 W[4] = 0x00000280 + rotr15(W[2]);
+	 W[4] = 0x00000280U + rotr15(W[2]);
 	 W[5] = W16 + rotr15(W[3]);
 	 W[6] = W17 + rotr15(W[4]);
 	 W[7] = W[0] + rotr15(W[5]);
@@ -120,58 +120,58 @@ __kernel
 	 W[9] = W[2] + rotr15(W[7]);
 	W[10] = W[3] + rotr15(W[8]);
 	W[11] = W[4] + rotr15(W[9]);
-	W[12] = W[5] + 0x00a00055 + rotr15(W[10]);
+	W[12] = W[5] + 0x00a00055U + rotr15(W[10]);
 	W[13] = W[6] + PreW31 + rotr15(W[11]);
 	W[14] = W[7] + PreW32 + rotr15(W[12]);
 	W[15] = W[8] + W17 + rotr15(W[13]) + rotr25(W[0]);
 
-	V[1] += 0x0fc19dc6 + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x0fc19dc6 + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
+	V[1] += 0x0fc19dc6U + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
+	V[5] =  0x0fc19dc6U + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
 
-	V[0] += 0x240ca1cc + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x240ca1cc + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
+	V[0] += 0x240ca1ccU + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
+	V[4] =  0x240ca1ccU + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
 
-	V[7] += 0x2de92c6f + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x2de92c6f + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
+	V[7] += 0x2de92c6fU + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
+	V[3] =  0x2de92c6fU + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
 
-	V[6] += 0x4a7484aa + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x4a7484aa + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
+	V[6] += 0x4a7484aaU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
+	V[2] =  0x4a7484aaU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
 
-	V[5] += 0x5cb0a9dc + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x5cb0a9dc + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
+	V[5] += 0x5cb0a9dcU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
+	V[1] =  0x5cb0a9dcU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
 
-	V[4] += 0x76f988da + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x76f988da + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
+	V[4] += 0x76f988daU + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
+	V[0] =  0x76f988daU + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
-	V[3] += 0x983e5152 + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x983e5152 + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
+	V[3] += 0x983e5152U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
+	V[7] =  0x983e5152U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
 
-	V[2] += 0xa831c66d + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0xa831c66d + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
+	V[2] += 0xa831c66dU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
+	V[6] =  0xa831c66dU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
 
-	V[1] += 0xb00327c8 + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0xb00327c8 + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
+	V[1] += 0xb00327c8U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
+	V[5] =  0xb00327c8U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
 
-	V[0] += 0xbf597fc7 + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0xbf597fc7 + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
+	V[0] += 0xbf597fc7U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
+	V[4] =  0xbf597fc7U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
 
-	V[7] += 0xc6e00bf3 + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0xc6e00bf3 + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
+	V[7] += 0xc6e00bf3U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
+	V[3] =  0xc6e00bf3U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
 
-	V[6] += 0xd5a79147 + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0xd5a79147 + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
+	V[6] += 0xd5a79147U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
+	V[2] =  0xd5a79147U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
 
-	V[5] += 0x06ca6351 + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x06ca6351 + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
+	V[5] += 0x06ca6351U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
+	V[1] =  0x06ca6351U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
 
-	V[4] += 0x14292967 + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x14292967 + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
+	V[4] += 0x14292967U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
+	V[0] =  0x14292967U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
-	V[3] += 0x27b70a85 + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x27b70a85 + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
+	V[3] += 0x27b70a85U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
+	V[7] =  0x27b70a85U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
 
-	V[2] += 0x2e1b2138 + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0x2e1b2138 + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
+	V[2] += 0x2e1b2138U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
+	V[6] =  0x2e1b2138U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
 
 //----------------------------------------------------------------------------------
 
@@ -192,53 +192,53 @@ __kernel
 	W[14] = W[14] +  W[7] + rotr15(W[12]) + rotr25(W[15]);
 	W[15] = W[15] +  W[8] + rotr15(W[13]) + rotr25( W[0]);
 
-	V[1] += 0x4d2c6dfc + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x4d2c6dfc + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
+	V[1] += 0x4d2c6dfcU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
+	V[5] =  0x4d2c6dfcU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
 
-	V[0] += 0x53380d13 + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x53380d13 + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
+	V[0] += 0x53380d13U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
+	V[4] =  0x53380d13U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
 
-	V[7] += 0x650a7354 + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x650a7354 + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
+	V[7] += 0x650a7354U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
+	V[3] =  0x650a7354U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
 
-	V[6] += 0x766a0abb + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x766a0abb + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
+	V[6] += 0x766a0abbU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
+	V[2] =  0x766a0abbU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
 
-	V[5] += 0x81c2c92e + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x81c2c92e + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
+	V[5] += 0x81c2c92eU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
+	V[1] =  0x81c2c92eU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
 
-	V[4] += 0x92722c85 + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x92722c85 + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
+	V[4] += 0x92722c85U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
+	V[0] =  0x92722c85U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
-	V[3] += 0xa2bfe8a1 + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0xa2bfe8a1 + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
+	V[3] += 0xa2bfe8a1U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
+	V[7] =  0xa2bfe8a1U + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
 
-	V[2] += 0xa81a664b + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0xa81a664b + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
+	V[2] += 0xa81a664bU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
+	V[6] =  0xa81a664bU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
 
-	V[1] += 0xc24b8b70 + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0xc24b8b70 + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
+	V[1] += 0xc24b8b70U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
+	V[5] =  0xc24b8b70U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
 
-	V[0] += 0xc76c51a3 + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0xc76c51a3 + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
+	V[0] += 0xc76c51a3U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
+	V[4] =  0xc76c51a3U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
 
-	V[7] += 0xd192e819 + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0xd192e819 + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
+	V[7] += 0xd192e819U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
+	V[3] =  0xd192e819U + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
 
-	V[6] += 0xd6990624 + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0xd6990624 + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
+	V[6] += 0xd6990624U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
+	V[2] =  0xd6990624U + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
 
-	V[5] += 0xf40e3585 + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0xf40e3585 + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
+	V[5] += 0xf40e3585U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
+	V[1] =  0xf40e3585U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
 
-	V[4] += 0x106aa070 + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x106aa070 + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
+	V[4] += 0x106aa070U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
+	V[0] =  0x106aa070U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
-	V[3] += 0x19a4c116 + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x19a4c116 + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
+	V[3] += 0x19a4c116U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
+	V[7] =  0x19a4c116U + V[7] + W[14] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
 
-	V[2] += 0x1e376c08 + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0x1e376c08 + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
+	V[2] += 0x1e376c08U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
+	V[6] =  0x1e376c08U + V[6] + W[15] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
 
 //----------------------------------------------------------------------------------
 
@@ -257,92 +257,92 @@ __kernel
 	W[12] = W[12] +  W[5] + rotr15(W[10]) + rotr25(W[13]);
 	W[13] = W[13] +  W[6] + rotr15(W[11]) + rotr25(W[14]);
 
-	V[1] += 0x2748774c + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x2748774c + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
+	V[1] += 0x2748774cU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
+	V[5] =  0x2748774cU + V[5] + W[0] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
 
-	V[0] += 0x34b0bcb5 + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x34b0bcb5 + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
+	V[0] += 0x34b0bcb5U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
+	V[4] =  0x34b0bcb5U + V[4] + W[1] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
 
-	V[7] += 0x391c0cb3 + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x391c0cb3 + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
+	V[7] += 0x391c0cb3U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
+	V[3] =  0x391c0cb3U + V[3] + W[2] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
 
-	V[6] += 0x4ed8aa4a + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x4ed8aa4a + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
+	V[6] += 0x4ed8aa4aU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
+	V[2] =  0x4ed8aa4aU + V[2] + W[3] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
 
-	V[5] += 0x5b9cca4f + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x5b9cca4f + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
+	V[5] += 0x5b9cca4fU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
+	V[1] =  0x5b9cca4fU + V[1] + W[4] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
 
-	V[4] += 0x682e6ff3 + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x682e6ff3 + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
+	V[4] += 0x682e6ff3U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
+	V[0] =  0x682e6ff3U + V[0] + W[5] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
-	V[3] += 0x748f82ee + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x748f82ee + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
+	V[3] += 0x748f82eeU + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
+	V[7] =  0x748f82eeU + V[7] + W[6] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
 
-	V[2] += 0x78a5636f + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0x78a5636f + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
+	V[2] += 0x78a5636fU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
+	V[6] =  0x78a5636fU + V[6] + W[7] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
 
-	V[1] += 0x84c87814 + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x84c87814 + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
+	V[1] += 0x84c87814U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
+	V[5] =  0x84c87814U + V[5] + W[8] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
 
-	V[0] += 0x8cc70208 + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x8cc70208 + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
+	V[0] += 0x8cc70208U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
+	V[4] =  0x8cc70208U + V[4] + W[9] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
 
-	V[7] += 0x90befffa + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x90befffa + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
+	V[7] += 0x90befffaU + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
+	V[3] =  0x90befffaU + V[3] + W[10] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
 
-	V[6] += 0xa4506ceb + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0xa4506ceb + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
+	V[6] += 0xa4506cebU + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
+	V[2] =  0xa4506cebU + V[2] + W[11] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
 
-	V[5] += 0xbef9a3f7 + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0xbef9a3f7 + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
+	V[5] += 0xbef9a3f7U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
+	V[1] =  0xbef9a3f7U + V[1] + W[12] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
 
-	V[4] += 0xc67178f2 + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0xc67178f2 + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
+	V[4] += 0xc67178f2U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
+	V[0] =  0xc67178f2U + V[0] + W[13] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
 //----------------------------------------------------------------------------------
 
 	 W[0] = state0 + V[0] + rotr25(state1 + V[1]);
-	 W[1] = state1 + V[1] + 0x00a00000 + rotr25(state2 + V[2]);
+	 W[1] = state1 + V[1] + 0x00a00000U + rotr25(state2 + V[2]);
 	 W[2] = state2 + V[2] + rotr15(W[0]) + rotr25(state3 + V[3]);
 	 W[3] = state3 + V[3] + rotr15(W[1]) + rotr25(state4 + V[4]);
 	 W[4] = state4 + V[4] + rotr15(W[2]) + rotr25(state5 + V[5]);
 	 W[5] = state5 + V[5] + rotr15(W[3]) + rotr25(state6 + V[6]);
-	 W[6] = state6 + V[6] + 0x00000100 + rotr15(W[4]) + rotr25(state7 + V[7]);	
-	 W[7] = state7 + V[7] + W[0] + 0x11002000 + rotr15(W[5]);
-	 W[8] = W[1] + 0x80000000 + rotr15(W[6]);	
+	 W[6] = state6 + V[6] + 0x00000100U + rotr15(W[4]) + rotr25(state7 + V[7]);	
+	 W[7] = state7 + V[7] + W[0] + 0x11002000U + rotr15(W[5]);
+	 W[8] = W[1] + 0x80000000U + rotr15(W[6]);	
 	 W[9] = W[2] + rotr15(W[7]);
 	W[10] = W[3] + rotr15(W[8]);
 	W[11] = W[4] + rotr15(W[9]);
 	W[12] = W[5] + rotr15(W[10]);
 	W[13] = W[6] + rotr15(W[11]);
-	W[14] = W[7] + 0x00400022 + rotr15(W[12]);
-	W[15] = W[8] + 0x00000100 + rotr15(W[13]) + rotr25(W[0]);
+	W[14] = W[7] + 0x00400022U + rotr15(W[12]);
+	W[15] = W[8] + 0x00000100U + rotr15(W[13]) + rotr25(W[0]);
 
-	// 0x71374491 + 0x1f83d9ab + state1
+	// 0x71374491U + 0x1f83d9abU + state1
 	const u state1AaddV1 = state1A + V[1];
-	// 0xb5c0fbcf + 0x9b05688c + state2
+	// 0xb5c0fbcfU + 0x9b05688cU + state2
 	const u state2AaddV2 = state2A + V[2];
-	// 0x510e527f + 0xe9b5dba5 + state3
+	// 0x510e527fU + 0xe9b5dba5U + state3
 	const u state3AaddV3 = state3A + V[3];
-	// 0x3956c25b + state4
+	// 0x3956c25bU + state4
 	const u state4AaddV4 = state4A + V[4];
-	// 0x59f111f1 + state5
+	// 0x59f111f1U + state5
 	const u state5AaddV5 = state5A + V[5];
-	// 0x923f82a4 + state6
+	// 0x923f82a4U + state6
 	const u state6AaddV6 = state6A + V[6];
-	// 0xab1c5ed5 + state7
+	// 0xab1c5ed5U + state7
 	const u state7AaddV7 = state7A + V[7];
 
-	// 0x98c7e2a2 + state0	
+	// 0x98c7e2a2U + state0	
 	V[3] = state0A + V[0];
-	// 0xfc08884d + state0
+	// 0xfc08884dU + state0
 	V[7] = state0B + V[0];
-	V[0] = 0x6a09e667;
-	V[1] = 0xbb67ae85;
-	V[2] = 0x3c6ef372;
-	V[4] = 0x510e527f;
-	V[5] = 0x9b05688c;
-	V[6] = 0x1f83d9ab;
+	V[0] = 0x6a09e667U;
+	V[1] = 0xbb67ae85U;
+	V[2] = 0x3c6ef372U;
+	V[4] = 0x510e527fU;
+	V[5] = 0x9b05688cU;
+	V[6] = 0x1f83d9abU;
 
 	V[2] += state1AaddV1 + ch(V[3], V[4], V[5]) + rotr26(V[3]);
 	V[6] =  state1AaddV1 + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
@@ -365,77 +365,77 @@ __kernel
 	V[4] += state7AaddV7 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
 	V[0] =  state7AaddV7 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
-	V[3] += 0x5807aa98 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x5807aa98 + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
+	V[3] += 0x5807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
+	V[7] =  0x5807aa98U + V[7] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
 
-	V[2] += 0x12835b01 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0x12835b01 + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
+	V[2] += 0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
+	V[6] =  0x12835b01U + V[6] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
 
-	V[1] += 0x243185be + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x243185be + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
+	V[1] += 0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
+	V[5] =  0x243185beU + V[5] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
 
-	V[0] += 0x550c7dc3 + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x550c7dc3 + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
+	V[0] += 0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
+	V[4] =  0x550c7dc3U + V[4] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
 
-	V[7] += 0x72be5d74 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x72be5d74 + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
+	V[7] += 0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
+	V[3] =  0x72be5d74U + V[3] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
 
-	V[6] += 0x80deb1fe + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x80deb1fe + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
+	V[6] += 0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
+	V[2] =  0x80deb1feU + V[2] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
 
-	V[5] += 0x9bdc06a7 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x9bdc06a7 + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
+	V[5] += 0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
+	V[1] =  0x9bdc06a7U + V[1] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
 
-	V[4] += 0xc19bf274 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0xc19bf274 + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
+	V[4] += 0xc19bf274U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
+	V[0] =  0xc19bf274U + V[0] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
-	V[3] += 0xe49b69c1 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0xe49b69c1 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
+	V[3] += 0xe49b69c1U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
+	V[7] =  0xe49b69c1U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
 
-	V[2] += 0xefbe4786 + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0xefbe4786 + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
+	V[2] += 0xefbe4786U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
+	V[6] =  0xefbe4786U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
 
-	V[1] += 0x0fc19dc6 + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x0fc19dc6 + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
+	V[1] += 0x0fc19dc6U + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
+	V[5] =  0x0fc19dc6U + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
 
-	V[0] += 0x240ca1cc + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x240ca1cc + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
+	V[0] += 0x240ca1ccU + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
+	V[4] =  0x240ca1ccU + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
 
-	V[7] += 0x2de92c6f + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x2de92c6f + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
+	V[7] += 0x2de92c6fU + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
+	V[3] =  0x2de92c6fU + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
 
-	V[6] += 0x4a7484aa + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x4a7484aa + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
+	V[6] += 0x4a7484aaU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
+	V[2] =  0x4a7484aaU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
 
-	V[5] += 0x5cb0a9dc + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x5cb0a9dc + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
+	V[5] += 0x5cb0a9dcU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
+	V[1] =  0x5cb0a9dcU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
 
-	V[4] += 0x76f988da + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x76f988da + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
+	V[4] += 0x76f988daU + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
+	V[0] =  0x76f988daU + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
-	V[3] += 0x983e5152 + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x983e5152 + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
+	V[3] += 0x983e5152U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
+	V[7] =  0x983e5152U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
 
-	V[2] += 0xa831c66d + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0xa831c66d + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
+	V[2] += 0xa831c66dU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
+	V[6] =  0xa831c66dU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
 
-	V[1] += 0xb00327c8 + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0xb00327c8 + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
+	V[1] += 0xb00327c8U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
+	V[5] =  0xb00327c8U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
 
-	V[0] += 0xbf597fc7 + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0xbf597fc7 + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
+	V[0] += 0xbf597fc7U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
+	V[4] =  0xbf597fc7U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
 
-	V[7] += 0xc6e00bf3 + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0xc6e00bf3 + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
+	V[7] += 0xc6e00bf3U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
+	V[3] =  0xc6e00bf3U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
 
-	V[6] += 0xd5a79147 + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0xd5a79147 + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
+	V[6] += 0xd5a79147U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
+	V[2] =  0xd5a79147U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
 
-	V[5] += 0x06ca6351 + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x06ca6351 + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
+	V[5] += 0x06ca6351U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
+	V[1] =  0x06ca6351U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
 
-	V[4] += 0x14292967 + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x14292967 + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
+	V[4] += 0x14292967U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
+	V[0] =  0x14292967U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
 //----------------------------------------------------------------------------------
 
@@ -456,53 +456,53 @@ __kernel
 	W[14] = W[14] +  W[7] + rotr15(W[12]) + rotr25(W[15]);
 	W[15] = W[15] +  W[8] + rotr15(W[13]) + rotr25( W[0]);
 
-	V[3] += 0x27b70a85 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x27b70a85 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
+	V[3] += 0x27b70a85U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
+	V[7] =  0x27b70a85U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
 
-	V[2] += 0x2e1b2138 + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0x2e1b2138 + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
+	V[2] += 0x2e1b2138U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
+	V[6] =  0x2e1b2138U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
 
-	V[1] += 0x4d2c6dfc + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x4d2c6dfc + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
+	V[1] += 0x4d2c6dfcU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
+	V[5] =  0x4d2c6dfcU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
 
-	V[0] += 0x53380d13 + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x53380d13 + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
+	V[0] += 0x53380d13U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
+	V[4] =  0x53380d13U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
 
-	V[7] += 0x650a7354 + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x650a7354 + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
+	V[7] += 0x650a7354U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
+	V[3] =  0x650a7354U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
 
-	V[6] += 0x766a0abb + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x766a0abb + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
+	V[6] += 0x766a0abbU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
+	V[2] =  0x766a0abbU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
 
-	V[5] += 0x81c2c92e + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x81c2c92e + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
+	V[5] += 0x81c2c92eU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
+	V[1] =  0x81c2c92eU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
 
-	V[4] += 0x92722c85 + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x92722c85 + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
+	V[4] += 0x92722c85U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
+	V[0] =  0x92722c85U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
-	V[3] += 0xa2bfe8a1 + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0xa2bfe8a1 + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
+	V[3] += 0xa2bfe8a1U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
+	V[7] =  0xa2bfe8a1U + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
 
-	V[2] += 0xa81a664b + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0xa81a664b + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
+	V[2] += 0xa81a664bU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
+	V[6] =  0xa81a664bU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
 
-	V[1] += 0xc24b8b70 + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0xc24b8b70 + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
+	V[1] += 0xc24b8b70U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
+	V[5] =  0xc24b8b70U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
 
-	V[0] += 0xc76c51a3 + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0xc76c51a3 + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
+	V[0] += 0xc76c51a3U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
+	V[4] =  0xc76c51a3U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
 
-	V[7] += 0xd192e819 + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0xd192e819 + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
+	V[7] += 0xd192e819U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
+	V[3] =  0xd192e819U + V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
 
-	V[6] += 0xd6990624 + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0xd6990624 + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
+	V[6] += 0xd6990624U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
+	V[2] =  0xd6990624U + V[2] + W[13] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
 
-	V[5] += 0xf40e3585 + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0xf40e3585 + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
+	V[5] += 0xf40e3585U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
+	V[1] =  0xf40e3585U + V[1] + W[14] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
 
-	V[4] += 0x106aa070 + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x106aa070 + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
+	V[4] += 0x106aa070U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
+	V[0] =  0x106aa070U + V[0] + W[15] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
 //----------------------------------------------------------------------------------
 
@@ -520,38 +520,38 @@ __kernel
 	W[11] = W[11] +  W[4] + rotr15( W[9]) + rotr25(W[12]);
 	W[12] = W[12] +  W[5] + rotr15(W[10]) + rotr25(W[13]);
 
-	V[3] += 0x19a4c116 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x19a4c116 + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
+	V[3] += 0x19a4c116U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
+	V[7] =  0x19a4c116U + V[7] + W[0] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
 
-	V[2] += 0x1e376c08 + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
-	V[6] =  0x1e376c08 + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
+	V[2] += 0x1e376c08U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
+	V[6] =  0x1e376c08U + V[6] + W[1] + ch(V[3], V[4], V[5]) + rotr26(V[3]) + rotr30(V[7]) + ma(V[0], V[1], V[7]);
 
-	V[1] += 0x2748774c + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
-	V[5] =  0x2748774c + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
+	V[1] += 0x2748774cU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
+	V[5] =  0x2748774cU + V[5] + W[2] + ch(V[2], V[3], V[4]) + rotr26(V[2]) + rotr30(V[6]) + ma(V[7], V[0], V[6]);
 
-	V[0] += 0x34b0bcb5 + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
-	V[4] =  0x34b0bcb5 + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
+	V[0] += 0x34b0bcb5U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
+	V[4] =  0x34b0bcb5U + V[4] + W[3] + ch(V[1], V[2], V[3]) + rotr26(V[1]) + rotr30(V[5]) + ma(V[6], V[7], V[5]);
 
-	V[7] += 0x391c0cb3 + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
-	V[3] =  0x391c0cb3 + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
+	V[7] += 0x391c0cb3U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
+	V[3] =  0x391c0cb3U + V[3] + W[4] + ch(V[0], V[1], V[2]) + rotr26(V[0]) + rotr30(V[4]) + ma(V[5], V[6], V[4]);
 
-	V[6] += 0x4ed8aa4a + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
-	V[2] =  0x4ed8aa4a + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
+	V[6] += 0x4ed8aa4aU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]);
+	V[2] =  0x4ed8aa4aU + V[2] + W[5] + ch(V[7], V[0], V[1]) + rotr26(V[7]) + rotr30(V[3]) + ma(V[4], V[5], V[3]);
 
-	V[5] += 0x5b9cca4f + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
-	V[1] =  0x5b9cca4f + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
+	V[5] += 0x5b9cca4fU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]);
+	V[1] =  0x5b9cca4fU + V[1] + W[6] + ch(V[6], V[7], V[0]) + rotr26(V[6]) + rotr30(V[2]) + ma(V[3], V[4], V[2]);
 
-	V[4] += 0x682e6ff3 + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
-	V[0] =  0x682e6ff3 + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
+	V[4] += 0x682e6ff3U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]);
+	V[0] =  0x682e6ff3U + V[0] + W[7] + ch(V[5], V[6], V[7]) + rotr26(V[5]) + rotr30(V[1]) + ma(V[2], V[3], V[1]);
 
-	V[3] += 0x748f82ee + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
-	V[7] =  0x748f82ee + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
+	V[3] += 0x748f82eeU + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]);
+	V[7] =  0x748f82eeU + V[7] + W[8] + ch(V[4], V[5], V[6]) + rotr26(V[4]) + rotr30(V[0]) + ma(V[1], V[2], V[0]);
 
-	V[2] += 0x78a5636f + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
+	V[2] += 0x78a5636fU + V[6] + W[9] + ch(V[3], V[4], V[5]) + rotr26(V[3]);
 
-	V[1] += 0x84c87814 + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
+	V[1] += 0x84c87814U + V[5] + W[10] + ch(V[2], V[3], V[4]) + rotr26(V[2]);
 
-	V[0] += 0x8cc70208 + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
+	V[0] += 0x8cc70208U + V[4] + W[11] + ch(V[1], V[2], V[3]) + rotr26(V[1]);
 
 	V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
 
@@ -560,53 +560,53 @@ __kernel
 #define NFLAG (0x7F)
 
 #ifdef VECTORS8
-	bool result = any(V[7] == 0x136032ed);
+	bool result = any(V[7] == 0x136032edU);
 
 	if (result) {
 		output[FOUND] = FOUND;
-		if (V[7].s0 == 0x136032ed)
+		if (V[7].s0 == 0x136032edU)
 			output[NFLAG & nonce.s0] = nonce.s0;
-		if (V[7].s1 == 0x136032ed)
+		if (V[7].s1 == 0x136032edU)
 			output[NFLAG & nonce.s1] = nonce.s1;
-		if (V[7].s2 == 0x136032ed)
+		if (V[7].s2 == 0x136032edU)
 			output[NFLAG & nonce.s2] = nonce.s2;
-		if (V[7].s3 == 0x136032ed)
+		if (V[7].s3 == 0x136032edU)
 			output[NFLAG & nonce.s3] = nonce.s3;
-		if (V[7].s4 == 0x136032ed)
+		if (V[7].s4 == 0x136032edU)
 			output[NFLAG & nonce.s4] = nonce.s4;
-		if (V[7].s5 == 0x136032ed)
+		if (V[7].s5 == 0x136032edU)
 			output[NFLAG & nonce.s5] = nonce.s5;
-		if (V[7].s6 == 0x136032ed)
+		if (V[7].s6 == 0x136032edU)
 			output[NFLAG & nonce.s6] = nonce.s6;
-		if (V[7].s7 == 0x136032ed)
+		if (V[7].s7 == 0x136032edU)
 			output[NFLAG & nonce.s7] = nonce.s7;
 	}
 #elif defined VECTORS4
-	bool result = any(V[7] == 0x136032ed);
+	bool result = any(V[7] == 0x136032edU);
 
 	if (result) {
 		output[FOUND] = FOUND;
-		if (V[7].x == 0x136032ed)
+		if (V[7].x == 0x136032edU)
 			output[NFLAG & nonce.x] = nonce.x;
-		if (V[7].y == 0x136032ed)
+		if (V[7].y == 0x136032edU)
 			output[NFLAG & nonce.y] = nonce.y;
-		if (V[7].z == 0x136032ed)
+		if (V[7].z == 0x136032edU)
 			output[NFLAG & nonce.z] = nonce.z;
-		if (V[7].w == 0x136032ed)
+		if (V[7].w == 0x136032edU)
 			output[NFLAG & nonce.w] = nonce.w;
 	}
 #elif defined VECTORS2
-	bool result = any(V[7]  == 0x136032ed);
+	bool result = any(V[7]  == 0x136032edU);
 
 	if (result) {
 		output[FOUND] = FOUND;
-		if (V[7].x == 0x136032ed)
+		if (V[7].x == 0x136032edU)
 			output[NFLAG & nonce.x] = nonce.x;
-		if (V[7].y == 0x136032ed)
+		if (V[7].y == 0x136032edU)
 			output[NFLAG & nonce.y] = nonce.y;
 	}
 #else
-	if (V[7] == 0x136032ed)
+	if (V[7] == 0x136032edU)
 		output[FOUND] = output[NFLAG & nonce] = nonce;
 #endif
 }