Commit 56144eed1ea141e2593ac2257a3b3aa69e455816

Daniel Mendler 2019-10-29T20:08:42

simplifications: reduce functions

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
diff --git a/mp_dr_reduce.c b/mp_dr_reduce.c
index fba0e21..d630246 100644
--- a/mp_dr_reduce.c
+++ b/mp_dr_reduce.c
@@ -19,16 +19,12 @@
  */
 mp_err mp_dr_reduce(mp_int *x, const mp_int *n, mp_digit k)
 {
-   mp_err      err;
-   int i, m;
-   mp_word  r;
-   mp_digit mu, *tmpx1, *tmpx2;
-
    /* m = digits in modulus */
-   m = n->used;
+   int m = n->used;
 
    /* ensure that "x" has at least 2m digits */
    if (x->alloc < (m + m)) {
+      mp_err err;
       if ((err = mp_grow(x, m + m)) != MP_OKAY) {
          return err;
       }
@@ -37,41 +33,37 @@ mp_err mp_dr_reduce(mp_int *x, const mp_int *n, mp_digit k)
    /* top of loop, this is where the code resumes if
     * another reduction pass is required.
     */
-top:
-   /* aliases for digits */
-   /* alias for lower half of x */
-   tmpx1 = x->dp;
-
-   /* alias for upper half of x, or x/B**m */
-   tmpx2 = x->dp + m;
+   for (;;) {
+      mp_err err;
+      int i;
+      mp_digit mu = 0;
 
-   /* set carry to zero */
-   mu = 0;
+      /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
+      for (i = 0; i < m; i++) {
+         mp_word r         = ((mp_word)x->dp[i + m] * (mp_word)k) + x->dp[i] + mu;
+         x->dp[i]  = (mp_digit)(r & MP_MASK);
+         mu        = (mp_digit)(r >> ((mp_word)MP_DIGIT_BIT));
+      }
 
-   /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
-   for (i = 0; i < m; i++) {
-      r         = ((mp_word)*tmpx2++ * (mp_word)k) + *tmpx1 + mu;
-      *tmpx1++  = (mp_digit)(r & MP_MASK);
-      mu        = (mp_digit)(r >> ((mp_word)MP_DIGIT_BIT));
-   }
+      /* set final carry */
+      x->dp[i] = mu;
 
-   /* set final carry */
-   *tmpx1++ = mu;
+      /* zero words above m */
+      MP_ZERO_DIGITS(x->dp + m + 1, (x->used - m) - 1);
 
-   /* zero words above m */
-   MP_ZERO_DIGITS(tmpx1, (x->used - m) - 1);
+      /* clamp, sub and return */
+      mp_clamp(x);
 
-   /* clamp, sub and return */
-   mp_clamp(x);
+      /* if x >= n then subtract and reduce again
+       * Each successive "recursion" makes the input smaller and smaller.
+       */
+      if (mp_cmp_mag(x, n) == MP_LT) {
+         break;
+      }
 
-   /* if x >= n then subtract and reduce again
-    * Each successive "recursion" makes the input smaller and smaller.
-    */
-   if (mp_cmp_mag(x, n) != MP_LT) {
       if ((err = s_mp_sub(x, n, x)) != MP_OKAY) {
          return err;
       }
-      goto top;
    }
    return MP_OKAY;
 }
diff --git a/mp_montgomery_reduce.c b/mp_montgomery_reduce.c
index a872aba..6a5be26 100644
--- a/mp_montgomery_reduce.c
+++ b/mp_montgomery_reduce.c
@@ -6,9 +6,7 @@
 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
 mp_err mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho)
 {
-   int      ix, digs;
-   mp_err   err;
-   mp_digit mu;
+   int ix, digs;
 
    /* can the fast reduction [comba] method be used?
     *
@@ -25,6 +23,7 @@ mp_err mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho)
 
    /* grow the input as required */
    if (x->alloc < digs) {
+      mp_err err;
       if ((err = mp_grow(x, digs)) != MP_OKAY) {
          return err;
       }
@@ -32,6 +31,9 @@ mp_err mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho)
    x->used = digs;
 
    for (ix = 0; ix < n->used; ix++) {
+      int iy;
+      mp_digit u, mu;
+
       /* mu = ai * rho mod b
        *
        * The value of rho must be precalculated via
@@ -43,41 +45,28 @@ mp_err mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho)
       mu = (mp_digit)(((mp_word)x->dp[ix] * (mp_word)rho) & MP_MASK);
 
       /* a = a + mu * m * b**i */
-      {
-         int iy;
-         mp_digit *tmpn, *tmpx, u;
-         mp_word r;
-
-         /* alias for digits of the modulus */
-         tmpn = n->dp;
-
-         /* alias for the digits of x [the input] */
-         tmpx = x->dp + ix;
-
-         /* set the carry to zero */
-         u = 0;
 
-         /* Multiply and add in place */
-         for (iy = 0; iy < n->used; iy++) {
-            /* compute product and sum */
-            r       = ((mp_word)mu * (mp_word)*tmpn++) +
-                      (mp_word)u + (mp_word)*tmpx;
+      /* Multiply and add in place */
+      u = 0;
+      for (iy = 0; iy < n->used; iy++) {
+         /* compute product and sum */
+         mp_word r = ((mp_word)mu * (mp_word)n->dp[iy]) +
+                     (mp_word)u + (mp_word)x->dp[ix + iy];
 
-            /* get carry */
-            u       = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT);
+         /* get carry */
+         u       = (mp_digit)(r >> (mp_word)MP_DIGIT_BIT);
 
-            /* fix digit */
-            *tmpx++ = (mp_digit)(r & (mp_word)MP_MASK);
-         }
-         /* At this point the ix'th digit of x should be zero */
-
-
-         /* propagate carries upwards as required*/
-         while (u != 0u) {
-            *tmpx   += u;
-            u        = *tmpx >> MP_DIGIT_BIT;
-            *tmpx++ &= MP_MASK;
-         }
+         /* fix digit */
+         x->dp[ix + iy] = (mp_digit)(r & (mp_word)MP_MASK);
+      }
+      /* At this point the ix'th digit of x should be zero */
+
+      /* propagate carries upwards as required*/
+      while (u != 0u) {
+         x->dp[ix + iy]   += u;
+         u        = x->dp[ix + iy] >> MP_DIGIT_BIT;
+         x->dp[ix + iy] &= MP_MASK;
+         ++iy;
       }
    }
 
diff --git a/mp_reduce_2k.c b/mp_reduce_2k.c
index 5d3c7f9..e635f5b 100644
--- a/mp_reduce_2k.c
+++ b/mp_reduce_2k.c
@@ -8,36 +8,37 @@ mp_err mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d)
 {
    mp_int q;
    mp_err err;
-   int    p;
+   int p;
 
    if ((err = mp_init(&q)) != MP_OKAY) {
       return err;
    }
 
    p = mp_count_bits(n);
-top:
-   /* q = a/2**p, a = a mod 2**p */
-   if ((err = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
-      goto LBL_ERR;
-   }
-
-   if (d != 1u) {
-      /* q = q * d */
-      if ((err = mp_mul_d(&q, d, &q)) != MP_OKAY) {
+   for (;;) {
+      /* q = a/2**p, a = a mod 2**p */
+      if ((err = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
          goto LBL_ERR;
       }
-   }
 
-   /* a = a + q */
-   if ((err = s_mp_add(a, &q, a)) != MP_OKAY) {
-      goto LBL_ERR;
-   }
+      if (d != 1u) {
+         /* q = q * d */
+         if ((err = mp_mul_d(&q, d, &q)) != MP_OKAY) {
+            goto LBL_ERR;
+         }
+      }
 
-   if (mp_cmp_mag(a, n) != MP_LT) {
+      /* a = a + q */
+      if ((err = s_mp_add(a, &q, a)) != MP_OKAY) {
+         goto LBL_ERR;
+      }
+
+      if (mp_cmp_mag(a, n) == MP_LT) {
+         break;
+      }
       if ((err = s_mp_sub(a, n, a)) != MP_OKAY) {
          goto LBL_ERR;
       }
-      goto top;
    }
 
 LBL_ERR:
diff --git a/mp_reduce_2k_l.c b/mp_reduce_2k_l.c
index 6328cbc..31d9a18 100644
--- a/mp_reduce_2k_l.c
+++ b/mp_reduce_2k_l.c
@@ -18,27 +18,30 @@ mp_err mp_reduce_2k_l(mp_int *a, const mp_int *n, const mp_int *d)
    }
 
    p = mp_count_bits(n);
-top:
-   /* q = a/2**p, a = a mod 2**p */
-   if ((err = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
-      goto LBL_ERR;
-   }
 
-   /* q = q * d */
-   if ((err = mp_mul(&q, d, &q)) != MP_OKAY) {
-      goto LBL_ERR;
-   }
+   for (;;) {
+      /* q = a/2**p, a = a mod 2**p */
+      if ((err = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
+         goto LBL_ERR;
+      }
 
-   /* a = a + q */
-   if ((err = s_mp_add(a, &q, a)) != MP_OKAY) {
-      goto LBL_ERR;
-   }
+      /* q = q * d */
+      if ((err = mp_mul(&q, d, &q)) != MP_OKAY) {
+         goto LBL_ERR;
+      }
+
+      /* a = a + q */
+      if ((err = s_mp_add(a, &q, a)) != MP_OKAY) {
+         goto LBL_ERR;
+      }
 
-   if (mp_cmp_mag(a, n) != MP_LT) {
+      if (mp_cmp_mag(a, n) == MP_LT) {
+         break;
+      }
       if ((err = s_mp_sub(a, n, a)) != MP_OKAY) {
          goto LBL_ERR;
       }
-      goto top;
+
    }
 
 LBL_ERR:
diff --git a/mp_reduce_2k_setup.c b/mp_reduce_2k_setup.c
index 0f3fd29..51f8841 100644
--- a/mp_reduce_2k_setup.c
+++ b/mp_reduce_2k_setup.c
@@ -8,25 +8,23 @@ mp_err mp_reduce_2k_setup(const mp_int *a, mp_digit *d)
 {
    mp_err err;
    mp_int tmp;
-   int    p;
 
    if ((err = mp_init(&tmp)) != MP_OKAY) {
       return err;
    }
 
-   p = mp_count_bits(a);
-   if ((err = mp_2expt(&tmp, p)) != MP_OKAY) {
-      mp_clear(&tmp);
-      return err;
+   if ((err = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
+      goto LBL_ERR;
    }
 
    if ((err = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
-      mp_clear(&tmp);
-      return err;
+      goto LBL_ERR;
    }
 
    *d = tmp.dp[0];
+
+LBL_ERR:
    mp_clear(&tmp);
-   return MP_OKAY;
+   return err;
 }
 #endif
diff --git a/s_mp_montgomery_reduce_fast.c b/s_mp_montgomery_reduce_fast.c
index 083e7a4..a78c537 100644
--- a/s_mp_montgomery_reduce_fast.c
+++ b/s_mp_montgomery_reduce_fast.c
@@ -13,7 +13,7 @@
 */
 mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho)
 {
-   int     ix, olduse;
+   int     ix, oldused;
    mp_err  err;
    mp_word W[MP_WARRAY];
 
@@ -22,7 +22,7 @@ mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho)
    }
 
    /* get old used count */
-   olduse = x->used;
+   oldused = x->used;
 
    /* grow a as required */
    if (x->alloc < (n->used + 1)) {
@@ -34,38 +34,30 @@ mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho)
    /* first we have to get the digits of the input into
     * an array of double precision words W[...]
     */
-   {
-      mp_word *_W;
-      mp_digit *tmpx;
 
-      /* alias for the W[] array */
-      _W   = W;
-
-      /* alias for the digits of  x*/
-      tmpx = x->dp;
-
-      /* copy the digits of a into W[0..a->used-1] */
-      for (ix = 0; ix < x->used; ix++) {
-         *_W++ = *tmpx++;
-      }
+   /* copy the digits of a into W[0..a->used-1] */
+   for (ix = 0; ix < x->used; ix++) {
+      W[ix] = x->dp[ix];
+   }
 
-      /* zero the high words of W[a->used..m->used*2] */
-      if (ix < ((n->used * 2) + 1)) {
-         MP_ZERO_BUFFER(_W, sizeof(mp_word) * (size_t)(((n->used * 2) + 1) - ix));
-      }
+   /* zero the high words of W[a->used..m->used*2] */
+   if (ix < ((n->used * 2) + 1)) {
+      MP_ZERO_BUFFER(W + x->used, sizeof(mp_word) * (size_t)(((n->used * 2) + 1) - ix));
    }
 
    /* now we proceed to zero successive digits
     * from the least significant upwards
     */
    for (ix = 0; ix < n->used; ix++) {
+      int iy;
+      mp_digit mu;
+
       /* mu = ai * m' mod b
        *
        * We avoid a double precision multiplication (which isn't required)
        * by casting the value down to a mp_digit.  Note this requires
        * that W[ix-1] have  the carry cleared (see after the inner loop)
        */
-      mp_digit mu;
       mu = ((W[ix] & MP_MASK) * rho) & MP_MASK;
 
       /* a = a + mu * m * b**i
@@ -82,21 +74,8 @@ mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho)
        * carry fixups are done in order so after these loops the
        * first m->used words of W[] have the carries fixed
        */
-      {
-         int iy;
-         mp_digit *tmpn;
-         mp_word *_W;
-
-         /* alias for the digits of the modulus */
-         tmpn = n->dp;
-
-         /* Alias for the columns set by an offset of ix */
-         _W = W + ix;
-
-         /* inner loop */
-         for (iy = 0; iy < n->used; iy++) {
-            *_W++ += (mp_word)mu * (mp_word)*tmpn++;
-         }
+      for (iy = 0; iy < n->used; iy++) {
+         W[ix + iy] += (mp_word)mu * (mp_word)n->dp[iy];
       }
 
       /* now fix carry for next digit, W[ix+1] */
@@ -107,47 +86,30 @@ mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho)
     * shift the words downward [all those least
     * significant digits we zeroed].
     */
-   {
-      mp_digit *tmpx;
-      mp_word *_W, *_W1;
-
-      /* nox fix rest of carries */
-
-      /* alias for current word */
-      _W1 = W + ix;
-
-      /* alias for next word, where the carry goes */
-      _W = W + ++ix;
 
-      for (; ix < ((n->used * 2) + 1); ix++) {
-         *_W++ += *_W1++ >> (mp_word)MP_DIGIT_BIT;
-      }
-
-      /* copy out, A = A/b**n
-       *
-       * The result is A/b**n but instead of converting from an
-       * array of mp_word to mp_digit than calling mp_rshd
-       * we just copy them in the right order
-       */
-
-      /* alias for destination word */
-      tmpx = x->dp;
-
-      /* alias for shifted double precision result */
-      _W = W + n->used;
+   for (; ix < (n->used * 2); ix++) {
+      W[ix + 1] += W[ix] >> (mp_word)MP_DIGIT_BIT;
+   }
 
-      for (ix = 0; ix < (n->used + 1); ix++) {
-         *tmpx++ = *_W++ & (mp_word)MP_MASK;
-      }
+   /* copy out, A = A/b**n
+    *
+    * The result is A/b**n but instead of converting from an
+    * array of mp_word to mp_digit than calling mp_rshd
+    * we just copy them in the right order
+    */
 
-      /* zero oldused digits, if the input a was larger than
-       * m->used+1 we'll have to clear the digits
-       */
-      MP_ZERO_DIGITS(tmpx, olduse - ix);
+   for (ix = 0; ix < (n->used + 1); ix++) {
+      x->dp[ix] = W[n->used + ix] & (mp_word)MP_MASK;
    }
 
-   /* set the max used and clamp */
+   /* set the max used */
    x->used = n->used + 1;
+
+   /* zero oldused digits, if the input a was larger than
+    * m->used+1 we'll have to clear the digits
+    */
+   MP_ZERO_DIGITS(x->dp + x->used, oldused - x->used);
+
    mp_clamp(x);
 
    /* if A >= m then A = A - m */