Commit 4f00e75b8fc94889d72eb66d0431158a445c2635

Daniel Mendler 2019-11-06T16:51:51

make mp_div_3 private

diff --git a/demo/test.c b/demo/test.c
index f2f5800..3e432cf 100644
--- a/demo/test.c
+++ b/demo/test.c
@@ -1241,14 +1241,14 @@ LBL_ERR:
    return EXIT_FAILURE;
 }
 
-static int test_mp_div_3(void)
+static int test_s_mp_div_3(void)
 {
    int cnt;
 
    mp_int a, b, c, d, e;
    DOR(mp_init_multi(&a, &b, &c, &d, &e, NULL));
 
-   /* test mp_div_3  */
+   /* test s_mp_div_3  */
    mp_set(&d, 3u);
    for (cnt = 0; cnt < 10000;) {
       mp_digit r2;
@@ -1259,10 +1259,10 @@ static int test_mp_div_3(void)
       }
       DO(mp_rand(&a, (abs(rand_int()) % 128) + 1));
       DO(mp_div(&a, &d, &b, &e));
-      DO(mp_div_3(&a, &c, &r2));
+      DO(s_mp_div_3(&a, &c, &r2));
 
       if (mp_cmp(&b, &c) || mp_cmp_d(&e, r2)) {
-         printf("\nmp_div_3 => Failure\n");
+         printf("\ns_mp_div_3 => Failure\n");
          goto LBL_ERR;
       }
    }
@@ -2297,7 +2297,7 @@ static int unit_tests(int argc, char **argv)
       T1(mp_cnt_lsb, MP_CNT_LSB),
       T1(mp_complement, MP_COMPLEMENT),
       T1(mp_decr, MP_SUB_D),
-      T1(mp_div_3, MP_DIV_3),
+      T1(s_mp_div_3, S_MP_DIV_3),
       T1(mp_dr_reduce, MP_DR_REDUCE),
       T2(mp_pack_unpack,MP_PACK, MP_UNPACK),
       T2(mp_fread_fwrite, MP_FREAD, MP_FWRITE),
diff --git a/doc/bn.tex b/doc/bn.tex
index b8a6404..6204414 100644
--- a/doc/bn.tex
+++ b/doc/bn.tex
@@ -2605,14 +2605,6 @@ mp_err mp_incr(mp_int *a);
 mp_err mp_decr(mp_int *a);
 \end{alltt}
 
-The division by three can be made faster by replacing the division with a multiplication by the
-multiplicative inverse of three.
-
-\index{mp\_div\_3}
-\begin{alltt}
-mp_err mp_div_3(const mp_int *a, mp_int *c, mp_digit *d);
-\end{alltt}
-
 \chapter{Little Helpers}
 It is never wrong to have some useful little shortcuts at hand.
 \section{Function Macros}
diff --git a/mp_div_3.c b/mp_div_3.c
deleted file mode 100644
index c26692c..0000000
--- a/mp_div_3.c
+++ /dev/null
@@ -1,64 +0,0 @@
-#include "tommath_private.h"
-#ifdef MP_DIV_3_C
-/* LibTomMath, multiple-precision integer library -- Tom St Denis */
-/* SPDX-License-Identifier: Unlicense */
-
-/* divide by three (based on routine from MPI and the GMP manual) */
-mp_err mp_div_3(const mp_int *a, mp_int *c, mp_digit *d)
-{
-   mp_int   q;
-   mp_word  w;
-   mp_digit b;
-   mp_err   err;
-   int      ix;
-
-   /* b = 2**MP_DIGIT_BIT / 3 */
-   b = ((mp_word)1 << (mp_word)MP_DIGIT_BIT) / (mp_word)3;
-
-   if ((err = mp_init_size(&q, a->used)) != MP_OKAY) {
-      return err;
-   }
-
-   q.used = a->used;
-   q.sign = a->sign;
-   w = 0;
-   for (ix = a->used; ix --> 0;) {
-      mp_word t;
-      w = (w << (mp_word)MP_DIGIT_BIT) | (mp_word)a->dp[ix];
-
-      if (w >= 3u) {
-         /* multiply w by [1/3] */
-         t = (w * (mp_word)b) >> (mp_word)MP_DIGIT_BIT;
-
-         /* now subtract 3 * [w/3] from w, to get the remainder */
-         w -= t+t+t;
-
-         /* fixup the remainder as required since
-          * the optimization is not exact.
-          */
-         while (w >= 3u) {
-            t += 1u;
-            w -= 3u;
-         }
-      } else {
-         t = 0;
-      }
-      q.dp[ix] = (mp_digit)t;
-   }
-
-   /* [optional] store the remainder */
-   if (d != NULL) {
-      *d = (mp_digit)w;
-   }
-
-   /* [optional] store the quotient */
-   if (c != NULL) {
-      mp_clamp(&q);
-      mp_exch(&q, c);
-   }
-   mp_clear(&q);
-
-   return MP_OKAY;
-}
-
-#endif
diff --git a/mp_div_d.c b/mp_div_d.c
index 472ab27..fc0a5f2 100644
--- a/mp_div_d.c
+++ b/mp_div_d.c
@@ -43,8 +43,8 @@ mp_err mp_div_d(const mp_int *a, mp_digit b, mp_int *c, mp_digit *d)
    }
 
    /* three? */
-   if (MP_HAS(MP_DIV_3) && (b == 3u)) {
-      return mp_div_3(a, c, d);
+   if (MP_HAS(S_MP_DIV_3) && (b == 3u)) {
+      return s_mp_div_3(a, c, d);
    }
 
    /* no easy answer [c'est la vie].  Just division */
diff --git a/s_mp_div_3.c b/s_mp_div_3.c
new file mode 100644
index 0000000..1cc6d3d
--- /dev/null
+++ b/s_mp_div_3.c
@@ -0,0 +1,64 @@
+#include "tommath_private.h"
+#ifdef S_MP_DIV_3_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis */
+/* SPDX-License-Identifier: Unlicense */
+
+/* divide by three (based on routine from MPI and the GMP manual) */
+mp_err s_mp_div_3(const mp_int *a, mp_int *c, mp_digit *d)
+{
+   mp_int   q;
+   mp_word  w;
+   mp_digit b;
+   mp_err   err;
+   int      ix;
+
+   /* b = 2**MP_DIGIT_BIT / 3 */
+   b = ((mp_word)1 << (mp_word)MP_DIGIT_BIT) / (mp_word)3;
+
+   if ((err = mp_init_size(&q, a->used)) != MP_OKAY) {
+      return err;
+   }
+
+   q.used = a->used;
+   q.sign = a->sign;
+   w = 0;
+   for (ix = a->used; ix --> 0;) {
+      mp_word t;
+      w = (w << (mp_word)MP_DIGIT_BIT) | (mp_word)a->dp[ix];
+
+      if (w >= 3u) {
+         /* multiply w by [1/3] */
+         t = (w * (mp_word)b) >> (mp_word)MP_DIGIT_BIT;
+
+         /* now subtract 3 * [w/3] from w, to get the remainder */
+         w -= t+t+t;
+
+         /* fixup the remainder as required since
+          * the optimization is not exact.
+          */
+         while (w >= 3u) {
+            t += 1u;
+            w -= 3u;
+         }
+      } else {
+         t = 0;
+      }
+      q.dp[ix] = (mp_digit)t;
+   }
+
+   /* [optional] store the remainder */
+   if (d != NULL) {
+      *d = (mp_digit)w;
+   }
+
+   /* [optional] store the quotient */
+   if (c != NULL) {
+      mp_clamp(&q);
+      mp_exch(&q, c);
+   }
+   mp_clear(&q);
+
+   return MP_OKAY;
+}
+
+#endif
diff --git a/s_mp_mul_toom.c b/s_mp_mul_toom.c
index 1ed03c8..f6c2b10 100644
--- a/s_mp_mul_toom.c
+++ b/s_mp_mul_toom.c
@@ -133,7 +133,7 @@ mp_err s_mp_mul_toom(const mp_int *a, const mp_int *b, mp_int *c)
    if ((err = mp_sub(&S2, &a1, &S2)) != MP_OKAY)                  goto LBL_ERR;
 
    /** S2 = S2 / 3; \\ this is an exact division  */
-   if ((err = mp_div_3(&S2, &S2, NULL)) != MP_OKAY)               goto LBL_ERR;
+   if ((err = s_mp_div_3(&S2, &S2, NULL)) != MP_OKAY)             goto LBL_ERR;
 
    /** a1 = S1 - a1; */
    if ((err = mp_sub(&S1, &a1, &a1)) != MP_OKAY)                  goto LBL_ERR;
diff --git a/tommath.h b/tommath.h
index 01a6d34..5e75c98 100644
--- a/tommath.h
+++ b/tommath.h
@@ -300,9 +300,6 @@ mp_err mp_div_2d(const mp_int *a, int b, mp_int *c, mp_int *d) MP_WUR;
 /* b = a/2 */
 mp_err mp_div_2(const mp_int *a, mp_int *b) MP_WUR;
 
-/* a/3 => 3c + d == a */
-mp_err mp_div_3(const mp_int *a, mp_int *c, mp_digit *d) MP_WUR;
-
 /* c = a * 2**b, implemented as c = a << b */
 mp_err mp_mul_2d(const mp_int *a, int b, mp_int *c) MP_WUR;
 
diff --git a/tommath_private.h b/tommath_private.h
index 17bbb73..f8af531 100644
--- a/tommath_private.h
+++ b/tommath_private.h
@@ -158,36 +158,37 @@ MP_STATIC_ASSERT(prec_geq_min_prec, MP_PREC >= MP_MIN_PREC)
 extern MP_PRIVATE mp_err(*s_mp_rand_source)(void *out, size_t size);
 
 /* lowlevel functions, do not call! */
-MP_PRIVATE bool s_mp_get_bit(const mp_int *a, int b);
+MP_PRIVATE bool s_mp_get_bit(const mp_int *a, int b) MP_WUR;
+MP_PRIVATE mp_digit s_mp_log_d(mp_digit base, mp_digit n) MP_WUR;
 MP_PRIVATE mp_err s_mp_add(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
-MP_PRIVATE mp_err s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
-MP_PRIVATE mp_err s_mp_mul_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR;
+MP_PRIVATE mp_err s_mp_div_3(const mp_int *a, mp_int *c, mp_digit *d) MP_WUR;
+MP_PRIVATE mp_err s_mp_div_recursive(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r) MP_WUR;
+MP_PRIVATE mp_err s_mp_div_school(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d) MP_WUR;
+MP_PRIVATE mp_err s_mp_div_small(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d) MP_WUR;
+MP_PRIVATE mp_err s_mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode) MP_WUR;
+MP_PRIVATE mp_err s_mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode) MP_WUR;
+MP_PRIVATE mp_err s_mp_invmod(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
+MP_PRIVATE mp_err s_mp_invmod_odd(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
+MP_PRIVATE mp_err s_mp_log(const mp_int *a, uint32_t base, uint32_t *c) MP_WUR;
+MP_PRIVATE mp_err s_mp_montgomery_reduce_comba(mp_int *x, const mp_int *n, mp_digit rho) MP_WUR;
 MP_PRIVATE mp_err s_mp_mul(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR;
-MP_PRIVATE mp_err s_mp_mul_high_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR;
-MP_PRIVATE mp_err s_mp_mul_high(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR;
-MP_PRIVATE mp_err s_mp_sqr_comba(const mp_int *a, mp_int *b) MP_WUR;
-MP_PRIVATE mp_err s_mp_sqr(const mp_int *a, mp_int *b) MP_WUR;
 MP_PRIVATE mp_err s_mp_mul_balance(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
+MP_PRIVATE mp_err s_mp_mul_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR;
+MP_PRIVATE mp_err s_mp_mul_high(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR;
+MP_PRIVATE mp_err s_mp_mul_high_comba(const mp_int *a, const mp_int *b, mp_int *c, int digs) MP_WUR;
 MP_PRIVATE mp_err s_mp_mul_karatsuba(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
 MP_PRIVATE mp_err s_mp_mul_toom(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
+MP_PRIVATE mp_err s_mp_prime_is_divisible(const mp_int *a, bool *result) MP_WUR;
+MP_PRIVATE mp_err s_mp_rand_platform(void *p, size_t n) MP_WUR;
+MP_PRIVATE mp_err s_mp_sqr(const mp_int *a, mp_int *b) MP_WUR;
+MP_PRIVATE mp_err s_mp_sqr_comba(const mp_int *a, mp_int *b) MP_WUR;
 MP_PRIVATE mp_err s_mp_sqr_karatsuba(const mp_int *a, mp_int *b) MP_WUR;
 MP_PRIVATE mp_err s_mp_sqr_toom(const mp_int *a, mp_int *b) MP_WUR;
-MP_PRIVATE mp_err s_mp_invmod_odd(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
-MP_PRIVATE mp_err s_mp_invmod(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
-MP_PRIVATE mp_err s_mp_montgomery_reduce_comba(mp_int *x, const mp_int *n, mp_digit rho) MP_WUR;
-MP_PRIVATE mp_err s_mp_exptmod_fast(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode) MP_WUR;
-MP_PRIVATE mp_err s_mp_exptmod(const mp_int *G, const mp_int *X, const mp_int *P, mp_int *Y, int redmode) MP_WUR;
-MP_PRIVATE mp_err s_mp_rand_platform(void *p, size_t n) MP_WUR;
-MP_PRIVATE mp_err s_mp_prime_is_divisible(const mp_int *a, bool *result);
-MP_PRIVATE mp_digit s_mp_log_d(mp_digit base, mp_digit n);
-MP_PRIVATE mp_err s_mp_log(const mp_int *a, uint32_t base, uint32_t *c);
-MP_PRIVATE uint32_t s_mp_log_pow2(const mp_int *a, uint32_t base);
-MP_PRIVATE mp_err s_mp_div_recursive(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r);
-MP_PRIVATE mp_err s_mp_div_school(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d);
-MP_PRIVATE mp_err s_mp_div_small(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d);
+MP_PRIVATE mp_err s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c) MP_WUR;
+MP_PRIVATE uint32_t s_mp_log_pow2(const mp_int *a, uint32_t base) MP_WUR;
+MP_PRIVATE void s_mp_copy_digs(mp_digit *d, const mp_digit *s, int digits);
 MP_PRIVATE void s_mp_zero_buf(void *mem, size_t size);
 MP_PRIVATE void s_mp_zero_digs(mp_digit *d, int digits);
-MP_PRIVATE void s_mp_copy_digs(mp_digit *d, const mp_digit *s, int digits);
 
 /* TODO: jenkins prng is not thread safe as of now */
 MP_PRIVATE mp_err s_mp_rand_jenkins(void *p, size_t n) MP_WUR;
diff --git a/tommath_superclass.h b/tommath_superclass.h
index 1d1e000..dd83cad 100644
--- a/tommath_superclass.h
+++ b/tommath_superclass.h
@@ -75,7 +75,6 @@
  * like removing support for even moduli, etc...
  */
 #   ifdef LTM_LAST
-#      undef MP_DIV_3_C
 #      undef MP_DR_IS_MODULUS_C
 #      undef MP_DR_REDUCE_C
 #      undef MP_DR_SETUP_C
@@ -83,6 +82,7 @@
 #      undef MP_REDUCE_2K_SETUP_C
 #      undef MP_REDUCE_IS_2K_C
 #      undef MP_REDUCE_SETUP_C
+#      undef S_MP_DIV_3_C
 #      undef S_MP_EXPTMOD_C
 #      undef S_MP_INVMOD_ODD_C
 #      undef S_MP_MUL_BALANCE_C