added libtommath-0.38
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296
diff --git a/bn.pdf b/bn.pdf
index b54b602..79f4d4b 100644
Binary files a/bn.pdf and b/bn.pdf differ
diff --git a/bn.tex b/bn.tex
index f89e200..430a599 100644
--- a/bn.tex
+++ b/bn.tex
@@ -49,7 +49,7 @@
\begin{document}
\frontmatter
\pagestyle{empty}
-\title{LibTomMath User Manual \\ v0.37}
+\title{LibTomMath User Manual \\ v0.38}
\author{Tom St Denis \\ tomstdenis@iahu.ca}
\maketitle
This text, the library and the accompanying textbook are all hereby placed in the public domain. This book has been
diff --git a/bn_fast_s_mp_mul_digs.c b/bn_fast_s_mp_mul_digs.c
index a0ae08c..9ca3cfe 100644
--- a/bn_fast_s_mp_mul_digs.c
+++ b/bn_fast_s_mp_mul_digs.c
@@ -78,10 +78,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* make next carry */
_W = _W >> ((mp_word)DIGIT_BIT);
- }
-
- /* store final carry */
- W[ix] = (mp_digit)(_W & MP_MASK);
+ }
/* setup dest */
olduse = c->used;
diff --git a/bn_fast_s_mp_mul_high_digs.c b/bn_fast_s_mp_mul_high_digs.c
index 61d42dd..0c6e839 100644
--- a/bn_fast_s_mp_mul_high_digs.c
+++ b/bn_fast_s_mp_mul_high_digs.c
@@ -70,9 +70,6 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
_W = _W >> ((mp_word)DIGIT_BIT);
}
- /* store final carry */
- W[ix] = (mp_digit)(_W & MP_MASK);
-
/* setup dest */
olduse = c->used;
c->used = pa;
diff --git a/changes.txt b/changes.txt
index 1322d14..cf65066 100644
--- a/changes.txt
+++ b/changes.txt
@@ -1,3 +1,7 @@
+Jan 26th, 2006
+v0.38 -- broken makefile.shared fixed
+ -- removed some carry stores that were not required [updated text]
+
November 18th, 2005
v0.37 -- [Don Porter] reported on a TCL list [HEY SEND ME BUGREPORTS ALREADY!!!] that mp_add_d() would compute -0 with some inputs. Fixed.
-- [rinick@gmail.com] reported the makefile.bcc was messed up. Fixed.
diff --git a/makefile b/makefile
index 192e842..d228411 100644
--- a/makefile
+++ b/makefile
@@ -3,7 +3,7 @@
#Tom St Denis
#version of library
-VERSION=0.37
+VERSION=0.38
CFLAGS += -I./ -Wall -W -Wshadow -Wsign-compare
diff --git a/makefile.shared b/makefile.shared
index 9d2c20a..45b2f63 100644
--- a/makefile.shared
+++ b/makefile.shared
@@ -1,7 +1,7 @@
#Makefile for GCC
#
#Tom St Denis
-VERSION=0:37
+VERSION=0:38
CC = libtool --mode=compile gcc
diff --git a/poster.pdf b/poster.pdf
index 95bf4b3..c975ba3 100644
Binary files a/poster.pdf and b/poster.pdf differ
diff --git a/pre_gen/mpi.c b/pre_gen/mpi.c
index 5eabb2d..166d171 100644
--- a/pre_gen/mpi.c
+++ b/pre_gen/mpi.c
@@ -458,10 +458,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
/* make next carry */
_W = _W >> ((mp_word)DIGIT_BIT);
- }
-
- /* store final carry */
- W[ix] = (mp_digit)(_W & MP_MASK);
+ }
/* setup dest */
olduse = c->used;
@@ -564,9 +561,6 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
_W = _W >> ((mp_word)DIGIT_BIT);
}
- /* store final carry */
- W[ix] = (mp_digit)(_W & MP_MASK);
-
/* setup dest */
olduse = c->used;
c->used = pa;
diff --git a/tommath.pdf b/tommath.pdf
index 5c314f5..a25f3b6 100644
Binary files a/tommath.pdf and b/tommath.pdf differ
diff --git a/tommath.src b/tommath.src
index 8e03635..f81d8d8 100644
--- a/tommath.src
+++ b/tommath.src
@@ -66,7 +66,7 @@ QUALCOMM Australia \\
}
}
\maketitle
-This text has been placed in the public domain. This text corresponds to the v0.37 release of the
+This text has been placed in the public domain. This text corresponds to the v0.38 release of the
LibTomMath project.
\begin{alltt}
@@ -77,7 +77,7 @@ K2L 1C3
Canada
Phone: 1-613-836-3160
-Email: tomstdenis@iahu.ca
+Email: tomstdenis@gmail.com
\end{alltt}
This text is formatted to the international B5 paper size of 176mm wide by 250mm tall using the \LaTeX{}
@@ -2190,7 +2190,7 @@ left.
After the digits have been shifted appropriately at most $lg(\beta) - 1$ shifts are left to perform. Step 5 calculates the number of remaining shifts
required. If it is non-zero a modified shift loop is used to calculate the remaining product.
-Essentially the loop is a generic version of algorith mp\_mul2 designed to handle any shift count in the range $1 \le x < lg(\beta)$. The $mask$
+Essentially the loop is a generic version of algorithm mp\_mul\_2 designed to handle any shift count in the range $1 \le x < lg(\beta)$. The $mask$
variable is used to extract the upper $d$ bits to form the carry for the next iteration.
This algorithm is loosely measured as a $O(2n)$ algorithm which means that if the input is $n$-digits that it takes $2n$ ``time'' to
@@ -2611,17 +2611,16 @@ Place an array of \textbf{MP\_WARRAY} single precision digits named $W$ on the s
\hspace{6mm}5.4.1 $\_ \hat W \leftarrow \_ \hat W + a_{tx+iy}b_{ty-iy}$ \\
\hspace{3mm}5.5 $W_{ix} \leftarrow \_ \hat W (\mbox{mod }\beta)$\\
\hspace{3mm}5.6 $\_ \hat W \leftarrow \lfloor \_ \hat W / \beta \rfloor$ \\
-6. $W_{pa} \leftarrow \_ \hat W (\mbox{mod }\beta)$ \\
\\
-7. $oldused \leftarrow c.used$ \\
-8. $c.used \leftarrow digs$ \\
-9. for $ix$ from $0$ to $pa$ do \\
-\hspace{3mm}9.1 $c_{ix} \leftarrow W_{ix}$ \\
-10. for $ix$ from $pa + 1$ to $oldused - 1$ do \\
-\hspace{3mm}10.1 $c_{ix} \leftarrow 0$ \\
+6. $oldused \leftarrow c.used$ \\
+7. $c.used \leftarrow digs$ \\
+8. for $ix$ from $0$ to $pa$ do \\
+\hspace{3mm}8.1 $c_{ix} \leftarrow W_{ix}$ \\
+9. for $ix$ from $pa + 1$ to $oldused - 1$ do \\
+\hspace{3mm}9.1 $c_{ix} \leftarrow 0$ \\
\\
-11. Clamp $c$. \\
-12. Return MP\_OKAY. \\
+10. Clamp $c$. \\
+11. Return MP\_OKAY. \\
\hline
\end{tabular}
\end{center}
diff --git a/tommath.tex b/tommath.tex
index a852a8d..791cc28 100644
--- a/tommath.tex
+++ b/tommath.tex
@@ -66,7 +66,7 @@ QUALCOMM Australia \\
}
}
\maketitle
-This text has been placed in the public domain. This text corresponds to the v0.37 release of the
+This text has been placed in the public domain. This text corresponds to the v0.38 release of the
LibTomMath project.
\begin{alltt}
@@ -77,7 +77,7 @@ K2L 1C3
Canada
Phone: 1-613-836-3160
-Email: tomstdenis@iahu.ca
+Email: tomstdenis@gmail.com
\end{alltt}
This text is formatted to the international B5 paper size of 176mm wide by 250mm tall using the \LaTeX{}
@@ -3169,7 +3169,7 @@ left.
After the digits have been shifted appropriately at most $lg(\beta) - 1$ shifts are left to perform. Step 5 calculates the number of remaining shifts
required. If it is non-zero a modified shift loop is used to calculate the remaining product.
-Essentially the loop is a generic version of algorith mp\_mul2 designed to handle any shift count in the range $1 \le x < lg(\beta)$. The $mask$
+Essentially the loop is a generic version of algorithm mp\_mul\_2 designed to handle any shift count in the range $1 \le x < lg(\beta)$. The $mask$
variable is used to extract the upper $d$ bits to form the carry for the next iteration.
This algorithm is loosely measured as a $O(2n)$ algorithm which means that if the input is $n$-digits that it takes $2n$ ``time'' to
@@ -3864,17 +3864,16 @@ Place an array of \textbf{MP\_WARRAY} single precision digits named $W$ on the s
\hspace{6mm}5.4.1 $\_ \hat W \leftarrow \_ \hat W + a_{tx+iy}b_{ty-iy}$ \\
\hspace{3mm}5.5 $W_{ix} \leftarrow \_ \hat W (\mbox{mod }\beta)$\\
\hspace{3mm}5.6 $\_ \hat W \leftarrow \lfloor \_ \hat W / \beta \rfloor$ \\
-6. $W_{pa} \leftarrow \_ \hat W (\mbox{mod }\beta)$ \\
\\
-7. $oldused \leftarrow c.used$ \\
-8. $c.used \leftarrow digs$ \\
-9. for $ix$ from $0$ to $pa$ do \\
-\hspace{3mm}9.1 $c_{ix} \leftarrow W_{ix}$ \\
-10. for $ix$ from $pa + 1$ to $oldused - 1$ do \\
-\hspace{3mm}10.1 $c_{ix} \leftarrow 0$ \\
+6. $oldused \leftarrow c.used$ \\
+7. $c.used \leftarrow digs$ \\
+8. for $ix$ from $0$ to $pa$ do \\
+\hspace{3mm}8.1 $c_{ix} \leftarrow W_{ix}$ \\
+9. for $ix$ from $pa + 1$ to $oldused - 1$ do \\
+\hspace{3mm}9.1 $c_{ix} \leftarrow 0$ \\
\\
-11. Clamp $c$. \\
-12. Return MP\_OKAY. \\
+10. Clamp $c$. \\
+11. Return MP\_OKAY. \\
\hline
\end{tabular}
\end{center}
@@ -3977,33 +3976,30 @@ and addition operations in the nested loop in parallel.
077
078 /* make next carry */
079 _W = _W >> ((mp_word)DIGIT_BIT);
-080 \}
+080 \}
081
-082 /* store final carry */
-083 W[ix] = (mp_digit)(_W & MP_MASK);
-084
-085 /* setup dest */
-086 olduse = c->used;
-087 c->used = pa;
-088
-089 \{
-090 register mp_digit *tmpc;
-091 tmpc = c->dp;
-092 for (ix = 0; ix < pa+1; ix++) \{
-093 /* now extract the previous digit [below the carry] */
-094 *tmpc++ = W[ix];
-095 \}
-096
-097 /* clear unused digits [that existed in the old copy of c] */
-098 for (; ix < olduse; ix++) \{
-099 *tmpc++ = 0;
-100 \}
-101 \}
-102 mp_clamp (c);
-103 return MP_OKAY;
-104 \}
-105 #endif
-106
+082 /* setup dest */
+083 olduse = c->used;
+084 c->used = pa;
+085
+086 \{
+087 register mp_digit *tmpc;
+088 tmpc = c->dp;
+089 for (ix = 0; ix < pa+1; ix++) \{
+090 /* now extract the previous digit [below the carry] */
+091 *tmpc++ = W[ix];
+092 \}
+093
+094 /* clear unused digits [that existed in the old copy of c] */
+095 for (; ix < olduse; ix++) \{
+096 *tmpc++ = 0;
+097 \}
+098 \}
+099 mp_clamp (c);
+100 return MP_OKAY;
+101 \}
+102 #endif
+103
\end{alltt}
\end{small}
@@ -4020,7 +4016,7 @@ slower and also often doesn't exist. This new algorithm only performs two reads
compiler has aliased $\_ \hat W$ to a CPU register.
After the inner loop we store the current accumulator in $W$ and shift $\_ \hat W$ (lines 76, 79) to forward it as
-a carry for the next pass. After the outer loop we use the final carry (line 83) as the last digit of the product.
+a carry for the next pass. After the outer loop we use the final carry (line 76) as the last digit of the product.
\subsection{Polynomial Basis Multiplication}
To break the $O(n^2)$ barrier in multiplication requires a completely different look at integer multiplication. In the following algorithms