diff --git a/bn.pdf b/bn.pdf index a0cde68..dbaaf90 100644 Binary files a/bn.pdf and b/bn.pdf differ diff --git a/bn.tex b/bn.tex index 60f6d50..06f7b1e 100644 --- a/bn.tex +++ b/bn.tex @@ -1,7 +1,7 @@ \documentclass[]{article} \begin{document} -\title{LibTomMath v0.25 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org } +\title{LibTomMath v0.26 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org } \author{Tom St Denis \\ tomstdenis@iahu.ca} \maketitle \newpage diff --git a/bn_fast_mp_montgomery_reduce.c b/bn_fast_mp_montgomery_reduce.c index b6a8645..8fef486 100644 --- a/bn_fast_mp_montgomery_reduce.c +++ b/bn_fast_mp_montgomery_reduce.c @@ -45,7 +45,10 @@ fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) register mp_word *_W; register mp_digit *tmpx; - _W = W; + /* alias for the W[] array */ + _W = W; + + /* alias for the digits of x*/ tmpx = x->dp; /* copy the digits of a into W[0..a->used-1] */ diff --git a/bn_mp_add_d.c b/bn_mp_add_d.c index 11b2ecd..2d13d48 100644 --- a/bn_mp_add_d.c +++ b/bn_mp_add_d.c @@ -42,7 +42,6 @@ mp_add_d (mp_int * a, mp_digit b, mp_int * c) return res; } - /* old number of used digits in c */ oldused = c->used; @@ -76,7 +75,6 @@ mp_add_d (mp_int * a, mp_digit b, mp_int * c) /* set final carry */ ix++; *tmpc++ = mu; - } else { /* a was negative and |a| < b */ c->used = 1; diff --git a/bn_mp_cmp.c b/bn_mp_cmp.c index 995e4c4..ed27d21 100644 --- a/bn_mp_cmp.c +++ b/bn_mp_cmp.c @@ -19,12 +19,12 @@ int mp_cmp (mp_int * a, mp_int * b) { /* compare based on sign */ - if (a->sign == MP_NEG && b->sign == MP_ZPOS) { - return MP_LT; - } - - if (a->sign == MP_ZPOS && b->sign == MP_NEG) { - return MP_GT; + if (a->sign != b->sign) { + if (a->sign == MP_NEG) { + return MP_LT; + } else { + return MP_GT; + } } /* compare digits */ diff --git a/bn_mp_cmp_mag.c b/bn_mp_cmp_mag.c index c52d725..be38b87 100644 --- a/bn_mp_cmp_mag.c +++ b/bn_mp_cmp_mag.c @@ -19,23 +19,30 @@ int mp_cmp_mag (mp_int * a, mp_int * b) { int n; + mp_digit *tmpa, *tmpb; /* compare based on # of non-zero digits */ if (a->used > b->used) { return MP_GT; - } + } if (a->used < b->used) { return MP_LT; } + /* alias for a */ + tmpa = a->dp + (a->used - 1); + + /* alias for b */ + tmpb = b->dp + (a->used - 1); + /* compare based on digits */ - for (n = a->used - 1; n >= 0; n--) { - if (a->dp[n] > b->dp[n]) { + for (n = 0; n < a->used; ++n, --tmpa, --tmpb) { + if (*tmpa > *tmpb) { return MP_GT; - } - - if (a->dp[n] < b->dp[n]) { + } + + if (*tmpa < *tmpb) { return MP_LT; } } diff --git a/bn_mp_div.c b/bn_mp_div.c index b211cf4..07e1dee 100644 --- a/bn_mp_div.c +++ b/bn_mp_div.c @@ -111,8 +111,9 @@ mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d) /* step 3. for i from n down to (t + 1) */ for (i = n; i >= (t + 1); i--) { - if (i > x.used) + if (i > x.used) { continue; + } /* step 3.1 if xi == yt then set q{i-t-1} to b-1, * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ diff --git a/bn_mp_dr_reduce.c b/bn_mp_dr_reduce.c index 500f558..2fe9dbe 100644 --- a/bn_mp_dr_reduce.c +++ b/bn_mp_dr_reduce.c @@ -25,6 +25,8 @@ * The modulus must be of a special format [see manual] * * Has been modified to use algorithm 7.10 from the LTM book instead + * + * Input x must be in the range 0 <= x <= (n-1)^2 */ int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k) @@ -63,10 +65,10 @@ top: *tmpx1++ = (mp_digit)(r & MP_MASK); mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT)); } - + /* set final carry */ *tmpx1++ = mu; - + /* zero words above m */ for (i = m + 1; i < x->used; i++) { *tmpx1++ = 0; diff --git a/bn_mp_init_multi.c b/bn_mp_init_multi.c index 1989b55..99ce331 100644 --- a/bn_mp_init_multi.c +++ b/bn_mp_init_multi.c @@ -49,4 +49,5 @@ int mp_init_multi(mp_int *mp, ...) } va_end(args); return res; /* Assumed ok, if error flagged above. */ -} \ No newline at end of file +} + diff --git a/bn_mp_lcm.c b/bn_mp_lcm.c index 0c0c237..8a3ccfd 100644 --- a/bn_mp_lcm.c +++ b/bn_mp_lcm.c @@ -14,29 +14,42 @@ */ #include <tommath.h> -/* computes least common multiple as a*b/(a, b) */ +/* computes least common multiple as |a*b|/(a, b) */ int mp_lcm (mp_int * a, mp_int * b, mp_int * c) { int res; - mp_int t; + mp_int t1, t2; - if ((res = mp_init (&t)) != MP_OKAY) { + if ((res = mp_init_multi (&t1, &t2, NULL)) != MP_OKAY) { return res; } - if ((res = mp_mul (a, b, &t)) != MP_OKAY) { - mp_clear (&t); - return res; + /* t1 = get the GCD of the two inputs */ + if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) { + goto __T; } - if ((res = mp_gcd (a, b, c)) != MP_OKAY) { - mp_clear (&t); - return res; + /* divide the smallest by the GCD */ + if (mp_cmp_mag(a, b) == MP_LT) { + /* store quotient in t2 such that t2 * b is the LCM */ + if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) { + goto __T; + } + res = mp_mul(b, &t2, c); + } else { + /* store quotient in t2 such that t2 * a is the LCM */ + if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) { + goto __T; + } + res = mp_mul(a, &t2, c); } - res = mp_div (&t, c, c, NULL); - mp_clear (&t); + /* fix the sign to positive */ + c->sign = MP_ZPOS; + +__T: + mp_clear_multi (&t1, &t2, NULL); return res; } diff --git a/bn_mp_montgomery_reduce.c b/bn_mp_montgomery_reduce.c index 3fabaf9..61bb770 100644 --- a/bn_mp_montgomery_reduce.c +++ b/bn_mp_montgomery_reduce.c @@ -43,7 +43,14 @@ mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) x->used = digs; for (ix = 0; ix < n->used; ix++) { - /* mu = ai * m' mod b */ + /* mu = ai * rho mod b + * + * The value of rho must be precalculated via + * bn_mp_montgomery_setup() such that + * it equals -1/n0 mod b this allows the + * following inner loop to reduce the + * input one digit at a time + */ mu = ((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK; /* a = a + mu * m * b**i */ @@ -52,8 +59,10 @@ mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) register mp_digit *tmpn, *tmpx, u; register mp_word r; - /* aliases */ + /* alias for digits of the modulus */ tmpn = n->dp; + + /* alias for the digits of x [the input] */ tmpx = x->dp + ix; /* set the carry to zero */ @@ -61,12 +70,20 @@ mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) /* Multiply and add in place */ for (iy = 0; iy < n->used; iy++) { + /* compute product and sum */ r = ((mp_word)mu) * ((mp_word)*tmpn++) + ((mp_word) u) + ((mp_word) * tmpx); + + /* get carry */ u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); + + /* fix digit */ *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK)); } - /* propagate carries */ + /* At this point the ix'th digit of x should be zero */ + + + /* propagate carries upwards as required*/ while (u) { *tmpx += u; u = *tmpx >> DIGIT_BIT; @@ -75,11 +92,18 @@ mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) } } + /* at this point the n.used'th least + * significant digits of x are all zero + * which means we can shift x to the + * right by n.used digits and the + * residue is unchanged. + */ + /* x = x/b**n.used */ mp_clamp(x); mp_rshd (x, n->used); - /* if A >= m then A = A - m */ + /* if x >= n then x = x - n */ if (mp_cmp_mag (x, n) != MP_LT) { return s_mp_sub (x, n, x); } diff --git a/bn_mp_neg.c b/bn_mp_neg.c index e710790..cd40059 100644 --- a/bn_mp_neg.c +++ b/bn_mp_neg.c @@ -22,6 +22,8 @@ mp_neg (mp_int * a, mp_int * b) if ((res = mp_copy (a, b)) != MP_OKAY) { return res; } - b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS; + if (mp_iszero(b) != 1) { + b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS; + } return MP_OKAY; } diff --git a/bn_mp_read_signed_bin.c b/bn_mp_read_signed_bin.c index 7817243..39d8618 100644 --- a/bn_mp_read_signed_bin.c +++ b/bn_mp_read_signed_bin.c @@ -20,9 +20,17 @@ mp_read_signed_bin (mp_int * a, unsigned char *b, int c) { int res; + /* read magnitude */ if ((res = mp_read_unsigned_bin (a, b + 1, c - 1)) != MP_OKAY) { return res; } - a->sign = ((b[0] == (unsigned char) 0) ? MP_ZPOS : MP_NEG); + + /* first byte is 0 for positive, non-zero for negative */ + if (b[0] == 0) { + a->sign = MP_ZPOS; + } else { + a->sign = MP_NEG; + } + return MP_OKAY; } diff --git a/bn_mp_read_unsigned_bin.c b/bn_mp_read_unsigned_bin.c index 3b4f5cb..cb11d87 100644 --- a/bn_mp_read_unsigned_bin.c +++ b/bn_mp_read_unsigned_bin.c @@ -19,7 +19,18 @@ int mp_read_unsigned_bin (mp_int * a, unsigned char *b, int c) { int res; + + /* make sure there are at least two digits */ + if (a->alloc < 2) { + if ((res = mp_grow(a, 2)) != MP_OKAY) { + return res; + } + } + + /* zero the int */ mp_zero (a); + + /* read the bytes in */ while (c-- > 0) { if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) { return res; diff --git a/bn_mp_set_int.c b/bn_mp_set_int.c index 26611aa..da3778b 100644 --- a/bn_mp_set_int.c +++ b/bn_mp_set_int.c @@ -21,6 +21,7 @@ mp_set_int (mp_int * a, unsigned int b) int x, res; mp_zero (a); + /* set four bits at a time */ for (x = 0; x < 8; x++) { /* shift the number up four bits */ diff --git a/bn_s_mp_mul_digs.c b/bn_s_mp_mul_digs.c index 2d78a2d..a5b1067 100644 --- a/bn_s_mp_mul_digs.c +++ b/bn_s_mp_mul_digs.c @@ -61,15 +61,15 @@ s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) /* compute the columns of the output and propagate the carry */ for (iy = 0; iy < pb; iy++) { /* compute the column as a mp_word */ - r = ((mp_word) *tmpt) + - ((mp_word)tmpx) * ((mp_word)*tmpy++) + - ((mp_word) u); + r = ((mp_word)*tmpt) + + ((mp_word)tmpx) * ((mp_word)*tmpy++) + + ((mp_word) u); /* the new column is the lower part of the result */ *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); /* get the carry word from the result */ - u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); + u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); } /* set carry if it is placed below digs */ if (ix + iy < digs) { diff --git a/bn_s_mp_mul_high_digs.c b/bn_s_mp_mul_high_digs.c index 4e4d536..84cc7d0 100644 --- a/bn_s_mp_mul_high_digs.c +++ b/bn_s_mp_mul_high_digs.c @@ -26,7 +26,6 @@ s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) mp_word r; mp_digit tmpx, *tmpt, *tmpy; - /* can we use the fast multiplier? */ if (((a->used + b->used + 1) < MP_WARRAY) && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { @@ -55,13 +54,15 @@ s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) for (iy = digs - ix; iy < pb; iy++) { /* calculate the double precision result */ - r = ((mp_word) * tmpt) + ((mp_word)tmpx) * ((mp_word)*tmpy++) + ((mp_word) u); + r = ((mp_word)*tmpt) + + ((mp_word)tmpx) * ((mp_word)*tmpy++) + + ((mp_word) u); /* get the lower part */ *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); /* carry the carry */ - u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); + u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); } *tmpt = u; } diff --git a/bn_s_mp_sqr.c b/bn_s_mp_sqr.c index 9bf3ef1..3b68152 100644 --- a/bn_s_mp_sqr.c +++ b/bn_s_mp_sqr.c @@ -54,19 +54,19 @@ s_mp_sqr (mp_int * a, mp_int * b) /* now calculate the double precision result, note we use * addition instead of *2 since it's easier to optimize */ - r = ((mp_word) *tmpt) + r + r + ((mp_word) u); + r = ((mp_word) *tmpt) + r + r + ((mp_word) u); /* store lower part */ *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); /* get carry */ - u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); + u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); } /* propagate upwards */ while (u != ((mp_digit) 0)) { - r = ((mp_word) * tmpt) + ((mp_word) u); + r = ((mp_word) *tmpt) + ((mp_word) u); *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); - u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); + u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); } } diff --git a/changes.txt b/changes.txt index fbbd0a2..0772f45 100644 --- a/changes.txt +++ b/changes.txt @@ -1,3 +1,13 @@ +Aug 29th, 2003 +v0.26 -- Fixed typo that caused warning with GCC 3.2 + -- Martin Marcel noticed a bug in mp_neg() that allowed negative zeroes. + Also, Martin is the fellow who noted the bugs in mp_gcd() of 0.24/0.25. + -- Martin Marcel noticed an optimization [and slight bug] in mp_lcm(). + -- Added fix to mp_read_unsigned_bin to prevent a buffer overflow. + -- Beefed up the comments in the baseline multipliers [and montgomery] + -- Added "mont" demo to the makefile.msvc in etc/ + -- Optimized sign compares in mp_cmp from 4 to 2 cases. + Aug 4th, 2003 v0.25 -- Fix to mp_gcd again... oops (0,-a) == (-a, 0) == a -- Fix to mp_clear which didn't reset the sign [Greg Rose] diff --git a/changes.txt~ b/changes.txt~ new file mode 100644 index 0000000..2ccb559 --- /dev/null +++ b/changes.txt~ @@ -0,0 +1,244 @@ +Aug 11th, 2003 +v0.26 -- Fixed typo that caused warning with GCC 3.2 + -- Martin Marcel noticed a bug in mp_neg() that allowed negative zeroes. + Also, Martin is the fellow who noted the bugs in mp_gcd() of 0.24/0.25. + -- Martin Marcel noticed an optimization [and slight bug] in mp_lcm(). + -- Added fix to mp_read_unsigned_bin to prevent a buffer overflow. + -- Beefed up the comments in the baseline multipliers [and montgomery] + -- Added "mont" demo to the makefile.msvc in etc/ + -- Optimized sign compares in mp_cmp from 4 to 2 cases. + +Aug 4th, 2003 +v0.25 -- Fix to mp_gcd again... oops (0,-a) == (-a, 0) == a + -- Fix to mp_clear which didn't reset the sign [Greg Rose] + -- Added mp_error_to_string() to convert return codes to strings. [Greg Rose] + -- Optimized fast_mp_invmod() to do the test for invalid inputs [both even] + first so temps don't have to be initialized if it's going to fail. + -- Optimized mp_gcd() by removing mp_div_2d calls for when one of the inputs + is odd. + -- Tons of new comments, some indentation fixups, etc. + -- mp_jacobi() returns MP_VAL if the modulus is less than or equal to zero. + -- fixed two typos in the header of each file :-) + -- LibTomMath is officially Public Domain [see LICENSE] + +July 15th, 2003 +v0.24 -- Optimized mp_add_d and mp_sub_d to not allocate temporary variables + -- Fixed mp_gcd() so the gcd of 0,0 is 0. Allows the gcd operation to be chained + e.g. (0,0,a) == a [instead of 1] + -- Should be one of the last release for a while. Working on LibTomMath book now. + -- optimized the pprime demo [/etc/pprime.c] to first make a huge table of single + digit primes then it reads them randomly instead of randomly choosing/testing single + digit primes. + +July 12th, 2003 +v0.23 -- Optimized mp_prime_next_prime() to not use mp_mod [via is_divisible()] in each + iteration. Instead now a smaller table is kept of the residues which can be updated + without division. + -- Fixed a bug in next_prime() where an input of zero would be treated as odd and + have two added to it [to move to the next odd]. + -- fixed a bug in prime_fermat() and prime_miller_rabin() which allowed the base + to be negative, zero or one. Normally the test is only valid if the base is + greater than one. + -- changed the next_prime() prototype to accept a new parameter "bbs_style" which + will find the next prime congruent to 3 mod 4. The default [bbs_style==0] will + make primes which are either congruent to 1 or 3 mod 4. + -- fixed mp_read_unsigned_bin() so that it doesn't include both code for + the case DIGIT_BIT < 8 and >= 8 + -- optimized div_d() to easy out on division by 1 [or if a == 0] and use + logical shifts if the divisor is a power of two. + -- the default DIGIT_BIT type was not int for non-default builds. Fixed. + +July 2nd, 2003 +v0.22 -- Fixed up mp_invmod so the result is properly in range now [was always congruent to the inverse...] + -- Fixed up s_mp_exptmod and mp_exptmod_fast so the lower half of the pre-computed table isn't allocated + which makes the algorithm use half as much ram. + -- Fixed the install script not to make the book :-) [which isn't included anyways] + -- added mp_cnt_lsb() which counts how many of the lsbs are zero + -- optimized mp_gcd() to use the new mp_cnt_lsb() to replace multiple divisions by two by a single division. + -- applied similar optimization to mp_prime_miller_rabin(). + -- Fixed a bug in both mp_invmod() and fast_mp_invmod() which tested for odd + via "mp_iseven() == 0" which is not valid [since zero is not even either]. + +June 19th, 2003 +v0.21 -- Fixed bug in mp_mul_d which would not handle sign correctly [would not always forward it] + -- Removed the #line lines from gen.pl [was in violation of ISO C] + +June 8th, 2003 +v0.20 -- Removed the book from the package. Added the TDCAL license document. + -- This release is officially pure-bred TDCAL again [last officially TDCAL based release was v0.16] + +June 6th, 2003 +v0.19 -- Fixed a bug in mp_montgomery_reduce() which was introduced when I tweaked mp_rshd() in the previous release. + Essentially the digits were not trimmed before the compare which cause a subtraction to occur all the time. + -- Fixed up etc/tune.c a bit to stop testing new cutoffs after 16 failures [to find more optimal points]. + Brute force ho! + + +May 29th, 2003 +v0.18 -- Fixed a bug in s_mp_sqr which would handle carries properly just not very elegantly. + (e.g. correct result, just bad looking code) + -- Fixed bug in mp_sqr which still had a 512 constant instead of MP_WARRAY + -- Added Toom-Cook multipliers [needs tuning!] + -- Added efficient divide by 3 algorithm mp_div_3 + -- Re-wrote mp_div_d to be faster than calling mp_div + -- Added in a donated BCC makefile and a single page LTM poster (ahalhabsi@sbcglobal.net) + -- Added mp_reduce_2k which reduces an input modulo n = 2**p - k for any single digit k + -- Made the exptmod system be aware of the 2k reduction algorithms. + -- Rewrote mp_dr_reduce to be smaller, simpler and easier to understand. + +May 17th, 2003 +v0.17 -- Benjamin Goldberg submitted optimized mp_add and mp_sub routines. A new gen.pl as well + as several smaller suggestions. Thanks! + -- removed call to mp_cmp in inner loop of mp_div and put mp_cmp_mag in its place :-) + -- Fixed bug in mp_exptmod that would cause it to fail for odd moduli when DIGIT_BIT != 28 + -- mp_exptmod now also returns errors if the modulus is negative and will handle negative exponents + -- mp_prime_is_prime will now return true if the input is one of the primes in the prime table + -- Damian M Gryski (dgryski@uwaterloo.ca) found a index out of bounds error in the + mp_fast_s_mp_mul_high_digs function which didn't come up before. (fixed) + -- Refactored the DR reduction code so there is only one function per file. + -- Fixed bug in the mp_mul() which would erroneously avoid the faster multiplier [comba] when it was + allowed. The bug would not cause the incorrect value to be produced just less efficient (fixed) + -- Fixed similar bug in the Montgomery reduction code. + -- Added tons of (mp_digit) casts so the 7/15/28/31 bit digit code will work flawlessly out of the box. + Also added limited support for 64-bit machines with a 60-bit digit. Both thanks to Tom Wu (tom@arcot.com) + -- Added new comments here and there, cleaned up some code [style stuff] + -- Fixed a lingering typo in mp_exptmod* that would set bitcnt to zero then one. Very silly stuff :-) + -- Fixed up mp_exptmod_fast so it would set "redux" to the comba Montgomery reduction if allowed. This + saves quite a few calls and if statements. + -- Added etc/mont.c a test of the Montgomery reduction [assuming all else works :-| ] + -- Fixed up etc/tune.c to use a wider test range [more appropriate] also added a x86 based addition which + uses RDTSC for high precision timing. + -- Updated demo/demo.c to remove MPI stuff [won't work anyways], made the tests run for 2 seconds each so its + not so insanely slow. Also made the output space delimited [and fixed up various errors] + -- Added logs directory, logs/graph.dem which will use gnuplot to make a series of PNG files + that go with the pre-made index.html. You have to build [via make timing] and run ltmtest first in the + root of the package. + -- Fixed a bug in mp_sub and mp_add where "-a - -a" or "-a + a" would produce -0 as the result [obviously invalid]. + -- Fixed a bug in mp_rshd. If the count == a.used it should zero/return [instead of shifting] + -- Fixed a "off-by-one" bug in mp_mul2d. The initial size check on alloc would be off by one if the residue + shifting caused a carry. + -- Fixed a bug where s_mp_mul_digs() would not call the Comba based routine if allowed. This made Barrett reduction + slower than it had to be. + +Mar 29th, 2003 +v0.16 -- Sped up mp_div by making normalization one shift call + -- Sped up mp_mul_2d/mp_div_2d by aliasing pointers :-) + -- Cleaned up mp_gcd to use the macros for odd/even detection + -- Added comments here and there, mostly there but occasionally here too. + +Mar 22nd, 2003 +v0.15 -- Added series of prime testing routines to lib + -- Fixed up etc/tune.c + -- Added DR reduction algorithm + -- Beefed up the manual more. + -- Fixed up demo/demo.c so it doesn't have so many warnings and it does the full series of + tests + -- Added "pre-gen" directory which will hold a "gen.pl"'ed copy of the entire lib [done at + zipup time so its always the latest] + -- Added conditional casts for C++ users [boo!] + +Mar 15th, 2003 +v0.14 -- Tons of manual updates + -- cleaned up the directory + -- added MSVC makefiles + -- source changes [that I don't recall] + -- Fixed up the lshd/rshd code to use pointer aliasing + -- Fixed up the mul_2d and div_2d to not call rshd/lshd unless needed + -- Fixed up etc/tune.c a tad + -- fixed up demo/demo.c to output comma-delimited results of timing + also fixed up timing demo to use a finer granularity for various functions + -- fixed up demo/demo.c testing to pause during testing so my Duron won't catch on fire + [stays around 31-35C during testing :-)] + +Feb 13th, 2003 +v0.13 -- tons of minor speed-ups in low level add, sub, mul_2 and div_2 which propagate + to other functions like mp_invmod, mp_div, etc... + -- Sped up mp_exptmod_fast by using new code to find R mod m [e.g. B^n mod m] + -- minor fixes + +Jan 17th, 2003 +v0.12 -- re-wrote the majority of the makefile so its more portable and will + install via "make install" on most *nix platforms + -- Re-packaged all the source as seperate files. Means the library a single + file packagage any more. Instead of just adding "bn.c" you have to add + libtommath.a + -- Renamed "bn.h" to "tommath.h" + -- Changes to the manual to reflect all of this + -- Used GNU Indent to clean up the source + +Jan 15th, 2003 +v0.11 -- More subtle fixes + -- Moved to gentoo linux [hurrah!] so made *nix specific fixes to the make process + -- Sped up the montgomery reduction code quite a bit + -- fixed up demo so when building timing for the x86 it assumes ELF format now + +Jan 9th, 2003 +v0.10 -- Pekka Riikonen suggested fixes to the radix conversion code. + -- Added baseline montgomery and comba montgomery reductions, sped up exptmods + [to a point, see bn.h for MONTGOMERY_EXPT_CUTOFF] + +Jan 6th, 2003 +v0.09 -- Updated the manual to reflect recent changes. :-) + -- Added Jacobi function (mp_jacobi) to supplement the number theory side of the lib + -- Added a Mersenne prime finder demo in ./etc/mersenne.c + +Jan 2nd, 2003 +v0.08 -- Sped up the multipliers by moving the inner loop variables into a smaller scope + -- Corrected a bunch of small "warnings" + -- Added more comments + -- Made "mtest" be able to use /dev/random, /dev/urandom or stdin for RNG data + -- Corrected some bugs where error messages were potentially ignored + -- add etc/pprime.c program which makes numbers which are provably prime. + +Jan 1st, 2003 +v0.07 -- Removed alot of heap operations from core functions to speed them up + -- Added a root finding function [and mp_sqrt macro like from MPI] + -- Added more to manual + +Dec 31st, 2002 +v0.06 -- Sped up the s_mp_add, s_mp_sub which inturn sped up mp_invmod, mp_exptmod, etc... + -- Cleaned up the header a bit more + +Dec 30th, 2002 +v0.05 -- Builds with MSVC out of the box + -- Fixed a bug in mp_invmod w.r.t. even moduli + -- Made mp_toradix and mp_read_radix use char instead of unsigned char arrays + -- Fixed up exptmod to use fewer multiplications + -- Fixed up mp_init_size to use only one heap operation + -- Note there is a slight "off-by-one" bug in the library somewhere + without the padding (see the source for comment) the library + crashes in libtomcrypt. Anyways a reasonable workaround is to pad the + numbers which will always correct it since as the numbers grow the padding + will still be beyond the end of the number + -- Added more to the manual + +Dec 29th, 2002 +v0.04 -- Fixed a memory leak in mp_to_unsigned_bin + -- optimized invmod code + -- Fixed bug in mp_div + -- use exchange instead of copy for results + -- added a bit more to the manual + +Dec 27th, 2002 +v0.03 -- Sped up s_mp_mul_high_digs by not computing the carries of the lower digits + -- Fixed a bug where mp_set_int wouldn't zero the value first and set the used member. + -- fixed a bug in s_mp_mul_high_digs where the limit placed on the result digits was not calculated properly + -- fixed bugs in add/sub/mul/sqr_mod functions where if the modulus and dest were the same it wouldn't work + -- fixed a bug in mp_mod and mp_mod_d concerning negative inputs + -- mp_mul_d didn't preserve sign + -- Many many many many fixes + -- Works in LibTomCrypt now :-) + -- Added iterations to the timing demos... more accurate. + -- Tom needs a job. + +Dec 26th, 2002 +v0.02 -- Fixed a few "slips" in the manual. This is "LibTomMath" afterall :-) + -- Added mp_cmp_mag, mp_neg, mp_abs and mp_radix_size that were missing. + -- Sped up the fast [comba] multipliers more [yahoo!] + +Dec 25th,2002 +v0.01 -- Initial release. Gimme a break. + -- Todo list, + add details to manual [e.g. algorithms] + more comments in code + example programs diff --git a/demo/demo.c b/demo/demo.c index 7da356c..67984aa 100644 --- a/demo/demo.c +++ b/demo/demo.c @@ -1,5 +1,12 @@ #include <time.h> +#ifdef IOWNANATHLON +#include <unistd.h> +#define SLEEP sleep(4) +#else +#define SLEEP +#endif + #include "tommath.h" #ifdef TIMER @@ -185,6 +192,7 @@ int main(void) log = fopen("logs/add.log", "w"); for (cnt = 8; cnt <= 128; cnt += 8) { + SLEEP; mp_rand(&a, cnt); mp_rand(&b, cnt); reset(); @@ -201,11 +209,12 @@ int main(void) log = fopen("logs/sub.log", "w"); for (cnt = 8; cnt <= 128; cnt += 8) { + SLEEP; mp_rand(&a, cnt); mp_rand(&b, cnt); reset(); rr = 0; - do { + do { DO(mp_sub(&a,&b,&c)); rr += 16; } while (rdtsc() < (CLOCKS_PER_SEC * 2)); @@ -226,6 +235,7 @@ int main(void) log = fopen((ix==0)?"logs/mult.log":"logs/mult_kara.log", "w"); for (cnt = 32; cnt <= 288; cnt += 16) { + SLEEP; mp_rand(&a, cnt); mp_rand(&b, cnt); reset(); @@ -242,6 +252,7 @@ int main(void) log = fopen((ix==0)?"logs/sqr.log":"logs/sqr_kara.log", "w"); for (cnt = 32; cnt <= 288; cnt += 16) { + SLEEP; mp_rand(&a, cnt); reset(); rr = 0; @@ -265,7 +276,7 @@ int main(void) "1475979915214180235084898622737381736312066145333169775147771216478570297878078949377407337049389289382748507531496480477281264838760259191814463365330269540496961201113430156902396093989090226259326935025281409614983499388222831448598601834318536230923772641390209490231836446899608210795482963763094236630945410832793769905399982457186322944729636418890623372171723742105636440368218459649632948538696905872650486914434637457507280441823676813517852099348660847172579408422316678097670224011990280170474894487426924742108823536808485072502240519452587542875349976558572670229633962575212637477897785501552646522609988869914013540483809865681250419497686697771007", "259117086013202627776246767922441530941818887553125427303974923161874019266586362086201209516800483406550695241733194177441689509238807017410377709597512042313066624082916353517952311186154862265604547691127595848775610568757931191017711408826252153849035830401185072116424747461823031471398340229288074545677907941037288235820705892351068433882986888616658650280927692080339605869308790500409503709875902119018371991620994002568935113136548829739112656797303241986517250116412703509705427773477972349821676443446668383119322540099648994051790241624056519054483690809616061625743042361721863339415852426431208737266591962061753535748892894599629195183082621860853400937932839420261866586142503251450773096274235376822938649407127700846077124211823080804139298087057504713825264571448379371125032081826126566649084251699453951887789613650248405739378594599444335231188280123660406262468609212150349937584782292237144339628858485938215738821232393687046160677362909315071", "190797007524439073807468042969529173669356994749940177394741882673528979787005053706368049835514900244303495954950709725762186311224148828811920216904542206960744666169364221195289538436845390250168663932838805192055137154390912666527533007309292687539092257043362517857366624699975402375462954490293259233303137330643531556539739921926201438606439020075174723029056838272505051571967594608350063404495977660656269020823960825567012344189908927956646011998057988548630107637380993519826582389781888135705408653045219655801758081251164080554609057468028203308718724654081055323215860189611391296030471108443146745671967766308925858547271507311563765171008318248647110097614890313562856541784154881743146033909602737947385055355960331855614540900081456378659068370317267696980001187750995491090350108417050917991562167972281070161305972518044872048331306383715094854938415738549894606070722584737978176686422134354526989443028353644037187375385397838259511833166416134323695660367676897722287918773420968982326089026150031515424165462111337527431154890666327374921446276833564519776797633875503548665093914556482031482248883127023777039667707976559857333357013727342079099064400455741830654320379350833236245819348824064783585692924881021978332974949906122664421376034687815350484991", - + /* DR moduli */ "14059105607947488696282932836518693308967803494693489478439861164411992439598399594747002144074658928593502845729752797260025831423419686528151609940203368612079", "101745825697019260773923519755878567461315282017759829107608914364075275235254395622580447400994175578963163918967182013639660669771108475957692810857098847138903161308502419410142185759152435680068435915159402496058513611411688900243039", @@ -289,6 +300,7 @@ int main(void) logb = fopen("logs/expt_dr.log", "w"); logc = fopen("logs/expt_2k.log", "w"); for (n = 0; primes[n]; n++) { + SLEEP; mp_read_radix(&a, primes[n], 10); mp_zero(&b); for (rr = 0; rr < mp_count_bits(&a); rr++) { @@ -325,6 +337,7 @@ int main(void) log = fopen("logs/invmod.log", "w"); for (cnt = 4; cnt <= 128; cnt += 4) { + SLEEP; mp_rand(&a, cnt); mp_rand(&b, cnt); diff --git a/etc/makefile.msvc b/etc/makefile.msvc index 0ce4550..2833372 100644 --- a/etc/makefile.msvc +++ b/etc/makefile.msvc @@ -12,6 +12,9 @@ mersenne: mersenne.obj tune: tune.obj cl tune.obj ../tommath.lib + +mont: mont.obj + cl mont.obj ../tommath.lib drprime: drprime.obj cl drprime.obj ../tommath.lib diff --git a/logs/add.log b/logs/add.log index 1d02326..0cbbbfd 100644 --- a/logs/add.log +++ b/logs/add.log @@ -1,16 +1,16 @@ -224 12444616 -448 10412040 -672 8825112 -896 7519080 -1120 6428432 -1344 5794784 -1568 5242952 -1792 4737008 -2016 4434104 -2240 4132912 -2464 3827752 -2688 3589672 -2912 3350176 -3136 3180208 -3360 3014160 -3584 2847672 +224 12849536 +448 9956080 +672 8372000 +896 7065464 +1120 5824864 +1344 5141728 +1568 4511808 +1792 4170480 +2016 3802536 +2240 3500936 +2464 3193352 +2688 2991976 +2912 2818672 +3136 2661448 +3360 2506560 +3584 2343304 diff --git a/logs/addsub.png b/logs/addsub.png index 56391d9..3315272 100644 Binary files a/logs/addsub.png and b/logs/addsub.png differ diff --git a/logs/expt.log b/logs/expt.log index d0a6f34..1c256bb 100644 --- a/logs/expt.log +++ b/logs/expt.log @@ -1,7 +1,7 @@ -513 680 -769 257 -1025 117 -2049 17 -2561 9 -3073 5 +513 600 +769 221 +1025 103 +2049 15 +2561 8 +3073 4 4097 2 diff --git a/logs/expt.png b/logs/expt.png index 137cd03..36cab73 100644 Binary files a/logs/expt.png and b/logs/expt.png differ diff --git a/logs/expt_2k.log b/logs/expt_2k.log index dda04b2..f6d1ddd 100644 --- a/logs/expt_2k.log +++ b/logs/expt_2k.log @@ -1,6 +1,6 @@ -521 736 -607 552 -1279 112 -2203 33 -3217 13 -4253 6 +521 728 +607 549 +1279 100 +2203 29 +3217 11 +4253 5 diff --git a/logs/expt_dr.log b/logs/expt_dr.log index d578a42..7f72a1a 100644 --- a/logs/expt_dr.log +++ b/logs/expt_dr.log @@ -1,7 +1,7 @@ -532 1064 -784 460 -1036 240 -1540 91 -2072 43 -3080 15 -4116 6 +532 1032 +784 424 +1036 214 +1540 81 +2072 38 +3080 13 +4116 5 diff --git a/logs/invmod.log b/logs/invmod.log index d1198fb..1dec5cd 100644 --- a/logs/invmod.log +++ b/logs/invmod.log @@ -1,32 +1,32 @@ -112 16248 -224 8192 -336 5320 -448 3560 -560 2728 -672 2064 -784 1704 -896 2176 -1008 1184 -1120 976 -1232 1280 -1344 1176 -1456 624 -1568 912 -1680 504 -1792 452 -1904 658 -2016 608 -2128 336 -2240 312 -2352 288 -2464 264 -2576 408 -2688 376 -2800 354 -2912 198 -3024 307 -3136 173 -3248 162 -3360 256 -3472 145 -3584 226 +112 14936 +224 7208 +336 6864 +448 5000 +560 3648 +672 1832 +784 1480 +896 1232 +1008 1010 +1120 1360 +1232 728 +1344 632 +1456 544 +1568 800 +1680 704 +1792 396 +1904 584 +2016 528 +2128 483 +2240 448 +2352 250 +2464 378 +2576 350 +2688 198 +2800 300 +2912 170 +3024 265 +3136 150 +3248 142 +3360 134 +3472 126 +3584 118 diff --git a/logs/invmod.png b/logs/invmod.png index a497a72..24866d5 100644 Binary files a/logs/invmod.png and b/logs/invmod.png differ diff --git a/logs/mult.log b/logs/mult.log index e69de29..ac8bbe4 100644 --- a/logs/mult.log +++ b/logs/mult.log @@ -0,0 +1,17 @@ +896 301008 +1344 141872 +1792 84424 +2240 55864 +2688 39784 +3136 29624 +3584 22952 +4032 18304 +4480 14944 +4928 12432 +5376 10496 +5824 8976 +6272 7776 +6720 6792 +7168 1656 +7616 1472 +8064 1312 diff --git a/logs/mult.png b/logs/mult.png index 3cd8a93..ddc7680 100644 Binary files a/logs/mult.png and b/logs/mult.png differ diff --git a/logs/mult_kara.log b/logs/mult_kara.log index 53c0864..6fd8ada 100644 --- a/logs/mult_kara.log +++ b/logs/mult_kara.log @@ -1,17 +1,17 @@ -896 322872 -1344 151688 -1792 90480 -2240 59984 -2688 42656 -3136 32144 -3584 25840 -4032 21328 -4480 17856 -4928 14928 -5376 12856 -5824 11256 -6272 9880 -6720 8984 -7168 7928 -7616 7200 -8064 6576 +896 301784 +1344 141568 +1792 84592 +2240 55864 +2688 39576 +3136 30088 +3584 24032 +4032 19760 +4480 16536 +4928 13376 +5376 11880 +5824 10256 +6272 9160 +6720 8208 +7168 7384 +7616 6664 +8064 6112 diff --git a/logs/sqr.log b/logs/sqr.log index e69de29..14f7178 100644 --- a/logs/sqr.log +++ b/logs/sqr.log @@ -0,0 +1,17 @@ +896 371856 +1344 196352 +1792 122312 +2240 83144 +2688 60304 +3136 45832 +3584 12760 +4032 10160 +4480 8352 +4928 6944 +5376 5824 +5824 5008 +6272 4336 +6720 3768 +7168 3280 +7616 2952 +8064 2640 diff --git a/logs/sqr_kara.log b/logs/sqr_kara.log index ba30f9e..336f4f7 100644 --- a/logs/sqr_kara.log +++ b/logs/sqr_kara.log @@ -1,17 +1,17 @@ -896 420464 -1344 224800 -1792 142808 -2240 97704 -2688 71416 -3136 54504 -3584 38320 -4032 32360 -4480 27576 -4928 23840 -5376 20688 -5824 18264 -6272 16176 -6720 14440 -7168 11688 -7616 10752 -8064 9936 +896 372256 +1344 196368 +1792 122272 +2240 82976 +2688 60480 +3136 45808 +3584 33296 +4032 27888 +4480 23608 +4928 20296 +5376 17576 +5824 15416 +6272 13600 +6720 12104 +7168 10080 +7616 9232 +8064 8008 diff --git a/logs/sub.log b/logs/sub.log index 272c098..ce6ee3f 100644 --- a/logs/sub.log +++ b/logs/sub.log @@ -1,16 +1,16 @@ -224 10876088 -448 9103552 -672 7823536 -896 6724960 -1120 5993496 -1344 5278984 -1568 4947736 -1792 4478384 -2016 4108840 -2240 3838696 -2464 3604128 -2688 3402192 -2912 3166568 -3136 3090672 -3360 2946720 -3584 2781288 +224 9325944 +448 8075808 +672 7054912 +896 5757992 +1120 5081768 +1344 4669384 +1568 4422384 +1792 3900416 +2016 3548872 +2240 3428912 +2464 3216968 +2688 2905280 +2912 2782664 +3136 2591440 +3360 2475728 +3584 2282216 diff --git a/makefile b/makefile index 9de5e56..afc1000 100644 --- a/makefile +++ b/makefile @@ -6,7 +6,7 @@ CFLAGS += -I./ -Wall -W -Wshadow -O3 -funroll-loops #x86 optimizations [should be valid for any GCC install though] CFLAGS += -fomit-frame-pointer -VERSION=0.25 +VERSION=0.26 default: libtommath.a @@ -81,12 +81,6 @@ poster: poster.tex docs: cd pics ; make pdfes echo "hello" > tommath.ind - perl booker.pl - latex tommath > /dev/null - makeindex tommath - latex tommath > /dev/null - dvips -tB5 -D600 tommath - echo "hello" > tommath.ind perl booker.pl PDF latex tommath > /dev/null makeindex tommath diff --git a/mtest/test.c~ b/mtest/test.c~ new file mode 100644 index 0000000..efdaa58 --- /dev/null +++ b/mtest/test.c~ @@ -0,0 +1,23 @@ +#include <stdio.h> +#include "mpi.c" + +int main(void) +{ + mp_int a, b; + int ix; + char buf[1024]; + + mp_init(&a); + mp_init(&b); + + mp_set(&a, 0x1B); + mp_neg(&a, &a); + ix = 0; + mp_add_d(&a, ix, &b); + + mp_toradix(&b, buf, 64); + printf("b == %s\n", buf); + return 0; +} + + diff --git a/pics/makefile b/pics/makefile index 18c3d44..3ecb02f 100644 --- a/pics/makefile +++ b/pics/makefile @@ -3,16 +3,16 @@ default: pses design_process.ps: design_process.tif - tiff2ps -c -e design_process.tif > design_process.ps + tiff2ps -s -e design_process.tif > design_process.ps sliding_window.ps: sliding_window.tif - tiff2ps -c -e sliding_window.tif > sliding_window.ps + tiff2ps -s -e sliding_window.tif > sliding_window.ps expt_state.ps: expt_state.tif - tiff2ps -c -e expt_state.tif > expt_state.ps + tiff2ps -s -e expt_state.tif > expt_state.ps primality.ps: primality.tif - tiff2ps -c -e primality.tif > primality.ps + tiff2ps -s -e primality.tif > primality.ps design_process.pdf: design_process.ps epstopdf design_process.ps diff --git a/pics/makefile~ b/pics/makefile~ new file mode 100644 index 0000000..4d9bd6b --- /dev/null +++ b/pics/makefile~ @@ -0,0 +1,35 @@ +# makes the images... yeah + +default: pses + +design_process.ps: design_process.tif + tiff2ps -s -e design_process.tif > design_process.ps + +sliding_window.ps: sliding_window.tif + tiff2ps -e sliding_window.tif > sliding_window.ps + +expt_state.ps: expt_state.tif + tiff2ps -e expt_state.tif > expt_state.ps + +primality.ps: primality.tif + tiff2ps -e primality.tif > primality.ps + +design_process.pdf: design_process.ps + epstopdf design_process.ps + +sliding_window.pdf: sliding_window.ps + epstopdf sliding_window.ps + +expt_state.pdf: expt_state.ps + epstopdf expt_state.ps + +primality.pdf: primality.ps + epstopdf primality.ps + + +pses: sliding_window.ps expt_state.ps primality.ps design_process.ps +pdfes: sliding_window.pdf expt_state.pdf primality.pdf design_process.pdf + +clean: + rm -rf *.ps *.pdf .xvpics + \ No newline at end of file diff --git a/poster.pdf b/poster.pdf index 1d25593..d12d949 100644 Binary files a/poster.pdf and b/poster.pdf differ diff --git a/pre_gen/mpi.c b/pre_gen/mpi.c index 4e748f8..72cc770 100644 --- a/pre_gen/mpi.c +++ b/pre_gen/mpi.c @@ -241,7 +241,10 @@ fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) register mp_word *_W; register mp_digit *tmpx; - _W = W; + /* alias for the W[] array */ + _W = W; + + /* alias for the digits of x*/ tmpx = x->dp; /* copy the digits of a into W[0..a->used-1] */ @@ -925,7 +928,6 @@ mp_add_d (mp_int * a, mp_digit b, mp_int * c) return res; } - /* old number of used digits in c */ oldused = c->used; @@ -959,7 +961,6 @@ mp_add_d (mp_int * a, mp_digit b, mp_int * c) /* set final carry */ ix++; *tmpc++ = mu; - } else { /* a was negative and |a| < b */ c->used = 1; @@ -1217,12 +1218,12 @@ int mp_cmp (mp_int * a, mp_int * b) { /* compare based on sign */ - if (a->sign == MP_NEG && b->sign == MP_ZPOS) { - return MP_LT; - } - - if (a->sign == MP_ZPOS && b->sign == MP_NEG) { - return MP_GT; + if (a->sign != b->sign) { + if (a->sign == MP_NEG) { + return MP_LT; + } else { + return MP_GT; + } } /* compare digits */ @@ -1301,23 +1302,30 @@ int mp_cmp_mag (mp_int * a, mp_int * b) { int n; + mp_digit *tmpa, *tmpb; /* compare based on # of non-zero digits */ if (a->used > b->used) { return MP_GT; - } + } if (a->used < b->used) { return MP_LT; } + /* alias for a */ + tmpa = a->dp + (a->used - 1); + + /* alias for b */ + tmpb = b->dp + (a->used - 1); + /* compare based on digits */ - for (n = a->used - 1; n >= 0; n--) { - if (a->dp[n] > b->dp[n]) { + for (n = 0; n < a->used; ++n, --tmpa, --tmpb) { + if (*tmpa > *tmpb) { return MP_GT; - } - - if (a->dp[n] < b->dp[n]) { + } + + if (*tmpa < *tmpb) { return MP_LT; } } @@ -1594,8 +1602,9 @@ mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d) /* step 3. for i from n down to (t + 1) */ for (i = n; i >= (t + 1); i--) { - if (i > x.used) + if (i > x.used) { continue; + } /* step 3.1 if xi == yt then set q{i-t-1} to b-1, * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ @@ -2111,6 +2120,8 @@ int mp_dr_is_modulus(mp_int *a) * The modulus must be of a special format [see manual] * * Has been modified to use algorithm 7.10 from the LTM book instead + * + * Input x must be in the range 0 <= x <= (n-1)^2 */ int mp_dr_reduce (mp_int * x, mp_int * n, mp_digit k) @@ -2149,10 +2160,10 @@ top: *tmpx1++ = (mp_digit)(r & MP_MASK); mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT)); } - + /* set final carry */ *tmpx1++ = mu; - + /* zero words above m */ for (i = m + 1; i < x->used; i++) { *tmpx1++ = 0; @@ -3060,6 +3071,8 @@ int mp_init_multi(mp_int *mp, ...) va_end(args); return res; /* Assumed ok, if error flagged above. */ } + + /* End: bn_mp_init_multi.c */ /* Start: bn_mp_init_size.c */ @@ -3689,30 +3702,43 @@ ERR: */ #include <tommath.h> -/* computes least common multiple as a*b/(a, b) */ +/* computes least common multiple as |a*b|/(a, b) */ int mp_lcm (mp_int * a, mp_int * b, mp_int * c) { int res; - mp_int t; + mp_int t1, t2; - if ((res = mp_init (&t)) != MP_OKAY) { + if ((res = mp_init_multi (&t1, &t2, NULL)) != MP_OKAY) { return res; } - if ((res = mp_mul (a, b, &t)) != MP_OKAY) { - mp_clear (&t); - return res; + /* t1 = get the GCD of the two inputs */ + if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) { + goto __T; } - if ((res = mp_gcd (a, b, c)) != MP_OKAY) { - mp_clear (&t); - return res; + /* divide the smallest by the GCD */ + if (mp_cmp_mag(a, b) == MP_LT) { + /* store quotient in t2 such that t2 * b is the LCM */ + if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) { + goto __T; + } + res = mp_mul(b, &t2, c); + } else { + /* store quotient in t2 such that t2 * a is the LCM */ + if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) { + goto __T; + } + res = mp_mul(a, &t2, c); } - res = mp_div (&t, c, c, NULL); - mp_clear (&t); + /* fix the sign to positive */ + c->sign = MP_ZPOS; + +__T: + mp_clear_multi (&t1, &t2, NULL); return res; } @@ -4012,7 +4038,14 @@ mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) x->used = digs; for (ix = 0; ix < n->used; ix++) { - /* mu = ai * m' mod b */ + /* mu = ai * rho mod b + * + * The value of rho must be precalculated via + * bn_mp_montgomery_setup() such that + * it equals -1/n0 mod b this allows the + * following inner loop to reduce the + * input one digit at a time + */ mu = ((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK; /* a = a + mu * m * b**i */ @@ -4021,8 +4054,10 @@ mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) register mp_digit *tmpn, *tmpx, u; register mp_word r; - /* aliases */ + /* alias for digits of the modulus */ tmpn = n->dp; + + /* alias for the digits of x [the input] */ tmpx = x->dp + ix; /* set the carry to zero */ @@ -4030,12 +4065,20 @@ mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) /* Multiply and add in place */ for (iy = 0; iy < n->used; iy++) { + /* compute product and sum */ r = ((mp_word)mu) * ((mp_word)*tmpn++) + ((mp_word) u) + ((mp_word) * tmpx); + + /* get carry */ u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); + + /* fix digit */ *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK)); } - /* propagate carries */ + /* At this point the ix'th digit of x should be zero */ + + + /* propagate carries upwards as required*/ while (u) { *tmpx += u; u = *tmpx >> DIGIT_BIT; @@ -4044,11 +4087,18 @@ mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) } } + /* at this point the n.used'th least + * significant digits of x are all zero + * which means we can shift x to the + * right by n.used digits and the + * residue is unchanged. + */ + /* x = x/b**n.used */ mp_clamp(x); mp_rshd (x, n->used); - /* if A >= m then A = A - m */ + /* if x >= n then x = x - n */ if (mp_cmp_mag (x, n) != MP_LT) { return s_mp_sub (x, n, x); } @@ -4606,7 +4656,9 @@ mp_neg (mp_int * a, mp_int * b) if ((res = mp_copy (a, b)) != MP_OKAY) { return res; } - b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS; + if (mp_iszero(b) != 1) { + b->sign = (a->sign == MP_ZPOS) ? MP_NEG : MP_ZPOS; + } return MP_OKAY; } @@ -5360,10 +5412,18 @@ mp_read_signed_bin (mp_int * a, unsigned char *b, int c) { int res; + /* read magnitude */ if ((res = mp_read_unsigned_bin (a, b + 1, c - 1)) != MP_OKAY) { return res; } - a->sign = ((b[0] == (unsigned char) 0) ? MP_ZPOS : MP_NEG); + + /* first byte is 0 for positive, non-zero for negative */ + if (b[0] == 0) { + a->sign = MP_ZPOS; + } else { + a->sign = MP_NEG; + } + return MP_OKAY; } @@ -5391,7 +5451,18 @@ int mp_read_unsigned_bin (mp_int * a, unsigned char *b, int c) { int res; + + /* make sure there are at least two digits */ + if (a->alloc < 2) { + if ((res = mp_grow(a, 2)) != MP_OKAY) { + return res; + } + } + + /* zero the int */ mp_zero (a); + + /* read the bytes in */ while (c-- > 0) { if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) { return res; @@ -5804,6 +5875,7 @@ mp_set_int (mp_int * a, unsigned int b) int x, res; mp_zero (a); + /* set four bits at a time */ for (x = 0; x < 8; x++) { /* shift the number up four bits */ @@ -7414,15 +7486,15 @@ s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) /* compute the columns of the output and propagate the carry */ for (iy = 0; iy < pb; iy++) { /* compute the column as a mp_word */ - r = ((mp_word) *tmpt) + - ((mp_word)tmpx) * ((mp_word)*tmpy++) + - ((mp_word) u); + r = ((mp_word)*tmpt) + + ((mp_word)tmpx) * ((mp_word)*tmpy++) + + ((mp_word) u); /* the new column is the lower part of the result */ *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); /* get the carry word from the result */ - u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); + u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); } /* set carry if it is placed below digs */ if (ix + iy < digs) { @@ -7468,7 +7540,6 @@ s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) mp_word r; mp_digit tmpx, *tmpt, *tmpy; - /* can we use the fast multiplier? */ if (((a->used + b->used + 1) < MP_WARRAY) && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { @@ -7497,13 +7568,15 @@ s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) for (iy = digs - ix; iy < pb; iy++) { /* calculate the double precision result */ - r = ((mp_word) * tmpt) + ((mp_word)tmpx) * ((mp_word)*tmpy++) + ((mp_word) u); + r = ((mp_word)*tmpt) + + ((mp_word)tmpx) * ((mp_word)*tmpy++) + + ((mp_word) u); /* get the lower part */ *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); /* carry the carry */ - u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); + u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); } *tmpt = u; } @@ -7572,19 +7645,19 @@ s_mp_sqr (mp_int * a, mp_int * b) /* now calculate the double precision result, note we use * addition instead of *2 since it's easier to optimize */ - r = ((mp_word) *tmpt) + r + r + ((mp_word) u); + r = ((mp_word) *tmpt) + r + r + ((mp_word) u); /* store lower part */ *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); /* get carry */ - u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); + u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); } /* propagate upwards */ while (u != ((mp_digit) 0)) { - r = ((mp_word) * tmpt) + ((mp_word) u); + r = ((mp_word) *tmpt) + ((mp_word) u); *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); - u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); + u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); } }