diff --git a/bn.pdf b/bn.pdf index 596c440..d047a83 100644 Binary files a/bn.pdf and b/bn.pdf differ diff --git a/bn.tex b/bn.tex index 79547f3..980d6b9 100644 --- a/bn.tex +++ b/bn.tex @@ -1,7 +1,7 @@ -\documentclass{article} +\documentclass[]{report} \begin{document} -\title{LibTomMath v0.15 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org } +\title{LibTomMath v0.16 \\ A Free Multiple Precision Integer Library \\ http://math.libtomcrypt.org } \author{Tom St Denis \\ tomstdenis@iahu.ca} \maketitle \newpage @@ -286,6 +286,9 @@ int mp_mul_d(mp_int *a, mp_digit b, mp_int *c); /* a/b => cb + d == a */ int mp_div_d(mp_int *a, mp_digit b, mp_int *c, mp_digit *d); +/* c = a^b */ +int mp_expt_d(mp_int *a, mp_digit b, mp_int *c); + /* c = a mod b, 0 <= c < b */ int mp_mod_d(mp_int *a, mp_digit b, mp_digit *c); \end{verbatim} @@ -1027,6 +1030,32 @@ p == 10951211157166778028568112903923951285881685924091094949001780089679 \end{verbatim} \end{small} +%\newpage +%\section*{Appendix B - Function Quick Sheet} +%The following table gives a quick summary of the functions provided within LibTomMath. + +%\begin{flushleft} +%\begin{tiny} +%\begin{tabular}{|l|l|l|} +%\hline \textbf{Function Name} & \textbf{Purpose} & \textbf{Notes} \\ +%\hline mp\_init(mp\_int *a) & Initializes a mp\_int & Allocates runtime memory required for an integer \\ +%\hline mp\_clear(mp\_int *a) & Frees the ram used by an mp\_int & \\ +%\hline mp\_exch(mp\_int *a, mp\_int *b) & Swaps two mp\_int structures contents & \\ +%\hline mp\_shrink(mp\_int *a) & Frees unused memory & The mp\_int is still valid and not cleared. \\ +%\hline mp\_grow(mp\_int *a, int size) & Ensures that a has at least $size$ digits allocated & \\ +%\hline mp\_init\_size(mp\_int a, int size) & Inits and ensures it has at least $size$ digits & \\ +%\hline &&\\ +%\hline mp\_zero(mp\_int *a) & $a \leftarrow 0$ & \\ +%\hline mp\_set(mp\_int *a, mp\_digit b) & $a \leftarrow b$ & \\ +%\hline mp\_set\_int(mp\_int *a, unsigned long b) & $a \leftarrow b$ & Only reads upto 32 bits from $b$ \\ +%\hline &&\\ +%\hline mp\_rshd(mp\_int *a, int b) & $a \leftarrow a/\beta^b$ & \\ +%\hline mp\_lshd(mp\_int *a, int b) & $a \leftarrow a \cdot \beta^b$ &\\ +%\hline mp\_div\_2d(mp\_int *a, int b, mp\_int *c, mp\_int *d) & &\\ +%\hline +%\end{tabular} +%\end{tiny} +%\end{flushleft} \end{document} diff --git a/bn_fast_mp_invmod.c b/bn_fast_mp_invmod.c index 38c265e..68cdf1c 100644 --- a/bn_fast_mp_invmod.c +++ b/bn_fast_mp_invmod.c @@ -26,6 +26,7 @@ fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c) mp_int x, y, u, v, B, D; int res, neg; + /* init all our temps */ if ((res = mp_init (&x)) != MP_OKAY) { goto __ERR; } @@ -58,6 +59,7 @@ fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c) goto __D; } + /* we need |y| */ if ((res = mp_abs (&y, &y)) != MP_OKAY) { goto __D; } @@ -93,13 +95,12 @@ top: goto __D; } } - /* A = A/2, B = B/2 */ + /* B = B/2 */ if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { goto __D; } } - /* 5. while v is even do */ while (mp_iseven (&v) == 1) { /* 5.1 v = v/2 */ @@ -108,12 +109,12 @@ top: } /* 5.2 if C,D are even then */ if (mp_iseven (&D) == 0) { - /* C = (C+y)/2, D = (D-x)/2 */ + /* D = (D-x)/2 */ if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { goto __D; } } - /* C = C/2, D = D/2 */ + /* D = D/2 */ if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { goto __D; } @@ -121,7 +122,7 @@ top: /* 6. if u >= v then */ if (mp_cmp (&u, &v) != MP_LT) { - /* u = u - v, A = A - C, B = B - D */ + /* u = u - v, B = B - D */ if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { goto __D; } @@ -130,7 +131,7 @@ top: goto __D; } } else { - /* v - v - u, C = C - A, D = D - B */ + /* v - v - u, D = D - B */ if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { goto __D; } diff --git a/bn_fast_mp_montgomery_reduce.c b/bn_fast_mp_montgomery_reduce.c index 17be2e4..031b410 100644 --- a/bn_fast_mp_montgomery_reduce.c +++ b/bn_fast_mp_montgomery_reduce.c @@ -45,12 +45,12 @@ fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp) _W = W; tmpa = a->dp; - /* copy the digits of a */ + /* copy the digits of a into W[0..a->used-1] */ for (ix = 0; ix < a->used; ix++) { *_W++ = *tmpa++; } - /* zero the high words */ + /* zero the high words of W[a->used..m->used*2] */ for (; ix < m->used * 2 + 1; ix++) { *_W++ = 0; } diff --git a/bn_mp_add_d.c b/bn_mp_add_d.c index 0fe5ad3..66d1f07 100644 --- a/bn_mp_add_d.c +++ b/bn_mp_add_d.c @@ -21,7 +21,7 @@ mp_add_d (mp_int * a, mp_digit b, mp_int * c) mp_int t; int res; - if ((res = mp_init (&t)) != MP_OKAY) { + if ((res = mp_init_size(&t, 1)) != MP_OKAY) { return res; } mp_set (&t, b); diff --git a/bn_mp_clamp.c b/bn_mp_clamp.c index f2839a1..c349f7c 100644 --- a/bn_mp_clamp.c +++ b/bn_mp_clamp.c @@ -24,8 +24,9 @@ void mp_clamp (mp_int * a) { - while (a->used > 0 && a->dp[a->used - 1] == 0) + while (a->used > 0 && a->dp[a->used - 1] == 0) { --(a->used); + } if (a->used == 0) { a->sign = MP_ZPOS; } diff --git a/bn_mp_copy.c b/bn_mp_copy.c index 10ab6a6..1bf5f12 100644 --- a/bn_mp_copy.c +++ b/bn_mp_copy.c @@ -37,6 +37,7 @@ mp_copy (mp_int * a, mp_int * b) { register mp_digit *tmpa, *tmpb; + /* point aliases */ tmpa = a->dp; tmpb = b->dp; diff --git a/bn_mp_div.c b/bn_mp_div.c index 8eceec8..3888a4b 100644 --- a/bn_mp_div.c +++ b/bn_mp_div.c @@ -74,17 +74,19 @@ mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d) x.sign = y.sign = MP_ZPOS; /* normalize both x and y, ensure that y >= b/2, [b == 2^DIGIT_BIT] */ - norm = 0; - while ((y.dp[y.used - 1] & (((mp_digit) 1) << (DIGIT_BIT - 1))) == ((mp_digit) 0)) { - ++norm; - if ((res = mp_mul_2 (&x, &x)) != MP_OKAY) { - goto __Y; - } - if ((res = mp_mul_2 (&y, &y)) != MP_OKAY) { - goto __Y; - } + norm = mp_count_bits(&y) % DIGIT_BIT; + if (norm < (DIGIT_BIT-1)) { + norm = (DIGIT_BIT-1) - norm; + if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) { + goto __Y; + } + if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) { + goto __Y; + } + } else { + norm = 0; } - + /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ n = x.used - 1; t = y.used - 1; diff --git a/bn_mp_div_2.c b/bn_mp_div_2.c index 787f87a..858e8a4 100644 --- a/bn_mp_div_2.c +++ b/bn_mp_div_2.c @@ -32,15 +32,26 @@ mp_div_2 (mp_int * a, mp_int * b) { register mp_digit r, rr, *tmpa, *tmpb; + /* source alias */ tmpa = a->dp + b->used - 1; + + /* dest alias */ tmpb = b->dp + b->used - 1; + + /* carry */ r = 0; for (x = b->used - 1; x >= 0; x--) { + /* get the carry for the next iteration */ rr = *tmpa & 1; + + /* shift the current digit, add in carry and store */ *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); + + /* forward carry to next iteration */ r = rr; } + /* zero excess digits */ tmpb = b->dp + b->used; for (x = b->used; x < oldused; x++) { *tmpb++ = 0; diff --git a/bn_mp_div_2d.c b/bn_mp_div_2d.c index 4258c05..75501a4 100644 --- a/bn_mp_div_2d.c +++ b/bn_mp_div_2d.c @@ -58,13 +58,23 @@ mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d) /* shift any bit count < DIGIT_BIT */ D = (mp_digit) (b % DIGIT_BIT); if (D != 0) { + register mp_digit *tmpc, mask; + + /* mask */ + mask = (1U << D) - 1U; + + /* alias */ + tmpc = c->dp + (c->used - 1); + + /* carry */ r = 0; for (x = c->used - 1; x >= 0; x--) { /* get the lower bits of this word in a temp */ - rr = c->dp[x] & ((mp_digit) ((1U << D) - 1U)); + rr = *tmpc & mask; /* shift the current word and mix in the carry bits from the previous word */ - c->dp[x] = (c->dp[x] >> D) | (r << (DIGIT_BIT - D)); + *tmpc = (*tmpc >> D) | (r << (DIGIT_BIT - D)); + --tmpc; /* set the carry to the carry bits of the current word found above */ r = rr; diff --git a/bn_mp_div_d.c b/bn_mp_div_d.c index 4c25a74..4b33a42 100644 --- a/bn_mp_div_d.c +++ b/bn_mp_div_d.c @@ -33,6 +33,7 @@ mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d) mp_set (&t, b); res = mp_div (a, &t, c, &t2); + /* set remainder if not null */ if (d != NULL) { *d = t2.dp[0]; } diff --git a/bn_mp_exch.c b/bn_mp_exch.c index 526c4c9..b2cdab7 100644 --- a/bn_mp_exch.c +++ b/bn_mp_exch.c @@ -14,6 +14,9 @@ */ #include +/* swap the elements of two integers, for cases where you can't simply swap the + * mp_int pointers around + */ void mp_exch (mp_int * a, mp_int * b) { diff --git a/bn_mp_expt_d.c b/bn_mp_expt_d.c index 602785b..144ae07 100644 --- a/bn_mp_expt_d.c +++ b/bn_mp_expt_d.c @@ -14,13 +14,13 @@ */ #include +/* calculate c = a^b using a square-multiply algorithm */ int mp_expt_d (mp_int * a, mp_digit b, mp_int * c) { int res, x; mp_int g; - if ((res = mp_init_copy (&g, a)) != MP_OKAY) { return res; } @@ -29,11 +29,13 @@ mp_expt_d (mp_int * a, mp_digit b, mp_int * c) mp_set (c, 1); for (x = 0; x < (int) DIGIT_BIT; x++) { + /* square */ if ((res = mp_sqr (c, c)) != MP_OKAY) { mp_clear (&g); return res; } + /* if the bit is set multiply */ if ((b & (mp_digit) (1 << (DIGIT_BIT - 1))) != 0) { if ((res = mp_mul (c, &g, c)) != MP_OKAY) { mp_clear (&g); @@ -41,6 +43,7 @@ mp_expt_d (mp_int * a, mp_digit b, mp_int * c) } } + /* shift to next bit */ b <<= 1; } diff --git a/bn_mp_exptmod.c b/bn_mp_exptmod.c index a780dbc..b6635f5 100644 --- a/bn_mp_exptmod.c +++ b/bn_mp_exptmod.c @@ -164,7 +164,7 @@ f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) mode = 2; if (bitcpy == winsize) { - /* ok window is filled so square as required and multiply multiply */ + /* ok window is filled so square as required and multiply */ /* square first */ for (x = 0; x < winsize; x++) { if ((err = mp_sqr (&res, &res)) != MP_OKAY) { diff --git a/bn_mp_exptmod_fast.c b/bn_mp_exptmod_fast.c index 83c7b7a..0906f27 100644 --- a/bn_mp_exptmod_fast.c +++ b/bn_mp_exptmod_fast.c @@ -19,7 +19,7 @@ * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. * The value of k changes based on the size of the exponent. * - * Uses Montgomery reduction + * Uses Montgomery or Diminished Radix reduction [whichever appropriate] */ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) @@ -29,7 +29,6 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; int (*redux)(mp_int*,mp_int*,mp_digit); - /* find window size */ x = mp_count_bits (X); if (x <= 7) { @@ -169,7 +168,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) mode = 2; if (bitcpy == winsize) { - /* ok window is filled so square as required and multiply multiply */ + /* ok window is filled so square as required and multiply */ /* square first */ for (x = 0; x < winsize; x++) { if ((err = mp_sqr (&res, &res)) != MP_OKAY) { diff --git a/bn_mp_gcd.c b/bn_mp_gcd.c index df68841..d7cc1d4 100644 --- a/bn_mp_gcd.c +++ b/bn_mp_gcd.c @@ -22,7 +22,6 @@ mp_gcd (mp_int * a, mp_int * b, mp_int * c) mp_int u, v, t; int k, res, neg; - /* either zero than gcd is the largest */ if (mp_iszero (a) == 1 && mp_iszero (b) == 0) { return mp_copy (b, c); @@ -55,7 +54,7 @@ mp_gcd (mp_int * a, mp_int * b, mp_int * c) /* B1. Find power of two */ k = 0; - while ((u.dp[0] & 1) == 0 && (v.dp[0] & 1) == 0) { + while (mp_iseven(&u) == 1 && mp_iseven(&v) == 1) { ++k; if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { goto __T; @@ -66,12 +65,14 @@ mp_gcd (mp_int * a, mp_int * b, mp_int * c) } /* B2. Initialize */ - if ((u.dp[0] & 1) == 1) { + if (mp_isodd(&u) == 1) { + /* t = -v */ if ((res = mp_copy (&v, &t)) != MP_OKAY) { goto __T; } t.sign = MP_NEG; } else { + /* t = u */ if ((res = mp_copy (&u, &t)) != MP_OKAY) { goto __T; } @@ -79,7 +80,7 @@ mp_gcd (mp_int * a, mp_int * b, mp_int * c) do { /* B3 (and B4). Halve t, if even */ - while (t.used != 0 && (t.dp[0] & 1) == 0) { + while (t.used != 0 && mp_iseven(&t) == 1) { if ((res = mp_div_2 (&t, &t)) != MP_OKAY) { goto __T; } diff --git a/bn_mp_grow.c b/bn_mp_grow.c index 369fb4e..9bd5118 100644 --- a/bn_mp_grow.c +++ b/bn_mp_grow.c @@ -22,13 +22,15 @@ mp_grow (mp_int * a, int size) /* if the alloc size is smaller alloc more ram */ if (a->alloc < size) { - size += (MP_PREC * 2) - (size & (MP_PREC - 1)); /* ensure there are always at least MP_PREC digits extra on top */ + /* ensure there are always at least MP_PREC digits extra on top */ + size += (MP_PREC * 2) - (size & (MP_PREC - 1)); a->dp = OPT_CAST realloc (a->dp, sizeof (mp_digit) * size); if (a->dp == NULL) { return MP_MEM; } + /* zero excess digits */ n = a->alloc; a->alloc = size; for (i = n; i < a->alloc; i++) { diff --git a/bn_mp_init.c b/bn_mp_init.c index 7c3ee01..b96e6d9 100644 --- a/bn_mp_init.c +++ b/bn_mp_init.c @@ -27,9 +27,9 @@ mp_init (mp_int * a) /* set the used to zero, allocated digit to the default precision * and sign to positive */ - a->used = 0; + a->used = 0; a->alloc = MP_PREC; - a->sign = MP_ZPOS; + a->sign = MP_ZPOS; return MP_OKAY; } diff --git a/bn_mp_init_size.c b/bn_mp_init_size.c index 45d8dc5..2408c1a 100644 --- a/bn_mp_init_size.c +++ b/bn_mp_init_size.c @@ -19,8 +19,10 @@ int mp_init_size (mp_int * a, int size) { - /* pad up so there are at least 16 zero digits */ - size += (MP_PREC * 2) - (size & (MP_PREC - 1)); /* ensure there are always at least 16 digits extra on top */ + /* pad size so there are always extra digits */ + size += (MP_PREC * 2) - (size & (MP_PREC - 1)); + + /* alloc mem */ a->dp = OPT_CAST calloc (sizeof (mp_digit), size); if (a->dp == NULL) { return MP_MEM; diff --git a/bn_mp_mul_2.c b/bn_mp_mul_2.c index fca7125..fd8db1f 100644 --- a/bn_mp_mul_2.c +++ b/bn_mp_mul_2.c @@ -35,17 +35,29 @@ mp_mul_2 (mp_int * a, mp_int * b) { register mp_digit r, rr, *tmpa, *tmpb; - r = 0; + /* alias for source */ tmpa = a->dp; + + /* alias for dest */ tmpb = b->dp; + + /* carry */ + r = 0; for (x = 0; x < b->used; x++) { + + /* get what will be the *next* carry bit from the MSB of the current digit */ rr = *tmpa >> (DIGIT_BIT - 1); + + /* now shift up this digit, add in the carry [from the previous] */ *tmpb++ = ((*tmpa++ << 1) | r) & MP_MASK; + + /* copy the carry that would be from the source digit into the next iteration */ r = rr; } /* new leading digit? */ if (r != 0) { + /* do we have to grow to accomodate the new digit? */ if (b->alloc == b->used) { if ((res = mp_grow (b, b->used + 1)) != MP_OKAY) { return res; @@ -56,11 +68,12 @@ mp_mul_2 (mp_int * a, mp_int * b) */ tmpb = b->dp + b->used; } - /* add a MSB of 1 */ + /* add a MSB which is always 1 at this point */ *tmpb = 1; ++b->used; } + /* now zero any excess digits on the destination that we didn't write to */ tmpb = b->dp + b->used; for (x = b->used; x < oldused; x++) { *tmpb++ = 0; diff --git a/bn_mp_mul_2d.c b/bn_mp_mul_2d.c index 3b336d1..137df30 100644 --- a/bn_mp_mul_2d.c +++ b/bn_mp_mul_2d.c @@ -21,7 +21,6 @@ mp_mul_2d (mp_int * a, int b, mp_int * c) mp_digit d, r, rr; int x, res; - /* copy */ if ((res = mp_copy (a, c)) != MP_OKAY) { return res; @@ -42,13 +41,23 @@ mp_mul_2d (mp_int * a, int b, mp_int * c) /* shift any bit count < DIGIT_BIT */ d = (mp_digit) (b % DIGIT_BIT); if (d != 0) { - r = 0; + register mp_digit *tmpc, mask; + + /* bitmask for carries */ + mask = (1U << d) - 1U; + + /* alias */ + tmpc = c->dp; + + /* carry */ + r = 0; for (x = 0; x < c->used; x++) { /* get the higher bits of the current word */ - rr = (c->dp[x] >> (DIGIT_BIT - d)) & ((mp_digit) ((1U << d) - 1U)); + rr = (*tmpc >> (DIGIT_BIT - d)) & mask; /* shift the current word and OR in the carry */ - c->dp[x] = ((c->dp[x] << d) | r) & MP_MASK; + *tmpc = ((*tmpc << d) | r) & MP_MASK; + ++tmpc; /* set the carry to the carry bits of the current word */ r = rr; diff --git a/bn_radix.c b/bn_radix.c index 6aeda17..3b4b639 100644 --- a/bn_radix.c +++ b/bn_radix.c @@ -17,7 +17,6 @@ /* chars used in radix conversions */ static const char *s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; - /* read a string [ASCII] in a given radix */ int mp_read_radix (mp_int * a, char *str, int radix) diff --git a/changes.txt b/changes.txt index df7ac4e..6833bdc 100644 --- a/changes.txt +++ b/changes.txt @@ -1,3 +1,9 @@ +Mar 29th, 2003 +v0.16 -- Sped up mp_div by making normalization one shift call + -- Sped up mp_mul_2d/mp_div_2d by aliasing pointers :-) + -- Cleaned up mp_gcd to use the macros for odd/even detection + -- Added comments here and there, mostly there but occasionally here too. + Mar 22nd, 2003 v0.15 -- Added series of prime testing routines to lib -- Fixed up etc/tune.c diff --git a/demo/demo.c b/demo/demo.c index 8cf6dfe..ff85903 100644 --- a/demo/demo.c +++ b/demo/demo.c @@ -140,6 +140,7 @@ int main(void) #ifdef TIMER printf("CLOCKS_PER_SEC == %lu\n", CLOCKS_PER_SEC); +goto sqrtime; log = fopen("add.log", "w"); for (cnt = 4; cnt <= 128; cnt += 4) { @@ -170,6 +171,7 @@ int main(void) fclose(log); +sqrtime: log = fopen("sqr.log", "w"); for (cnt = 4; cnt <= 128; cnt += 4) { mp_rand(&a, cnt); @@ -197,6 +199,7 @@ int main(void) } fclose(log); +expttime: { char *primes[] = { /* DR moduli */ diff --git a/makefile b/makefile index 4219e6b..8466163 100644 --- a/makefile +++ b/makefile @@ -1,6 +1,6 @@ CFLAGS += -I./ -Wall -W -Wshadow -O3 -fomit-frame-pointer -funroll-loops -VERSION=0.15 +VERSION=0.16 default: libtommath.a diff --git a/pre_gen/mpi.c b/pre_gen/mpi.c index d659761..3921dc4 100644 --- a/pre_gen/mpi.c +++ b/pre_gen/mpi.c @@ -53,6 +53,7 @@ fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c) mp_int x, y, u, v, B, D; int res, neg; + /* init all our temps */ if ((res = mp_init (&x)) != MP_OKAY) { goto __ERR; } @@ -85,6 +86,7 @@ fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c) goto __D; } + /* we need |y| */ if ((res = mp_abs (&y, &y)) != MP_OKAY) { goto __D; } @@ -120,13 +122,12 @@ top: goto __D; } } - /* A = A/2, B = B/2 */ + /* B = B/2 */ if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { goto __D; } } - /* 5. while v is even do */ while (mp_iseven (&v) == 1) { /* 5.1 v = v/2 */ @@ -135,12 +136,12 @@ top: } /* 5.2 if C,D are even then */ if (mp_iseven (&D) == 0) { - /* C = (C+y)/2, D = (D-x)/2 */ + /* D = (D-x)/2 */ if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { goto __D; } } - /* C = C/2, D = D/2 */ + /* D = D/2 */ if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { goto __D; } @@ -148,7 +149,7 @@ top: /* 6. if u >= v then */ if (mp_cmp (&u, &v) != MP_LT) { - /* u = u - v, A = A - C, B = B - D */ + /* u = u - v, B = B - D */ if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { goto __D; } @@ -157,7 +158,7 @@ top: goto __D; } } else { - /* v - v - u, C = C - A, D = D - B */ + /* v - v - u, D = D - B */ if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { goto __D; } @@ -251,12 +252,12 @@ fast_mp_montgomery_reduce (mp_int * a, mp_int * m, mp_digit mp) _W = W; tmpa = a->dp; - /* copy the digits of a */ + /* copy the digits of a into W[0..a->used-1] */ for (ix = 0; ix < a->used; ix++) { *_W++ = *tmpa++; } - /* zero the high words */ + /* zero the high words of W[a->used..m->used*2] */ for (; ix < m->used * 2 + 1; ix++) { *_W++ = 0; } @@ -901,7 +902,7 @@ mp_add_d (mp_int * a, mp_digit b, mp_int * c) mp_int t; int res; - if ((res = mp_init (&t)) != MP_OKAY) { + if ((res = mp_init_size(&t, 1)) != MP_OKAY) { return res; } mp_set (&t, b); @@ -995,8 +996,9 @@ mp_and (mp_int * a, mp_int * b, mp_int * c) void mp_clamp (mp_int * a) { - while (a->used > 0 && a->dp[a->used - 1] == 0) + while (a->used > 0 && a->dp[a->used - 1] == 0) { --(a->used); + } if (a->used == 0) { a->sign = MP_ZPOS; } @@ -1197,6 +1199,7 @@ mp_copy (mp_int * a, mp_int * b) { register mp_digit *tmpa, *tmpb; + /* point aliases */ tmpa = a->dp; tmpb = b->dp; @@ -1331,17 +1334,19 @@ mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d) x.sign = y.sign = MP_ZPOS; /* normalize both x and y, ensure that y >= b/2, [b == 2^DIGIT_BIT] */ - norm = 0; - while ((y.dp[y.used - 1] & (((mp_digit) 1) << (DIGIT_BIT - 1))) == ((mp_digit) 0)) { - ++norm; - if ((res = mp_mul_2 (&x, &x)) != MP_OKAY) { - goto __Y; - } - if ((res = mp_mul_2 (&y, &y)) != MP_OKAY) { - goto __Y; - } + norm = mp_count_bits(&y) % DIGIT_BIT; + if (norm < (DIGIT_BIT-1)) { + norm = (DIGIT_BIT-1) - norm; + if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) { + goto __Y; + } + if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) { + goto __Y; + } + } else { + norm = 0; } - + /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ n = x.used - 1; t = y.used - 1; @@ -1491,15 +1496,26 @@ mp_div_2 (mp_int * a, mp_int * b) { register mp_digit r, rr, *tmpa, *tmpb; + /* source alias */ tmpa = a->dp + b->used - 1; + + /* dest alias */ tmpb = b->dp + b->used - 1; + + /* carry */ r = 0; for (x = b->used - 1; x >= 0; x--) { + /* get the carry for the next iteration */ rr = *tmpa & 1; + + /* shift the current digit, add in carry and store */ *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); + + /* forward carry to next iteration */ r = rr; } + /* zero excess digits */ tmpb = b->dp + b->used; for (x = b->used; x < oldused; x++) { *tmpb++ = 0; @@ -1573,13 +1589,23 @@ mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d) /* shift any bit count < DIGIT_BIT */ D = (mp_digit) (b % DIGIT_BIT); if (D != 0) { + register mp_digit *tmpc, mask; + + /* mask */ + mask = (1U << D) - 1U; + + /* alias */ + tmpc = c->dp + (c->used - 1); + + /* carry */ r = 0; for (x = c->used - 1; x >= 0; x--) { /* get the lower bits of this word in a temp */ - rr = c->dp[x] & ((mp_digit) ((1U << D) - 1U)); + rr = *tmpc & mask; /* shift the current word and mix in the carry bits from the previous word */ - c->dp[x] = (c->dp[x] >> D) | (r << (DIGIT_BIT - D)); + *tmpc = (*tmpc >> D) | (r << (DIGIT_BIT - D)); + --tmpc; /* set the carry to the carry bits of the current word found above */ r = rr; @@ -1632,6 +1658,7 @@ mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d) mp_set (&t, b); res = mp_div (a, &t, c, &t2); + /* set remainder if not null */ if (d != NULL) { *d = t2.dp[0]; } @@ -1814,6 +1841,9 @@ void mp_dr_setup(mp_int *a, mp_digit *d) */ #include +/* swap the elements of two integers, for cases where you can't simply swap the + * mp_int pointers around + */ void mp_exch (mp_int * a, mp_int * b) { @@ -1993,7 +2023,7 @@ f_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) mode = 2; if (bitcpy == winsize) { - /* ok window is filled so square as required and multiply multiply */ + /* ok window is filled so square as required and multiply */ /* square first */ for (x = 0; x < winsize; x++) { if ((err = mp_sqr (&res, &res)) != MP_OKAY) { @@ -2077,7 +2107,7 @@ __M: * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. * The value of k changes based on the size of the exponent. * - * Uses Montgomery reduction + * Uses Montgomery or Diminished Radix reduction [whichever appropriate] */ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) @@ -2087,7 +2117,6 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; int (*redux)(mp_int*,mp_int*,mp_digit); - /* find window size */ x = mp_count_bits (X); if (x <= 7) { @@ -2227,7 +2256,7 @@ mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) mode = 2; if (bitcpy == winsize) { - /* ok window is filled so square as required and multiply multiply */ + /* ok window is filled so square as required and multiply */ /* square first */ for (x = 0; x < winsize; x++) { if ((err = mp_sqr (&res, &res)) != MP_OKAY) { @@ -2312,13 +2341,13 @@ __M: */ #include +/* calculate c = a^b using a square-multiply algorithm */ int mp_expt_d (mp_int * a, mp_digit b, mp_int * c) { int res, x; mp_int g; - if ((res = mp_init_copy (&g, a)) != MP_OKAY) { return res; } @@ -2327,11 +2356,13 @@ mp_expt_d (mp_int * a, mp_digit b, mp_int * c) mp_set (c, 1); for (x = 0; x < (int) DIGIT_BIT; x++) { + /* square */ if ((res = mp_sqr (c, c)) != MP_OKAY) { mp_clear (&g); return res; } + /* if the bit is set multiply */ if ((b & (mp_digit) (1 << (DIGIT_BIT - 1))) != 0) { if ((res = mp_mul (c, &g, c)) != MP_OKAY) { mp_clear (&g); @@ -2339,6 +2370,7 @@ mp_expt_d (mp_int * a, mp_digit b, mp_int * c) } } + /* shift to next bit */ b <<= 1; } @@ -2373,7 +2405,6 @@ mp_gcd (mp_int * a, mp_int * b, mp_int * c) mp_int u, v, t; int k, res, neg; - /* either zero than gcd is the largest */ if (mp_iszero (a) == 1 && mp_iszero (b) == 0) { return mp_copy (b, c); @@ -2406,7 +2437,7 @@ mp_gcd (mp_int * a, mp_int * b, mp_int * c) /* B1. Find power of two */ k = 0; - while ((u.dp[0] & 1) == 0 && (v.dp[0] & 1) == 0) { + while (mp_iseven(&u) == 1 && mp_iseven(&v) == 1) { ++k; if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { goto __T; @@ -2417,12 +2448,14 @@ mp_gcd (mp_int * a, mp_int * b, mp_int * c) } /* B2. Initialize */ - if ((u.dp[0] & 1) == 1) { + if (mp_isodd(&u) == 1) { + /* t = -v */ if ((res = mp_copy (&v, &t)) != MP_OKAY) { goto __T; } t.sign = MP_NEG; } else { + /* t = u */ if ((res = mp_copy (&u, &t)) != MP_OKAY) { goto __T; } @@ -2430,7 +2463,7 @@ mp_gcd (mp_int * a, mp_int * b, mp_int * c) do { /* B3 (and B4). Halve t, if even */ - while (t.used != 0 && (t.dp[0] & 1) == 0) { + while (t.used != 0 && mp_iseven(&t) == 1) { if ((res = mp_div_2 (&t, &t)) != MP_OKAY) { goto __T; } @@ -2495,13 +2528,15 @@ mp_grow (mp_int * a, int size) /* if the alloc size is smaller alloc more ram */ if (a->alloc < size) { - size += (MP_PREC * 2) - (size & (MP_PREC - 1)); /* ensure there are always at least MP_PREC digits extra on top */ + /* ensure there are always at least MP_PREC digits extra on top */ + size += (MP_PREC * 2) - (size & (MP_PREC - 1)); a->dp = OPT_CAST realloc (a->dp, sizeof (mp_digit) * size); if (a->dp == NULL) { return MP_MEM; } + /* zero excess digits */ n = a->alloc; a->alloc = size; for (i = n; i < a->alloc; i++) { @@ -2543,9 +2578,9 @@ mp_init (mp_int * a) /* set the used to zero, allocated digit to the default precision * and sign to positive */ - a->used = 0; + a->used = 0; a->alloc = MP_PREC; - a->sign = MP_ZPOS; + a->sign = MP_ZPOS; return MP_OKAY; } @@ -2605,8 +2640,10 @@ int mp_init_size (mp_int * a, int size) { - /* pad up so there are at least 16 zero digits */ - size += (MP_PREC * 2) - (size & (MP_PREC - 1)); /* ensure there are always at least 16 digits extra on top */ + /* pad size so there are always extra digits */ + size += (MP_PREC * 2) - (size & (MP_PREC - 1)); + + /* alloc mem */ a->dp = OPT_CAST calloc (sizeof (mp_digit), size); if (a->dp == NULL) { return MP_MEM; @@ -3795,17 +3832,29 @@ mp_mul_2 (mp_int * a, mp_int * b) { register mp_digit r, rr, *tmpa, *tmpb; - r = 0; + /* alias for source */ tmpa = a->dp; + + /* alias for dest */ tmpb = b->dp; + + /* carry */ + r = 0; for (x = 0; x < b->used; x++) { + + /* get what will be the *next* carry bit from the MSB of the current digit */ rr = *tmpa >> (DIGIT_BIT - 1); + + /* now shift up this digit, add in the carry [from the previous] */ *tmpb++ = ((*tmpa++ << 1) | r) & MP_MASK; + + /* copy the carry that would be from the source digit into the next iteration */ r = rr; } /* new leading digit? */ if (r != 0) { + /* do we have to grow to accomodate the new digit? */ if (b->alloc == b->used) { if ((res = mp_grow (b, b->used + 1)) != MP_OKAY) { return res; @@ -3816,11 +3865,12 @@ mp_mul_2 (mp_int * a, mp_int * b) */ tmpb = b->dp + b->used; } - /* add a MSB of 1 */ + /* add a MSB which is always 1 at this point */ *tmpb = 1; ++b->used; } + /* now zero any excess digits on the destination that we didn't write to */ tmpb = b->dp + b->used; for (x = b->used; x < oldused; x++) { *tmpb++ = 0; @@ -3856,7 +3906,6 @@ mp_mul_2d (mp_int * a, int b, mp_int * c) mp_digit d, r, rr; int x, res; - /* copy */ if ((res = mp_copy (a, c)) != MP_OKAY) { return res; @@ -3877,13 +3926,23 @@ mp_mul_2d (mp_int * a, int b, mp_int * c) /* shift any bit count < DIGIT_BIT */ d = (mp_digit) (b % DIGIT_BIT); if (d != 0) { - r = 0; + register mp_digit *tmpc, mask; + + /* bitmask for carries */ + mask = (1U << d) - 1U; + + /* alias */ + tmpc = c->dp; + + /* carry */ + r = 0; for (x = 0; x < c->used; x++) { /* get the higher bits of the current word */ - rr = (c->dp[x] >> (DIGIT_BIT - d)) & ((mp_digit) ((1U << d) - 1U)); + rr = (*tmpc >> (DIGIT_BIT - d)) & mask; /* shift the current word and OR in the carry */ - c->dp[x] = ((c->dp[x] << d) | r) & MP_MASK; + *tmpc = ((*tmpc << d) | r) & MP_MASK; + ++tmpc; /* set the carry to the carry bits of the current word */ r = rr; @@ -5401,7 +5460,6 @@ const mp_digit __prime_tab[] = { /* chars used in radix conversions */ static const char *s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; - /* read a string [ASCII] in a given radix */ int mp_read_radix (mp_int * a, char *str, int radix) diff --git a/tommath.h b/tommath.h index d8f8d9d..cfd9da1 100644 --- a/tommath.h +++ b/tommath.h @@ -124,6 +124,12 @@ void mp_exch(mp_int *a, mp_int *b); /* shrink ram required for a bignum */ int mp_shrink(mp_int *a); +/* grow an int to a given size */ +int mp_grow(mp_int *a, int size); + +/* init to a given number of digits */ +int mp_init_size(mp_int *a, int size); + /* ---> Basic Manipulations <--- */ #define mp_iszero(a) (((a)->used == 0) ? 1 : 0) @@ -139,12 +145,6 @@ void mp_set(mp_int *a, mp_digit b); /* set a 32-bit const */ int mp_set_int(mp_int *a, unsigned long b); -/* grow an int to a given size */ -int mp_grow(mp_int *a, int size); - -/* init to a given number of digits */ -int mp_init_size(mp_int *a, int size); - /* copy, b = a */ int mp_copy(mp_int *a, mp_int *b);