diff --git a/TODO b/TODO deleted file mode 100644 index deffba1..0000000 --- a/TODO +++ /dev/null @@ -1,16 +0,0 @@ -things for book in order of importance... - -- Fix up pseudo-code [only] for combas that are not consistent with source -- Start in chapter 3 [basics] and work up... - - re-write to prose [less abrupt] - - clean up pseudo code [spacing] - - more examples where appropriate and figures - -Goal: - - Get sync done by mid January [roughly 8-12 hours work] - - Finish ch3-6 by end of January [roughly 12-16 hours of work] - - Finish ch7-end by mid Feb [roughly 20-24 hours of work]. - -Goal isn't "first edition" but merely cleaner to read. - - diff --git a/bn.pdf b/bn.pdf index 615ff4e..b8b8f8e 100644 Binary files a/bn.pdf and b/bn.pdf differ diff --git a/bn.tex b/bn.tex index 244bd6f..8b37766 100644 --- a/bn.tex +++ b/bn.tex @@ -49,7 +49,7 @@ \begin{document} \frontmatter \pagestyle{empty} -\title{LibTomMath User Manual \\ v0.35} +\title{LibTomMath User Manual \\ v0.36} \author{Tom St Denis \\ tomstdenis@iahu.ca} \maketitle This text, the library and the accompanying textbook are all hereby placed in the public domain. 
This book has been diff --git a/bn_error.c b/bn_error.c index 1546784..f58387d 100644 --- a/bn_error.c +++ b/bn_error.c @@ -41,3 +41,7 @@ char *mp_error_to_string(int code) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_fast_mp_invmod.c b/bn_fast_mp_invmod.c index acc8364..aa89dd7 100644 --- a/bn_fast_mp_invmod.c +++ b/bn_fast_mp_invmod.c @@ -142,3 +142,7 @@ LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL); return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_fast_mp_montgomery_reduce.c b/bn_fast_mp_montgomery_reduce.c index 14f307f..518f2d4 100644 --- a/bn_fast_mp_montgomery_reduce.c +++ b/bn_fast_mp_montgomery_reduce.c @@ -166,3 +166,7 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_fast_s_mp_mul_digs.c b/bn_fast_s_mp_mul_digs.c index df3da26..a0ae08c 100644 --- a/bn_fast_s_mp_mul_digs.c +++ b/bn_fast_s_mp_mul_digs.c @@ -70,6 +70,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) /* execute loop */ for (iz = 0; iz < iy; ++iz) { _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); + } /* store term */ @@ -103,3 +104,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_fast_s_mp_mul_high_digs.c b/bn_fast_s_mp_mul_high_digs.c index ee657f9..61d42dd 100644 --- a/bn_fast_s_mp_mul_high_digs.c +++ b/bn_fast_s_mp_mul_high_digs.c @@ -95,3 +95,7 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_fast_s_mp_sqr.c b/bn_fast_s_mp_sqr.c index 66a2942..7a5642c 100644 --- a/bn_fast_s_mp_sqr.c +++ b/bn_fast_s_mp_sqr.c @@ -108,3 +108,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff 
--git a/bn_mp_2expt.c b/bn_mp_2expt.c index 45a6818..e24cae4 100644 --- a/bn_mp_2expt.c +++ b/bn_mp_2expt.c @@ -42,3 +42,7 @@ mp_2expt (mp_int * a, int b) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_abs.c b/bn_mp_abs.c index 34f810f..f8d50d8 100644 --- a/bn_mp_abs.c +++ b/bn_mp_abs.c @@ -37,3 +37,7 @@ mp_abs (mp_int * a, mp_int * b) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_add.c b/bn_mp_add.c index 554b7f7..bfd827b 100644 --- a/bn_mp_add.c +++ b/bn_mp_add.c @@ -47,3 +47,7 @@ int mp_add (mp_int * a, mp_int * b, mp_int * c) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_add_d.c b/bn_mp_add_d.c index bdd0280..0300fe0 100644 --- a/bn_mp_add_d.c +++ b/bn_mp_add_d.c @@ -103,3 +103,7 @@ mp_add_d (mp_int * a, mp_digit b, mp_int * c) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_addmod.c b/bn_mp_addmod.c index 13eb33f..d1d07d8 100644 --- a/bn_mp_addmod.c +++ b/bn_mp_addmod.c @@ -35,3 +35,7 @@ mp_addmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_and.c b/bn_mp_and.c index 61dc386..cb8e54c 100644 --- a/bn_mp_and.c +++ b/bn_mp_and.c @@ -51,3 +51,7 @@ mp_and (mp_int * a, mp_int * b, mp_int * c) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_clamp.c b/bn_mp_clamp.c index c172611..3fc6b4d 100644 --- a/bn_mp_clamp.c +++ b/bn_mp_clamp.c @@ -38,3 +38,7 @@ mp_clamp (mp_int * a) } } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_clear.c b/bn_mp_clear.c index 5342648..46aa421 100644 --- a/bn_mp_clear.c +++ b/bn_mp_clear.c @@ -38,3 +38,7 @@ mp_clear (mp_int * a) } } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_clear_multi.c b/bn_mp_clear_multi.c index 24cbe73..2fdf125 100644 --- a/bn_mp_clear_multi.c +++ b/bn_mp_clear_multi.c 
@@ -28,3 +28,7 @@ void mp_clear_multi(mp_int *mp, ...) va_end(args); } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_cmp.c b/bn_mp_cmp.c index 583b5f8..2348066 100644 --- a/bn_mp_cmp.c +++ b/bn_mp_cmp.c @@ -37,3 +37,7 @@ mp_cmp (mp_int * a, mp_int * b) } } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_cmp_d.c b/bn_mp_cmp_d.c index 882b1c9..3843911 100644 --- a/bn_mp_cmp_d.c +++ b/bn_mp_cmp_d.c @@ -38,3 +38,7 @@ int mp_cmp_d(mp_int * a, mp_digit b) } } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_cmp_mag.c b/bn_mp_cmp_mag.c index a0f351c..45e9c6b 100644 --- a/bn_mp_cmp_mag.c +++ b/bn_mp_cmp_mag.c @@ -49,3 +49,7 @@ int mp_cmp_mag (mp_int * a, mp_int * b) return MP_EQ; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_cnt_lsb.c b/bn_mp_cnt_lsb.c index 571f03f..03d694b 100644 --- a/bn_mp_cnt_lsb.c +++ b/bn_mp_cnt_lsb.c @@ -47,3 +47,7 @@ int mp_cnt_lsb(mp_int *a) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_copy.c b/bn_mp_copy.c index 183ec9b..701b489 100644 --- a/bn_mp_copy.c +++ b/bn_mp_copy.c @@ -62,3 +62,7 @@ mp_copy (mp_int * a, mp_int * b) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_count_bits.c b/bn_mp_count_bits.c index f3f85ac..52a9907 100644 --- a/bn_mp_count_bits.c +++ b/bn_mp_count_bits.c @@ -39,3 +39,7 @@ mp_count_bits (mp_int * a) return r; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_div.c b/bn_mp_div.c index 6b2b8f0..f031f53 100644 --- a/bn_mp_div.c +++ b/bn_mp_div.c @@ -286,3 +286,7 @@ LBL_Q:mp_clear (&q); #endif #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_div_2.c b/bn_mp_div_2.c index 5777997..4566580 100644 --- a/bn_mp_div_2.c +++ b/bn_mp_div_2.c @@ -62,3 +62,7 @@ int mp_div_2(mp_int * a, mp_int * b) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git 
a/bn_mp_div_2d.c b/bn_mp_div_2d.c index cf103f2..4c9bbdd 100644 --- a/bn_mp_div_2d.c +++ b/bn_mp_div_2d.c @@ -91,3 +91,7 @@ int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_div_3.c b/bn_mp_div_3.c index 7cbafc1..f386109 100644 --- a/bn_mp_div_3.c +++ b/bn_mp_div_3.c @@ -73,3 +73,7 @@ mp_div_3 (mp_int * a, mp_int *c, mp_digit * d) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_div_d.c b/bn_mp_div_d.c index 9b58aa6..e93bfda 100644 --- a/bn_mp_div_d.c +++ b/bn_mp_div_d.c @@ -104,3 +104,7 @@ int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_dr_is_modulus.c b/bn_mp_dr_is_modulus.c index 5ef78a3..6dfd9b6 100644 --- a/bn_mp_dr_is_modulus.c +++ b/bn_mp_dr_is_modulus.c @@ -37,3 +37,7 @@ int mp_dr_is_modulus(mp_int *a) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_dr_reduce.c b/bn_mp_dr_reduce.c index 9bb7ad7..988c08e 100644 --- a/bn_mp_dr_reduce.c +++ b/bn_mp_dr_reduce.c @@ -88,3 +88,7 @@ top: return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_dr_setup.c b/bn_mp_dr_setup.c index 029d310..e17c052 100644 --- a/bn_mp_dr_setup.c +++ b/bn_mp_dr_setup.c @@ -26,3 +26,7 @@ void mp_dr_setup(mp_int *a, mp_digit *d) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_exch.c b/bn_mp_exch.c index 0ef485a..f1cbb1d 100644 --- a/bn_mp_exch.c +++ b/bn_mp_exch.c @@ -28,3 +28,7 @@ mp_exch (mp_int * a, mp_int * b) *b = t; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_expt_d.c b/bn_mp_expt_d.c index fdb8bd9..908bea1 100644 --- a/bn_mp_expt_d.c +++ b/bn_mp_expt_d.c @@ -51,3 +51,7 @@ int mp_expt_d (mp_int * a, mp_digit b, mp_int * c) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_exptmod.c 
b/bn_mp_exptmod.c index 7c4e2f8..2514e2c 100644 --- a/bn_mp_exptmod.c +++ b/bn_mp_exptmod.c @@ -66,7 +66,7 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) } /* modified diminished radix reduction */ -#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) +#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && defined(BN_S_MP_EXPTMOD_C) if (mp_reduce_is_2k_l(P) == MP_YES) { return s_mp_exptmod(G, X, P, Y, 1); } @@ -106,3 +106,7 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_exptmod_fast.c b/bn_mp_exptmod_fast.c index 82be9ac..7073dee 100644 --- a/bn_mp_exptmod_fast.c +++ b/bn_mp_exptmod_fast.c @@ -315,3 +315,7 @@ LBL_M: } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_exteuclid.c b/bn_mp_exteuclid.c index c4ebab4..9a1f16c 100644 --- a/bn_mp_exteuclid.c +++ b/bn_mp_exteuclid.c @@ -76,3 +76,7 @@ _ERR: mp_clear_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL return err; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_fread.c b/bn_mp_fread.c index 293df3f..5fa23f9 100644 --- a/bn_mp_fread.c +++ b/bn_mp_fread.c @@ -61,3 +61,7 @@ int mp_fread(mp_int *a, int radix, FILE *stream) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_fwrite.c b/bn_mp_fwrite.c index 8fa3129..e70e155 100644 --- a/bn_mp_fwrite.c +++ b/bn_mp_fwrite.c @@ -46,3 +46,7 @@ int mp_fwrite(mp_int *a, int radix, FILE *stream) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_gcd.c b/bn_mp_gcd.c index 6265df1..db03dbb 100644 --- a/bn_mp_gcd.c +++ b/bn_mp_gcd.c @@ -107,3 +107,7 @@ LBL_U:mp_clear (&v); return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_get_int.c b/bn_mp_get_int.c index 034467b..25942db 100644 --- a/bn_mp_get_int.c +++ b/bn_mp_get_int.c @@ -39,3 +39,7 @@ unsigned long 
mp_get_int(mp_int * a) return res & 0xFFFFFFFFUL; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_grow.c b/bn_mp_grow.c index 12a78a8..af987a3 100644 --- a/bn_mp_grow.c +++ b/bn_mp_grow.c @@ -51,3 +51,7 @@ int mp_grow (mp_int * a, int size) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_init.c b/bn_mp_init.c index 9d70554..6e935de 100644 --- a/bn_mp_init.c +++ b/bn_mp_init.c @@ -40,3 +40,7 @@ int mp_init (mp_int * a) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_init_copy.c b/bn_mp_init_copy.c index b1b0fa2..1d00607 100644 --- a/bn_mp_init_copy.c +++ b/bn_mp_init_copy.c @@ -26,3 +26,7 @@ int mp_init_copy (mp_int * a, mp_int * b) return mp_copy (b, a); } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_init_multi.c b/bn_mp_init_multi.c index 8cb123a..de23432 100644 --- a/bn_mp_init_multi.c +++ b/bn_mp_init_multi.c @@ -53,3 +53,7 @@ int mp_init_multi(mp_int *mp, ...) 
} #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_init_set.c b/bn_mp_init_set.c index 0251e61..a7380a8 100644 --- a/bn_mp_init_set.c +++ b/bn_mp_init_set.c @@ -26,3 +26,7 @@ int mp_init_set (mp_int * a, mp_digit b) return err; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_init_set_int.c b/bn_mp_init_set_int.c index f59fd19..793ad86 100644 --- a/bn_mp_init_set_int.c +++ b/bn_mp_init_set_int.c @@ -25,3 +25,7 @@ int mp_init_set_int (mp_int * a, unsigned long b) return mp_set_int(a, b); } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_init_size.c b/bn_mp_init_size.c index 845ce2c..3e5724a 100644 --- a/bn_mp_init_size.c +++ b/bn_mp_init_size.c @@ -42,3 +42,7 @@ int mp_init_size (mp_int * a, int size) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_invmod.c b/bn_mp_invmod.c index 46118ad..d4e4e5f 100644 --- a/bn_mp_invmod.c +++ b/bn_mp_invmod.c @@ -37,3 +37,7 @@ int mp_invmod (mp_int * a, mp_int * b, mp_int * c) return MP_VAL; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_invmod_slow.c b/bn_mp_invmod_slow.c index c048655..325282f 100644 --- a/bn_mp_invmod_slow.c +++ b/bn_mp_invmod_slow.c @@ -169,3 +169,7 @@ LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL); return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_is_square.c b/bn_mp_is_square.c index 969d237..42be22d 100644 --- a/bn_mp_is_square.c +++ b/bn_mp_is_square.c @@ -103,3 +103,7 @@ ERR:mp_clear(&t); return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_jacobi.c b/bn_mp_jacobi.c index 74cbbf3..e21ee4b 100644 --- a/bn_mp_jacobi.c +++ b/bn_mp_jacobi.c @@ -99,3 +99,7 @@ LBL_A1:mp_clear (&a1); return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_karatsuba_mul.c b/bn_mp_karatsuba_mul.c index daa78c7..fe49694 100644 --- 
a/bn_mp_karatsuba_mul.c +++ b/bn_mp_karatsuba_mul.c @@ -26,12 +26,12 @@ * b = b1 * B**n + b0 * * Then, a * b => - a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0 + a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 * * Note that a1b1 and a0b0 are used twice and only need to be * computed once. So in total three half size (half # of * digit) multiplications are performed, a0b0, a1b1 and - * (a1-b1)(a0-b0) + * (a1+b1)(a0+b0) * * Note that a multiplication of half the digits requires * 1/4th the number of single precision multiplications so in @@ -122,19 +122,19 @@ int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) goto X1Y1; /* x1y1 = x1*y1 */ - /* now calc x1-x0 and y1-y0 */ - if (mp_sub (&x1, &x0, &t1) != MP_OKAY) + /* now calc x1+x0 and y1+y0 */ + if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) goto X1Y1; /* t1 = x1 - x0 */ - if (mp_sub (&y1, &y0, &x0) != MP_OKAY) + if (s_mp_add (&y1, &y0, &x0) != MP_OKAY) goto X1Y1; /* t2 = y1 - y0 */ if (mp_mul (&t1, &x0, &t1) != MP_OKAY) - goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */ + goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ /* add x0y0 */ if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) goto X1Y1; /* t2 = x0y0 + x1y1 */ - if (mp_sub (&x0, &t1, &t1) != MP_OKAY) - goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */ + if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY) + goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ /* shift by B */ if (mp_lshd (&t1, B) != MP_OKAY) @@ -161,3 +161,7 @@ ERR: return err; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_karatsuba_sqr.c b/bn_mp_karatsuba_sqr.c index 315ceab..ff8a1f6 100644 --- a/bn_mp_karatsuba_sqr.c +++ b/bn_mp_karatsuba_sqr.c @@ -80,8 +80,8 @@ int mp_karatsuba_sqr (mp_int * a, mp_int * b) if (mp_sqr (&x1, &x1x1) != MP_OKAY) goto X1X1; /* x1x1 = x1*x1 */ - /* now calc (x1-x0)**2 */ - if (mp_sub (&x1, &x0, &t1) != MP_OKAY) + /* now calc (x1+x0)**2 */ + if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) goto 
X1X1; /* t1 = x1 - x0 */ if (mp_sqr (&t1, &t1) != MP_OKAY) goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ @@ -89,8 +89,8 @@ int mp_karatsuba_sqr (mp_int * a, mp_int * b) /* add x0y0 */ if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY) goto X1X1; /* t2 = x0x0 + x1x1 */ - if (mp_sub (&t2, &t1, &t1) != MP_OKAY) - goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */ + if (s_mp_sub (&t1, &t2, &t1) != MP_OKAY) + goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ /* shift by B */ if (mp_lshd (&t1, B) != MP_OKAY) @@ -115,3 +115,7 @@ ERR: return err; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_lcm.c b/bn_mp_lcm.c index 8e3a759..66c2c8e 100644 --- a/bn_mp_lcm.c +++ b/bn_mp_lcm.c @@ -54,3 +54,7 @@ LBL_T: return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_lshd.c b/bn_mp_lshd.c index 398b648..79e1e21 100644 --- a/bn_mp_lshd.c +++ b/bn_mp_lshd.c @@ -61,3 +61,7 @@ int mp_lshd (mp_int * a, int b) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_mod.c b/bn_mp_mod.c index 75779bb..364b1f9 100644 --- a/bn_mp_mod.c +++ b/bn_mp_mod.c @@ -42,3 +42,7 @@ mp_mod (mp_int * a, mp_int * b, mp_int * c) return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_mod_2d.c b/bn_mp_mod_2d.c index 589e4ba..f191008 100644 --- a/bn_mp_mod_2d.c +++ b/bn_mp_mod_2d.c @@ -49,3 +49,7 @@ mp_mod_2d (mp_int * a, int b, mp_int * c) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_mod_d.c b/bn_mp_mod_d.c index 8a2ad24..5ac6fff 100644 --- a/bn_mp_mod_d.c +++ b/bn_mp_mod_d.c @@ -21,3 +21,7 @@ mp_mod_d (mp_int * a, mp_digit b, mp_digit * c) return mp_div_d(a, b, NULL, c); } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_montgomery_calc_normalization.c b/bn_mp_montgomery_calc_normalization.c index e2efc34..a8c4582 100644 --- a/bn_mp_montgomery_calc_normalization.c +++ b/bn_mp_montgomery_calc_normalization.c 
@@ -53,3 +53,7 @@ int mp_montgomery_calc_normalization (mp_int * a, mp_int * b) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_montgomery_reduce.c b/bn_mp_montgomery_reduce.c index 3095fa7..fedfbb7 100644 --- a/bn_mp_montgomery_reduce.c +++ b/bn_mp_montgomery_reduce.c @@ -112,3 +112,7 @@ mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_montgomery_setup.c b/bn_mp_montgomery_setup.c index 9dfc087..28a3716 100644 --- a/bn_mp_montgomery_setup.c +++ b/bn_mp_montgomery_setup.c @@ -53,3 +53,7 @@ mp_montgomery_setup (mp_int * n, mp_digit * rho) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_mul.c b/bn_mp_mul.c index f9cfa09..e13e4c9 100644 --- a/bn_mp_mul.c +++ b/bn_mp_mul.c @@ -60,3 +60,7 @@ int mp_mul (mp_int * a, mp_int * b, mp_int * c) return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_mul_2.c b/bn_mp_mul_2.c index 6936681..65416a2 100644 --- a/bn_mp_mul_2.c +++ b/bn_mp_mul_2.c @@ -76,3 +76,7 @@ int mp_mul_2(mp_int * a, mp_int * b) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_mul_2d.c b/bn_mp_mul_2d.c index 04cb8dd..671b31e 100644 --- a/bn_mp_mul_2d.c +++ b/bn_mp_mul_2d.c @@ -79,3 +79,7 @@ int mp_mul_2d (mp_int * a, int b, mp_int * c) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_mul_d.c b/bn_mp_mul_d.c index 9e11eef..7944d9c 100644 --- a/bn_mp_mul_d.c +++ b/bn_mp_mul_d.c @@ -73,3 +73,7 @@ mp_mul_d (mp_int * a, mp_digit b, mp_int * c) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_mulmod.c b/bn_mp_mulmod.c index d34e90a..83b3449 100644 --- a/bn_mp_mulmod.c +++ b/bn_mp_mulmod.c @@ -16,8 +16,7 @@ */ /* d = a * b (mod c) */ -int -mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) +int 
mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) { int res; mp_int t; @@ -35,3 +34,7 @@ mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_n_root.c b/bn_mp_n_root.c index 7b11aa2..fef65e3 100644 --- a/bn_mp_n_root.c +++ b/bn_mp_n_root.c @@ -126,3 +126,7 @@ LBL_T1:mp_clear (&t1); return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_neg.c b/bn_mp_neg.c index 159cd74..587fe60 100644 --- a/bn_mp_neg.c +++ b/bn_mp_neg.c @@ -34,3 +34,7 @@ int mp_neg (mp_int * a, mp_int * b) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_or.c b/bn_mp_or.c index dccee7e..1655e39 100644 --- a/bn_mp_or.c +++ b/bn_mp_or.c @@ -44,3 +44,7 @@ int mp_or (mp_int * a, mp_int * b, mp_int * c) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_prime_fermat.c b/bn_mp_prime_fermat.c index fd74dbe..59bcb86 100644 --- a/bn_mp_prime_fermat.c +++ b/bn_mp_prime_fermat.c @@ -56,3 +56,7 @@ LBL_T:mp_clear (&t); return err; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_prime_is_divisible.c b/bn_mp_prime_is_divisible.c index f85fe7c..1d30653 100644 --- a/bn_mp_prime_is_divisible.c +++ b/bn_mp_prime_is_divisible.c @@ -44,3 +44,7 @@ int mp_prime_is_divisible (mp_int * a, int *result) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_prime_is_prime.c b/bn_mp_prime_is_prime.c index 188053a..d45bf58 100644 --- a/bn_mp_prime_is_prime.c +++ b/bn_mp_prime_is_prime.c @@ -77,3 +77,7 @@ LBL_B:mp_clear (&b); return err; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_prime_miller_rabin.c b/bn_mp_prime_miller_rabin.c index 758a2c3..fbe055b 100644 --- a/bn_mp_prime_miller_rabin.c +++ b/bn_mp_prime_miller_rabin.c @@ -97,3 +97,7 @@ LBL_N1:mp_clear (&n1); return err; } #endif + +/* $Source$ */ 
+/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_prime_next_prime.c b/bn_mp_prime_next_prime.c index 24f93c4..7b2be90 100644 --- a/bn_mp_prime_next_prime.c +++ b/bn_mp_prime_next_prime.c @@ -164,3 +164,7 @@ LBL_ERR: } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_prime_rabin_miller_trials.c b/bn_mp_prime_rabin_miller_trials.c index d1d0867..4bcf74e 100644 --- a/bn_mp_prime_rabin_miller_trials.c +++ b/bn_mp_prime_rabin_miller_trials.c @@ -46,3 +46,7 @@ int mp_prime_rabin_miller_trials(int size) #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_prime_random_ex.c b/bn_mp_prime_random_ex.c index 78c0583..98f3dbb 100644 --- a/bn_mp_prime_random_ex.c +++ b/bn_mp_prime_random_ex.c @@ -62,10 +62,8 @@ int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback maskOR_msb = 0; maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0; if (flags & LTM_PRIME_2MSB_ON) { - maskOR_msb |= 1 << ((size - 2) & 7); - } else if (flags & LTM_PRIME_2MSB_OFF) { - maskAND &= ~(1 << ((size - 2) & 7)); - } + maskOR_msb |= 0x80 >> ((9 - size) & 7); + } /* get the maskOR_lsb */ maskOR_lsb = 1; @@ -121,3 +119,7 @@ error: #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_radix_size.c b/bn_mp_radix_size.c index 3d423ba..346ec41 100644 --- a/bn_mp_radix_size.c +++ b/bn_mp_radix_size.c @@ -72,3 +72,7 @@ int mp_radix_size (mp_int * a, int radix, int *size) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_radix_smap.c b/bn_mp_radix_smap.c index bc7517d..7a8aa49 100644 --- a/bn_mp_radix_smap.c +++ b/bn_mp_radix_smap.c @@ -18,3 +18,7 @@ /* chars used in radix conversions */ const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_rand.c b/bn_mp_rand.c index 0dc7019..6dc2abe 100644 --- a/bn_mp_rand.c +++ b/bn_mp_rand.c @@ -49,3 +49,7 @@ mp_rand (mp_int * a, int digits) 
return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_read_radix.c b/bn_mp_read_radix.c index 1ec3937..25aed05 100644 --- a/bn_mp_read_radix.c +++ b/bn_mp_read_radix.c @@ -76,3 +76,7 @@ int mp_read_radix (mp_int * a, const char *str, int radix) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_read_signed_bin.c b/bn_mp_read_signed_bin.c index 814d6c1..0b913fd 100644 --- a/bn_mp_read_signed_bin.c +++ b/bn_mp_read_signed_bin.c @@ -16,8 +16,7 @@ */ /* read signed bin, big endian, first byte is 0==positive or 1==negative */ -int -mp_read_signed_bin (mp_int * a, unsigned char *b, int c) +int mp_read_signed_bin (mp_int * a, const unsigned char *b, int c) { int res; @@ -36,3 +35,7 @@ mp_read_signed_bin (mp_int * a, unsigned char *b, int c) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_read_unsigned_bin.c b/bn_mp_read_unsigned_bin.c index 946457d..84b996f 100644 --- a/bn_mp_read_unsigned_bin.c +++ b/bn_mp_read_unsigned_bin.c @@ -16,8 +16,7 @@ */ /* reads a unsigned char array, assumes the msb is stored first [big endian] */ -int -mp_read_unsigned_bin (mp_int * a, unsigned char *b, int c) +int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c) { int res; @@ -50,3 +49,7 @@ mp_read_unsigned_bin (mp_int * a, unsigned char *b, int c) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_reduce.c b/bn_mp_reduce.c index d746445..aa18eab 100644 --- a/bn_mp_reduce.c +++ b/bn_mp_reduce.c @@ -94,3 +94,7 @@ CLEANUP: return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_reduce_2k.c b/bn_mp_reduce_2k.c index 28c3a00..a23fd20 100644 --- a/bn_mp_reduce_2k.c +++ b/bn_mp_reduce_2k.c @@ -55,3 +55,7 @@ ERR: } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_reduce_2k_l.c b/bn_mp_reduce_2k_l.c index 1d7e1f0..638caf4 100644 --- a/bn_mp_reduce_2k_l.c 
+++ b/bn_mp_reduce_2k_l.c @@ -56,3 +56,7 @@ ERR: } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_reduce_2k_setup.c b/bn_mp_reduce_2k_setup.c index 585e1b7..30b6ff9 100644 --- a/bn_mp_reduce_2k_setup.c +++ b/bn_mp_reduce_2k_setup.c @@ -41,3 +41,7 @@ int mp_reduce_2k_setup(mp_int *a, mp_digit *d) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_reduce_2k_setup_l.c b/bn_mp_reduce_2k_setup_l.c index 810a456..8e21c0e 100644 --- a/bn_mp_reduce_2k_setup_l.c +++ b/bn_mp_reduce_2k_setup_l.c @@ -38,3 +38,7 @@ ERR: return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_reduce_is_2k.c b/bn_mp_reduce_is_2k.c index 0fb8384..c34dcf1 100644 --- a/bn_mp_reduce_is_2k.c +++ b/bn_mp_reduce_is_2k.c @@ -46,3 +46,7 @@ int mp_reduce_is_2k(mp_int *a) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_reduce_is_2k_l.c b/bn_mp_reduce_is_2k_l.c index ceba0ed..e3a7fae 100644 --- a/bn_mp_reduce_is_2k_l.c +++ b/bn_mp_reduce_is_2k_l.c @@ -38,3 +38,7 @@ int mp_reduce_is_2k_l(mp_int *a) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_reduce_setup.c b/bn_mp_reduce_setup.c index 99f158a..46ae229 100644 --- a/bn_mp_reduce_setup.c +++ b/bn_mp_reduce_setup.c @@ -28,3 +28,7 @@ int mp_reduce_setup (mp_int * a, mp_int * b) return mp_div (a, b, a, NULL); } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_rshd.c b/bn_mp_rshd.c index 913dda6..37ff66d 100644 --- a/bn_mp_rshd.c +++ b/bn_mp_rshd.c @@ -66,3 +66,7 @@ void mp_rshd (mp_int * a, int b) a->used -= b; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_set.c b/bn_mp_set.c index 078fd5f..eec7bfb 100644 --- a/bn_mp_set.c +++ b/bn_mp_set.c @@ -23,3 +23,7 @@ void mp_set (mp_int * a, mp_digit b) a->used = (a->dp[0] != 0) ? 
1 : 0; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_set_int.c b/bn_mp_set_int.c index bd47136..202c70c 100644 --- a/bn_mp_set_int.c +++ b/bn_mp_set_int.c @@ -42,3 +42,7 @@ int mp_set_int (mp_int * a, unsigned long b) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_shrink.c b/bn_mp_shrink.c index b31f9d2..b6cda9a 100644 --- a/bn_mp_shrink.c +++ b/bn_mp_shrink.c @@ -29,3 +29,7 @@ int mp_shrink (mp_int * a) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_signed_bin_size.c b/bn_mp_signed_bin_size.c index 30048cb..178187a 100644 --- a/bn_mp_signed_bin_size.c +++ b/bn_mp_signed_bin_size.c @@ -21,3 +21,7 @@ int mp_signed_bin_size (mp_int * a) return 1 + mp_unsigned_bin_size (a); } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_sqr.c b/bn_mp_sqr.c index b1fdb57..675a87b 100644 --- a/bn_mp_sqr.c +++ b/bn_mp_sqr.c @@ -52,3 +52,7 @@ if (a->used >= KARATSUBA_SQR_CUTOFF) { return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_sqrmod.c b/bn_mp_sqrmod.c index 1923be4..3cf6ab7 100644 --- a/bn_mp_sqrmod.c +++ b/bn_mp_sqrmod.c @@ -35,3 +35,7 @@ mp_sqrmod (mp_int * a, mp_int * b, mp_int * c) return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_sqrt.c b/bn_mp_sqrt.c index 76cec87..7785737 100644 --- a/bn_mp_sqrt.c +++ b/bn_mp_sqrt.c @@ -75,3 +75,7 @@ E2: mp_clear(&t1); } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_sub.c b/bn_mp_sub.c index 97495f4..4714aaf 100644 --- a/bn_mp_sub.c +++ b/bn_mp_sub.c @@ -53,3 +53,7 @@ mp_sub (mp_int * a, mp_int * b, mp_int * c) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_sub_d.c b/bn_mp_sub_d.c index 4923dde..1bba3d0 100644 --- a/bn_mp_sub_d.c +++ b/bn_mp_sub_d.c @@ -83,3 +83,7 @@ mp_sub_d (mp_int * a, mp_digit b, mp_int * c) } #endif + +/* $Source$ */ +/* 
$Revision$ */ +/* $Date$ */ diff --git a/bn_mp_submod.c b/bn_mp_submod.c index b999c85..79fa787 100644 --- a/bn_mp_submod.c +++ b/bn_mp_submod.c @@ -36,3 +36,7 @@ mp_submod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) return res; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_to_signed_bin.c b/bn_mp_to_signed_bin.c index b0a597e..6365659 100644 --- a/bn_mp_to_signed_bin.c +++ b/bn_mp_to_signed_bin.c @@ -27,3 +27,7 @@ int mp_to_signed_bin (mp_int * a, unsigned char *b) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_to_signed_bin_n.c b/bn_mp_to_signed_bin_n.c index 0f765ee..bea0762 100644 --- a/bn_mp_to_signed_bin_n.c +++ b/bn_mp_to_signed_bin_n.c @@ -25,3 +25,7 @@ int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen) return mp_to_signed_bin(a, b); } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_to_unsigned_bin.c b/bn_mp_to_unsigned_bin.c index 000967e..18e3d97 100644 --- a/bn_mp_to_unsigned_bin.c +++ b/bn_mp_to_unsigned_bin.c @@ -42,3 +42,7 @@ int mp_to_unsigned_bin (mp_int * a, unsigned char *b) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_to_unsigned_bin_n.c b/bn_mp_to_unsigned_bin_n.c index d0256b4..4a1778b 100644 --- a/bn_mp_to_unsigned_bin_n.c +++ b/bn_mp_to_unsigned_bin_n.c @@ -25,3 +25,7 @@ int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen) return mp_to_unsigned_bin(a, b); } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_toom_mul.c b/bn_mp_toom_mul.c index 125331b..69de0da 100644 --- a/bn_mp_toom_mul.c +++ b/bn_mp_toom_mul.c @@ -278,3 +278,7 @@ ERR: } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_toom_sqr.c b/bn_mp_toom_sqr.c index 8c46fea..871c75f 100644 --- a/bn_mp_toom_sqr.c +++ b/bn_mp_toom_sqr.c @@ -220,3 +220,7 @@ ERR: } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git 
a/bn_mp_toradix.c b/bn_mp_toradix.c index a206d5e..4caeccc 100644 --- a/bn_mp_toradix.c +++ b/bn_mp_toradix.c @@ -69,3 +69,7 @@ int mp_toradix (mp_int * a, char *str, int radix) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_toradix_n.c b/bn_mp_toradix_n.c index 7d43558..48456c3 100644 --- a/bn_mp_toradix_n.c +++ b/bn_mp_toradix_n.c @@ -83,3 +83,7 @@ int mp_toradix_n(mp_int * a, char *str, int radix, int maxlen) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_unsigned_bin_size.c b/bn_mp_unsigned_bin_size.c index 091f406..21be05c 100644 --- a/bn_mp_unsigned_bin_size.c +++ b/bn_mp_unsigned_bin_size.c @@ -22,3 +22,7 @@ int mp_unsigned_bin_size (mp_int * a) return (size / 8 + ((size & 7) != 0 ? 1 : 0)); } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_xor.c b/bn_mp_xor.c index de7e62c..56becb4 100644 --- a/bn_mp_xor.c +++ b/bn_mp_xor.c @@ -45,3 +45,7 @@ mp_xor (mp_int * a, mp_int * b, mp_int * c) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_mp_zero.c b/bn_mp_zero.c index c8d8907..7e18317 100644 --- a/bn_mp_zero.c +++ b/bn_mp_zero.c @@ -30,3 +30,7 @@ void mp_zero (mp_int * a) } } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_prime_tab.c b/bn_prime_tab.c index 14306c2..ce130ef 100644 --- a/bn_prime_tab.c +++ b/bn_prime_tab.c @@ -55,3 +55,7 @@ const mp_digit ltm_prime_tab[] = { #endif }; #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_reverse.c b/bn_reverse.c index 851a6e8..bcd0649 100644 --- a/bn_reverse.c +++ b/bn_reverse.c @@ -33,3 +33,7 @@ bn_reverse (unsigned char *s, int len) } } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_s_mp_add.c b/bn_s_mp_add.c index 2b378ae..6976e62 100644 --- a/bn_s_mp_add.c +++ b/bn_s_mp_add.c @@ -103,3 +103,7 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ 
*/ diff --git a/bn_s_mp_exptmod.c b/bn_s_mp_exptmod.c index 597e877..12d981b 100644 --- a/bn_s_mp_exptmod.c +++ b/bn_s_mp_exptmod.c @@ -14,7 +14,6 @@ * * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org */ - #ifdef MP_LOW_MEM #define TAB_SIZE 32 #else @@ -247,3 +246,7 @@ LBL_M: return err; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_s_mp_mul_digs.c b/bn_s_mp_mul_digs.c index b40ae2e..a925e12 100644 --- a/bn_s_mp_mul_digs.c +++ b/bn_s_mp_mul_digs.c @@ -84,3 +84,7 @@ int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_s_mp_mul_high_digs.c b/bn_s_mp_mul_high_digs.c index a060248..e9505c8 100644 --- a/bn_s_mp_mul_high_digs.c +++ b/bn_s_mp_mul_high_digs.c @@ -75,3 +75,7 @@ s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_s_mp_sqr.c b/bn_s_mp_sqr.c index 9cdb563..4648296 100644 --- a/bn_s_mp_sqr.c +++ b/bn_s_mp_sqr.c @@ -78,3 +78,7 @@ int s_mp_sqr (mp_int * a, mp_int * b) return MP_OKAY; } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bn_s_mp_sub.c b/bn_s_mp_sub.c index 5b7aef9..4f7d47d 100644 --- a/bn_s_mp_sub.c +++ b/bn_s_mp_sub.c @@ -83,3 +83,7 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c) } #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/bncore.c b/bncore.c index 82e3132..fa4e64e 100644 --- a/bncore.c +++ b/bncore.c @@ -20,13 +20,17 @@ CPU /Compiler /MUL CUTOFF/SQR CUTOFF ------------------------------------------------------------- Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-) - AMD Athlon64 /GCC v3.4.4 / 74/ 124/LTM 0.34 + AMD Athlon64 /GCC v3.4.4 / 80/ 120/LTM 0.35 */ -int KARATSUBA_MUL_CUTOFF = 74, /* Min. number of digits before Karatsuba multiplication is used. */ - KARATSUBA_SQR_CUTOFF = 124, /* Min. number of digits before Karatsuba squaring is used. 
*/ +int KARATSUBA_MUL_CUTOFF = 80, /* Min. number of digits before Karatsuba multiplication is used. */ + KARATSUBA_SQR_CUTOFF = 120, /* Min. number of digits before Karatsuba squaring is used. */ TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */ TOOM_SQR_CUTOFF = 400; #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/booker.pl b/booker.pl index 5c77e53..f419ab4 100644 --- a/booker.pl +++ b/booker.pl @@ -89,6 +89,9 @@ while () { $inline = 0; while () { + next if ($_ =~ /\$Source/); + next if ($_ =~ /\$Revision/); + next if ($_ =~ /\$Date/); $text[$line++] = $_; ++$inline; chomp($_); diff --git a/changes.txt b/changes.txt index 99e40c1..4f27d63 100644 --- a/changes.txt +++ b/changes.txt @@ -1,3 +1,15 @@ +August 1st, 2005 +v0.36 -- LTM_PRIME_2MSB_ON was fixed and the "OFF" flag was removed. + -- [Peter LaDow] found a typo in the XREALLOC macro + -- [Peter LaDow] pointed out that mp_read_(un)signed_bin should have "const" on the input + -- Ported LTC patch to fix the prime_random_ex() function to get the bitsize correct [and the maskOR flags] + -- Kevin Kenny pointed out a stray // + -- David Hulton pointed out a typo in the textbook [mp_montgomery_setup() pseudo-code] + -- Neal Hamilton (Elliptic Semiconductor) pointed out that my Karatsuba notation was backwards and that I could use + unsigned operations in the routine. + -- Paul Schmidt pointed out a linking error in mp_exptmod() when BN_S_MP_EXPTMOD_C is undefined (and another for read_radix) + -- Updated makefiles to be way more flexible + March 12th, 2005 v0.35 -- Stupid XOR function missing line again... oops. 
-- Fixed bug in invmod not handling negative inputs correctly [Wolfgang Ehrhardt] diff --git a/demo/demo.c b/demo/demo.c index 0a6115a..b406845 100644 --- a/demo/demo.c +++ b/demo/demo.c @@ -389,8 +389,8 @@ printf("compare no compare!\n"); exit(EXIT_FAILURE); } sub_d_n = 0; /* force KARA and TOOM to enable despite cutoffs */ - KARATSUBA_SQR_CUTOFF = KARATSUBA_MUL_CUTOFF = 110; - TOOM_SQR_CUTOFF = TOOM_MUL_CUTOFF = 150; + KARATSUBA_SQR_CUTOFF = KARATSUBA_MUL_CUTOFF = 8; + TOOM_SQR_CUTOFF = TOOM_MUL_CUTOFF = 16; for (;;) { /* randomly clear and re-init one variable, this has the affect of triming the alloc space */ @@ -734,3 +734,7 @@ printf("compare no compare!\n"); exit(EXIT_FAILURE); } } return 0; } + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/demo/timing.c b/demo/timing.c index bb3be52..12f30e3 100644 --- a/demo/timing.c +++ b/demo/timing.c @@ -313,3 +313,7 @@ int main(void) return 0; } + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/etc/2kprime.c b/etc/2kprime.c index d48b83e..9450283 100644 --- a/etc/2kprime.c +++ b/etc/2kprime.c @@ -78,3 +78,7 @@ int main(void) + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/etc/drprime.c b/etc/drprime.c index 0ab8ea6..c7d253f 100644 --- a/etc/drprime.c +++ b/etc/drprime.c @@ -58,3 +58,7 @@ int main(void) return 0; } + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/etc/makefile.icc b/etc/makefile.icc index 0a50728..8a1ffff 100644 --- a/etc/makefile.icc +++ b/etc/makefile.icc @@ -16,7 +16,7 @@ CFLAGS += -I../ # B - Blend of P4 and PM [mobile] # # Default to just generic max opts -CFLAGS += -O3 -xN -ip +CFLAGS += -O3 -xP -ip # default lib name (requires install with root) # LIBNAME=-ltommath diff --git a/etc/mersenne.c b/etc/mersenne.c index 1cd5b50..5697559 100644 --- a/etc/mersenne.c +++ b/etc/mersenne.c @@ -138,3 +138,7 @@ main (void) } return 0; } + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/etc/mont.c b/etc/mont.c index dbf1735..45cf3fd 100644 
--- a/etc/mont.c +++ b/etc/mont.c @@ -44,3 +44,7 @@ int main(void) + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/etc/pprime.c b/etc/pprime.c index 26e0d84..d3a4afe 100644 --- a/etc/pprime.c +++ b/etc/pprime.c @@ -394,3 +394,7 @@ main (void) return 0; } + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/etc/tune.c b/etc/tune.c index d054d10..15a977b 100644 --- a/etc/tune.c +++ b/etc/tune.c @@ -136,3 +136,7 @@ main (void) return 0; } + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/logs/expt.log b/logs/expt.log index 920ba55..70932ab 100644 --- a/logs/expt.log +++ b/logs/expt.log @@ -1,7 +1,7 @@ -513 1489160 -769 3688476 -1025 8162061 -2049 49260015 -2561 89579052 -3073 148797060 -4097 324449263 +513 1435869 +769 3544970 +1025 7791638 +2049 46902238 +2561 85334899 +3073 141451412 +4097 308770310 diff --git a/logs/expt_2k.log b/logs/expt_2k.log index 56b50db..97d325f 100644 --- a/logs/expt_2k.log +++ b/logs/expt_2k.log @@ -1,5 +1,5 @@ -607 2272809 -1279 9557382 -2203 36250309 -3217 87666486 -4253 174168369 +607 2109225 +1279 10148314 +2203 34126877 +3217 82716424 +4253 161569606 diff --git a/logs/expt_2kl.log b/logs/expt_2kl.log index b2eb8c2..d9ad4be 100644 --- a/logs/expt_2kl.log +++ b/logs/expt_2kl.log @@ -1,4 +1,4 @@ -1024 6954080 -2048 35993987 -4096 176068521 -521 1683720 +1024 7705271 +2048 34286851 +4096 165207491 +521 1618631 diff --git a/logs/expt_dr.log b/logs/expt_dr.log index eb93fc9..c6bbe07 100644 --- a/logs/expt_dr.log +++ b/logs/expt_dr.log @@ -1,7 +1,7 @@ -532 1989592 -784 3898697 -1036 6519700 -1540 15676650 -2072 33128187 -3080 82963362 -4116 168358337 +532 1928550 +784 3763908 +1036 7564221 +1540 16566059 +2072 32283784 +3080 79851565 +4116 157843530 diff --git a/logs/index.html b/logs/index.html index 19fe403..4b68c25 100644 --- a/logs/index.html +++ b/logs/index.html @@ -21,4 +21,7 @@
- \ No newline at end of file + +/* $Source: /cvs/libtom/libtommath/logs/index.html,v $ */ +/* $Revision: 1.2 $ */ +/* $Date: 2005/05/05 14:38:47 $ */ diff --git a/logs/sqr.old b/logs/sqr.old deleted file mode 100644 index 3c85882..0000000 --- a/logs/sqr.old +++ /dev/null @@ -1,17 +0,0 @@ -896 382617 -1344 207161 -1792 131522 -2240 90775 -2688 66652 -3136 50955 -3584 11678 -4032 9342 -4480 7684 -4928 6382 -5376 5399 -5824 4545 -6272 3994 -6720 3490 -7168 3075 -7616 2733 -8064 2428 diff --git a/makefile b/makefile index 17873ee..a4697d4 100644 --- a/makefile +++ b/makefile @@ -3,12 +3,14 @@ #Tom St Denis #version of library -VERSION=0.35 +VERSION=0.36 CFLAGS += -I./ -Wall -W -Wshadow -Wsign-compare +ifndef IGNORE_SPEED + #for speed -CFLAGS += -O3 -funroll-all-loops +CFLAGS += -O3 -funroll-loops #for size #CFLAGS += -Os @@ -19,14 +21,27 @@ CFLAGS += -fomit-frame-pointer #debug #CFLAGS += -g3 +endif + #install as this user -USER=root -GROUP=root +ifndef INSTALL_GROUP + GROUP=wheel +else + GROUP=$(INSTALL_GROUP) +endif + +ifndef INSTALL_USER + USER=root +else + USER=$(INSTALL_USER) +endif default: libtommath.a #default files to install -LIBNAME=libtommath.a +ifndef LIBNAME + LIBNAME=libtommath.a +endif HEADERS=tommath.h tommath_class.h tommath_superclass.h #LIBPATH-The directory for libtommath to be installed to. @@ -65,9 +80,9 @@ bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_ini bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o \ bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin_n.o -libtommath.a: $(OBJECTS) - $(AR) $(ARFLAGS) libtommath.a $(OBJECTS) - ranlib libtommath.a +$(LIBNAME): $(OBJECTS) + $(AR) $(ARFLAGS) $@ $(OBJECTS) + ranlib $@ #make a profiled library (takes a while!!!) 
# @@ -89,23 +104,23 @@ profiled_single: ./ltmtest rm -f *.o ltmtest $(CC) $(CFLAGS) -fbranch-probabilities -DTESTING -c mpi.c -o mpi.o - $(AR) $(ARFLAGS) libtommath.a mpi.o - ranlib libtommath.a + $(AR) $(ARFLAGS) $(LIBNAME) mpi.o + ranlib $(LIBNAME) -install: libtommath.a +install: $(LIBNAME) install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(LIBPATH) install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH) install -g $(GROUP) -o $(USER) $(LIBNAME) $(DESTDIR)$(LIBPATH) install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH) -test: libtommath.a demo/demo.o - $(CC) $(CFLAGS) demo/demo.o libtommath.a -o test +test: $(LIBNAME) demo/demo.o + $(CC) $(CFLAGS) demo/demo.o $(LIBNAME) -o test mtest: test cd mtest ; $(CC) $(CFLAGS) mtest.c -o mtest -timing: libtommath.a - $(CC) $(CFLAGS) -DTIMER demo/timing.c libtommath.a -o ltmtest +timing: $(LIBNAME) + $(CC) $(CFLAGS) -DTIMER demo/timing.c $(LIBNAME) -o ltmtest # makes the LTM book DVI file, requires tetex, perl and makeindex [part of tetex I think] docdvi: tommath.src @@ -151,6 +166,12 @@ clean: cd etc ; make clean cd pics ; make clean +#zipup the project (take that!) +no_oops: clean + cd .. ; cvs commit + echo Scanning for scratch/dirty files + find . -type f | grep -v CVS | xargs -n 1 bash mess.sh + zipup: clean manual poster docs perl gen.pl ; mv mpi.c pre_gen/ ; \ cd .. 
; rm -rf ltm* libtommath-$(VERSION) ; mkdir libtommath-$(VERSION) ; \ diff --git a/makefile.cygwin_dll b/makefile.cygwin_dll index 85b10c7..85a9b20 100644 --- a/makefile.cygwin_dll +++ b/makefile.cygwin_dll @@ -49,3 +49,7 @@ windll: $(OBJECTS) test: $(OBJECTS) windll gcc $(CFLAGS) demo/demo.c libtommath.dll.a -Wl,--enable-auto-import -o test -s cd mtest ; $(CC) -O3 -fomit-frame-pointer -funroll-loops mtest.c -o mtest -s + +/* $Source: /cvs/libtom/libtommath/makefile.cygwin_dll,v $ */ +/* $Revision: 1.2 $ */ +/* $Date: 2005/05/05 14:38:45 $ */ diff --git a/makefile.icc b/makefile.icc index e764253..cf70ab0 100644 --- a/makefile.icc +++ b/makefile.icc @@ -19,7 +19,7 @@ CFLAGS += -I./ # B - Blend of P4 and PM [mobile] # # Default to just generic max opts -CFLAGS += -O3 -xN +CFLAGS += -O3 -xP -ip #install as this user USER=root diff --git a/makefile.msvc b/makefile.msvc index dbbf9f3..5edebec 100644 --- a/makefile.msvc +++ b/makefile.msvc @@ -2,7 +2,7 @@ # #Tom St Denis -CFLAGS = /I. /Ox /DWIN32 /W4 +CFLAGS = /I. 
/Ox /DWIN32 /W3 /Fo$@ default: library @@ -34,5 +34,7 @@ bn_mp_prime_random_ex.obj bn_mp_get_int.obj bn_mp_sqrt.obj bn_mp_is_square.obj \ bn_mp_init_set.obj bn_mp_init_set_int.obj bn_mp_invmod_slow.obj bn_mp_prime_rabin_miller_trials.obj \ bn_mp_to_signed_bin_n.obj bn_mp_to_unsigned_bin_n.obj +HEADERS=tommath.h tommath_class.h tommath_superclass.h + library: $(OBJECTS) lib /out:tommath.lib $(OBJECTS) diff --git a/makefile.shared b/makefile.shared index 7c35881..821558c 100644 --- a/makefile.shared +++ b/makefile.shared @@ -1,11 +1,14 @@ #Makefile for GCC # #Tom St Denis -VERSION=0:35 +VERSION=0:36 CC = libtool --mode=compile gcc + CFLAGS += -I./ -Wall -W -Wshadow -Wsign-compare +ifndef IGNORE_SPEED + #for speed CFLAGS += -O3 -funroll-loops @@ -15,14 +18,30 @@ CFLAGS += -O3 -funroll-loops #x86 optimizations [should be valid for any GCC install though] CFLAGS += -fomit-frame-pointer +endif + #install as this user -USER=root -GROUP=root +ifndef INSTALL_GROUP + GROUP=wheel +else + GROUP=$(INSTALL_GROUP) +endif + +ifndef INSTALL_USER + USER=root +else + USER=$(INSTALL_USER) +endif default: libtommath.la #default files to install -LIBNAME=libtommath.la +ifndef LIBNAME + LIBNAME=libtommath.la +endif +ifndef LIBNAME_S + LIBNAME_S=libtommath.a +endif HEADERS=tommath.h tommath_class.h tommath_superclass.h #LIBPATH-The directory for libtommath to be installed to. 
@@ -61,20 +80,20 @@ bn_mp_prime_random_ex.o bn_mp_get_int.o bn_mp_sqrt.o bn_mp_is_square.o bn_mp_ini bn_mp_init_set_int.o bn_mp_invmod_slow.o bn_mp_prime_rabin_miller_trials.o \ bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin_n.o - -libtommath.la: $(OBJECTS) - libtool --mode=link gcc *.lo -o libtommath.la -rpath $(LIBPATH) -version-info $(VERSION) - libtool --mode=link gcc *.o -o libtommath.a - libtool --mode=install install -c libtommath.la $(LIBPATH)/libtommath.la +$(LIBNAME): $(OBJECTS) + libtool --mode=link gcc *.lo -o $(LIBNAME) -rpath $(LIBPATH) -version-info $(VERSION) + libtool --mode=link gcc *.o -o $(LIBNAME_S) + ranlib $(LIBNAME_S) + libtool --mode=install install -c $(LIBNAME) $(LIBPATH)/$@ install -d -g $(GROUP) -o $(USER) $(DESTDIR)$(INCPATH) install -g $(GROUP) -o $(USER) $(HEADERS) $(DESTDIR)$(INCPATH) -test: libtommath.a demo/demo.o +test: $(LIBNAME) demo/demo.o gcc $(CFLAGS) -c demo/demo.c -o demo/demo.o - libtool --mode=link gcc -o test demo/demo.o libtommath.la + libtool --mode=link gcc -o test demo/demo.o $(LIBNAME_S) mtest: test - cd mtest ; gcc $(CFLAGS) mtest.c -o mtest -s + cd mtest ; gcc $(CFLAGS) mtest.c -o mtest -timing: libtommath.la - gcc $(CFLAGS) -DTIMER demo/timing.c libtommath.a -o ltmtest -s +timing: $(LIBNAME) + gcc $(CFLAGS) -DTIMER demo/timing.c $(LIBNAME_S) -o ltmtest diff --git a/mess.sh b/mess.sh new file mode 100644 index 0000000..bf639ce --- /dev/null +++ b/mess.sh @@ -0,0 +1,4 @@ +#!/bin/bash +if cvs log $1 >/dev/null 2>/dev/null; then exit 0; else echo "$1 shouldn't be here" ; exit 1; fi + + diff --git a/mtest/logtab.h b/mtest/logtab.h index 68462bd..751111e 100644 --- a/mtest/logtab.h +++ b/mtest/logtab.h @@ -18,3 +18,7 @@ const float s_logv_2[] = { 0.166666667 }; + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/mtest/mpi-config.h b/mtest/mpi-config.h index 9277dfb..f83a646 100644 --- a/mtest/mpi-config.h +++ b/mtest/mpi-config.h @@ -84,3 +84,7 @@ /* crc==3287762869, version==2, Sat Feb 02 06:43:53 2002 */ + 
+/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/mtest/mpi-types.h b/mtest/mpi-types.h index e097188..f99d7ee 100644 --- a/mtest/mpi-types.h +++ b/mtest/mpi-types.h @@ -14,3 +14,7 @@ typedef int mp_err; #define DIGIT_FMT "%04X" #define RADIX (MP_DIGIT_MAX+1) + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/mtest/mpi.c b/mtest/mpi.c index 94019ef..a4b382b 100644 --- a/mtest/mpi.c +++ b/mtest/mpi.c @@ -3979,3 +3979,7 @@ int s_mp_outlen(int bits, int r) /*------------------------------------------------------------------------*/ /* HERE THERE BE DRAGONS */ /* crc==4242132123, version==2, Sat Feb 02 06:43:52 2002 */ + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/mtest/mpi.h b/mtest/mpi.h index e19ecf8..d84435b 100644 --- a/mtest/mpi.h +++ b/mtest/mpi.h @@ -225,3 +225,7 @@ int mp_char2value(char ch, int r); const char *mp_strerror(mp_err ec); #endif /* end _H_MPI_ */ + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/mtest/mtest.c b/mtest/mtest.c index d46f456..6ac2c81 100644 --- a/mtest/mtest.c +++ b/mtest/mtest.c @@ -302,3 +302,7 @@ int main(void) fclose(rng); return 0; } + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/poster.pdf b/poster.pdf index 4c3e365..faceef1 100644 Binary files a/poster.pdf and b/poster.pdf differ diff --git a/pre_gen/mpi.c b/pre_gen/mpi.c index 8ec8a10..af6523d 100644 --- a/pre_gen/mpi.c +++ b/pre_gen/mpi.c @@ -43,6 +43,10 @@ char *mp_error_to_string(int code) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_error.c */ /* Start: bn_fast_mp_invmod.c */ @@ -191,6 +195,10 @@ LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL); } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_fast_mp_invmod.c */ /* Start: bn_fast_mp_montgomery_reduce.c */ @@ -363,6 +371,10 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_fast_mp_montgomery_reduce.c */ /* Start: 
bn_fast_s_mp_mul_digs.c */ @@ -438,6 +450,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) /* execute loop */ for (iz = 0; iz < iy; ++iz) { _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); + } /* store term */ @@ -472,6 +485,10 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_fast_s_mp_mul_digs.c */ /* Start: bn_fast_s_mp_mul_high_digs.c */ @@ -573,6 +590,10 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_fast_s_mp_mul_high_digs.c */ /* Start: bn_fast_s_mp_sqr.c */ @@ -687,6 +708,10 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_fast_s_mp_sqr.c */ /* Start: bn_mp_2expt.c */ @@ -735,6 +760,10 @@ mp_2expt (mp_int * a, int b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_2expt.c */ /* Start: bn_mp_abs.c */ @@ -778,6 +807,10 @@ mp_abs (mp_int * a, mp_int * b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_abs.c */ /* Start: bn_mp_add.c */ @@ -831,6 +864,10 @@ int mp_add (mp_int * a, mp_int * b, mp_int * c) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_add.c */ /* Start: bn_mp_add_d.c */ @@ -940,6 +977,10 @@ mp_add_d (mp_int * a, mp_digit b, mp_int * c) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_add_d.c */ /* Start: bn_mp_addmod.c */ @@ -981,6 +1022,10 @@ mp_addmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_addmod.c */ /* Start: bn_mp_and.c */ @@ -1038,6 +1083,10 @@ mp_and (mp_int * a, mp_int * b, mp_int * c) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_and.c */ /* Start: bn_mp_clamp.c */ @@ -1082,6 +1131,10 @@ mp_clamp (mp_int * a) } #endif +/* $Source$ */ +/* $Revision$ */ +/* 
$Date$ */ + /* End: bn_mp_clamp.c */ /* Start: bn_mp_clear.c */ @@ -1126,6 +1179,10 @@ mp_clear (mp_int * a) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_clear.c */ /* Start: bn_mp_clear_multi.c */ @@ -1160,6 +1217,10 @@ void mp_clear_multi(mp_int *mp, ...) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_clear_multi.c */ /* Start: bn_mp_cmp.c */ @@ -1203,6 +1264,10 @@ mp_cmp (mp_int * a, mp_int * b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_cmp.c */ /* Start: bn_mp_cmp_d.c */ @@ -1247,6 +1312,10 @@ int mp_cmp_d(mp_int * a, mp_digit b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_cmp_d.c */ /* Start: bn_mp_cmp_mag.c */ @@ -1302,6 +1371,10 @@ int mp_cmp_mag (mp_int * a, mp_int * b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_cmp_mag.c */ /* Start: bn_mp_cnt_lsb.c */ @@ -1355,6 +1428,10 @@ int mp_cnt_lsb(mp_int *a) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_cnt_lsb.c */ /* Start: bn_mp_copy.c */ @@ -1423,6 +1500,10 @@ mp_copy (mp_int * a, mp_int * b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_copy.c */ /* Start: bn_mp_count_bits.c */ @@ -1468,6 +1549,10 @@ mp_count_bits (mp_int * a) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_count_bits.c */ /* Start: bn_mp_div.c */ @@ -1760,6 +1845,10 @@ LBL_Q:mp_clear (&q); #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_div.c */ /* Start: bn_mp_div_2.c */ @@ -1828,6 +1917,10 @@ int mp_div_2(mp_int * a, mp_int * b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_div_2.c */ /* Start: bn_mp_div_2d.c */ @@ -1925,6 +2018,10 @@ int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_div_2d.c */ /* Start: bn_mp_div_3.c */ @@ -2004,6 +2101,10 @@ mp_div_3 (mp_int * a, mp_int *c, mp_digit * d) 
#endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_div_3.c */ /* Start: bn_mp_div_d.c */ @@ -2114,6 +2215,10 @@ int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_div_d.c */ /* Start: bn_mp_dr_is_modulus.c */ @@ -2157,6 +2262,10 @@ int mp_dr_is_modulus(mp_int *a) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_dr_is_modulus.c */ /* Start: bn_mp_dr_reduce.c */ @@ -2251,6 +2360,10 @@ top: } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_dr_reduce.c */ /* Start: bn_mp_dr_setup.c */ @@ -2283,6 +2396,10 @@ void mp_dr_setup(mp_int *a, mp_digit *d) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_dr_setup.c */ /* Start: bn_mp_exch.c */ @@ -2317,6 +2434,10 @@ mp_exch (mp_int * a, mp_int * b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_exch.c */ /* Start: bn_mp_expt_d.c */ @@ -2374,6 +2495,10 @@ int mp_expt_d (mp_int * a, mp_digit b, mp_int * c) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_expt_d.c */ /* Start: bn_mp_exptmod.c */ @@ -2445,7 +2570,7 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) } /* modified diminished radix reduction */ -#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) +#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && defined(BN_S_MP_EXPTMOD_C) if (mp_reduce_is_2k_l(P) == MP_YES) { return s_mp_exptmod(G, X, P, Y, 1); } @@ -2486,6 +2611,10 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_exptmod.c */ /* Start: bn_mp_exptmod_fast.c */ @@ -2807,6 +2936,10 @@ LBL_M: #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_exptmod_fast.c */ /* Start: bn_mp_exteuclid.c */ @@ -2889,6 +3022,10 @@ _ERR: mp_clear_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL } 
#endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_exteuclid.c */ /* Start: bn_mp_fread.c */ @@ -2956,6 +3093,10 @@ int mp_fread(mp_int *a, int radix, FILE *stream) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_fread.c */ /* Start: bn_mp_fwrite.c */ @@ -3008,6 +3149,10 @@ int mp_fwrite(mp_int *a, int radix, FILE *stream) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_fwrite.c */ /* Start: bn_mp_gcd.c */ @@ -3121,6 +3266,10 @@ LBL_U:mp_clear (&v); } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_gcd.c */ /* Start: bn_mp_get_int.c */ @@ -3166,6 +3315,10 @@ unsigned long mp_get_int(mp_int * a) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_get_int.c */ /* Start: bn_mp_grow.c */ @@ -3223,6 +3376,10 @@ int mp_grow (mp_int * a, int size) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_grow.c */ /* Start: bn_mp_init.c */ @@ -3269,6 +3426,10 @@ int mp_init (mp_int * a) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_init.c */ /* Start: bn_mp_init_copy.c */ @@ -3301,6 +3462,10 @@ int mp_init_copy (mp_int * a, mp_int * b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_init_copy.c */ /* Start: bn_mp_init_multi.c */ @@ -3360,6 +3525,10 @@ int mp_init_multi(mp_int *mp, ...) 
#endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_init_multi.c */ /* Start: bn_mp_init_set.c */ @@ -3392,6 +3561,10 @@ int mp_init_set (mp_int * a, mp_digit b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_init_set.c */ /* Start: bn_mp_init_set_int.c */ @@ -3423,6 +3596,10 @@ int mp_init_set_int (mp_int * a, unsigned long b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_init_set_int.c */ /* Start: bn_mp_init_size.c */ @@ -3471,6 +3648,10 @@ int mp_init_size (mp_int * a, int size) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_init_size.c */ /* Start: bn_mp_invmod.c */ @@ -3514,6 +3695,10 @@ int mp_invmod (mp_int * a, mp_int * b, mp_int * c) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_invmod.c */ /* Start: bn_mp_invmod_slow.c */ @@ -3689,6 +3874,10 @@ LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL); } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_invmod_slow.c */ /* Start: bn_mp_is_square.c */ @@ -3798,6 +3987,10 @@ ERR:mp_clear(&t); } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_is_square.c */ /* Start: bn_mp_jacobi.c */ @@ -3903,6 +4096,10 @@ LBL_A1:mp_clear (&a1); } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_jacobi.c */ /* Start: bn_mp_karatsuba_mul.c */ @@ -3934,12 +4131,12 @@ LBL_A1:mp_clear (&a1); * b = b1 * B**n + b0 * * Then, a * b => - a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0 + a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 * * Note that a1b1 and a0b0 are used twice and only need to be * computed once. 
So in total three half size (half # of * digit) multiplications are performed, a0b0, a1b1 and - * (a1-b1)(a0-b0) + * (a1+b1)(a0+b0) * * Note that a multiplication of half the digits requires * 1/4th the number of single precision multiplications so in @@ -4030,19 +4227,19 @@ int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) goto X1Y1; /* x1y1 = x1*y1 */ - /* now calc x1-x0 and y1-y0 */ - if (mp_sub (&x1, &x0, &t1) != MP_OKAY) + /* now calc x1+x0 and y1+y0 */ + if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) goto X1Y1; /* t1 = x1 - x0 */ - if (mp_sub (&y1, &y0, &x0) != MP_OKAY) + if (s_mp_add (&y1, &y0, &x0) != MP_OKAY) goto X1Y1; /* t2 = y1 - y0 */ if (mp_mul (&t1, &x0, &t1) != MP_OKAY) - goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */ + goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ /* add x0y0 */ if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) goto X1Y1; /* t2 = x0y0 + x1y1 */ - if (mp_sub (&x0, &t1, &t1) != MP_OKAY) - goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */ + if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY) + goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ /* shift by B */ if (mp_lshd (&t1, B) != MP_OKAY) @@ -4070,6 +4267,10 @@ ERR: } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_karatsuba_mul.c */ /* Start: bn_mp_karatsuba_sqr.c */ @@ -4155,8 +4356,8 @@ int mp_karatsuba_sqr (mp_int * a, mp_int * b) if (mp_sqr (&x1, &x1x1) != MP_OKAY) goto X1X1; /* x1x1 = x1*x1 */ - /* now calc (x1-x0)**2 */ - if (mp_sub (&x1, &x0, &t1) != MP_OKAY) + /* now calc (x1+x0)**2 */ + if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) goto X1X1; /* t1 = x1 - x0 */ if (mp_sqr (&t1, &t1) != MP_OKAY) goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ @@ -4164,8 +4365,8 @@ int mp_karatsuba_sqr (mp_int * a, mp_int * b) /* add x0y0 */ if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY) goto X1X1; /* t2 = x0x0 + x1x1 */ - if (mp_sub (&t2, &t1, &t1) != MP_OKAY) - goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */ + if (s_mp_sub (&t1, &t2, &t1) != 
MP_OKAY) + goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ /* shift by B */ if (mp_lshd (&t1, B) != MP_OKAY) @@ -4191,6 +4392,10 @@ ERR: } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_karatsuba_sqr.c */ /* Start: bn_mp_lcm.c */ @@ -4251,6 +4456,10 @@ LBL_T: } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_lcm.c */ /* Start: bn_mp_lshd.c */ @@ -4318,6 +4527,10 @@ int mp_lshd (mp_int * a, int b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_lshd.c */ /* Start: bn_mp_mod.c */ @@ -4366,6 +4579,10 @@ mp_mod (mp_int * a, mp_int * b, mp_int * c) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_mod.c */ /* Start: bn_mp_mod_2d.c */ @@ -4421,6 +4638,10 @@ mp_mod_2d (mp_int * a, int b, mp_int * c) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_mod_2d.c */ /* Start: bn_mp_mod_d.c */ @@ -4448,6 +4669,10 @@ mp_mod_d (mp_int * a, mp_digit b, mp_digit * c) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_mod_d.c */ /* Start: bn_mp_montgomery_calc_normalization.c */ @@ -4507,6 +4732,10 @@ int mp_montgomery_calc_normalization (mp_int * a, mp_int * b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_montgomery_calc_normalization.c */ /* Start: bn_mp_montgomery_reduce.c */ @@ -4625,6 +4854,10 @@ mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_montgomery_reduce.c */ /* Start: bn_mp_montgomery_setup.c */ @@ -4684,6 +4917,10 @@ mp_montgomery_setup (mp_int * n, mp_digit * rho) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_montgomery_setup.c */ /* Start: bn_mp_mul.c */ @@ -4750,6 +4987,10 @@ int mp_mul (mp_int * a, mp_int * b, mp_int * c) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_mul.c */ /* Start: bn_mp_mul_2.c */ @@ -4832,6 +5073,10 @@ int mp_mul_2(mp_int * a, mp_int * b) } 
#endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_mul_2.c */ /* Start: bn_mp_mul_2d.c */ @@ -4917,6 +5162,10 @@ int mp_mul_2d (mp_int * a, int b, mp_int * c) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_mul_2d.c */ /* Start: bn_mp_mul_d.c */ @@ -4996,6 +5245,10 @@ mp_mul_d (mp_int * a, mp_digit b, mp_int * c) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_mul_d.c */ /* Start: bn_mp_mulmod.c */ @@ -5017,8 +5270,7 @@ mp_mul_d (mp_int * a, mp_digit b, mp_int * c) */ /* d = a * b (mod c) */ -int -mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) +int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) { int res; mp_int t; @@ -5037,6 +5289,10 @@ mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_mulmod.c */ /* Start: bn_mp_n_root.c */ @@ -5169,6 +5425,10 @@ LBL_T1:mp_clear (&t1); } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_n_root.c */ /* Start: bn_mp_neg.c */ @@ -5209,6 +5469,10 @@ int mp_neg (mp_int * a, mp_int * b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_neg.c */ /* Start: bn_mp_or.c */ @@ -5259,6 +5523,10 @@ int mp_or (mp_int * a, mp_int * b, mp_int * c) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_or.c */ /* Start: bn_mp_prime_fermat.c */ @@ -5321,6 +5589,10 @@ LBL_T:mp_clear (&t); } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_prime_fermat.c */ /* Start: bn_mp_prime_is_divisible.c */ @@ -5371,6 +5643,10 @@ int mp_prime_is_divisible (mp_int * a, int *result) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_prime_is_divisible.c */ /* Start: bn_mp_prime_is_prime.c */ @@ -5454,6 +5730,10 @@ LBL_B:mp_clear (&b); } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_prime_is_prime.c */ /* Start: bn_mp_prime_miller_rabin.c */ @@ -5557,6 +5837,10 
@@ LBL_N1:mp_clear (&n1); } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_prime_miller_rabin.c */ /* Start: bn_mp_prime_next_prime.c */ @@ -5727,6 +6011,10 @@ LBL_ERR: #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_prime_next_prime.c */ /* Start: bn_mp_prime_rabin_miller_trials.c */ @@ -5779,6 +6067,10 @@ int mp_prime_rabin_miller_trials(int size) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_prime_rabin_miller_trials.c */ /* Start: bn_mp_prime_random_ex.c */ @@ -5846,10 +6138,8 @@ int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback maskOR_msb = 0; maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0; if (flags & LTM_PRIME_2MSB_ON) { - maskOR_msb |= 1 << ((size - 2) & 7); - } else if (flags & LTM_PRIME_2MSB_OFF) { - maskAND &= ~(1 << ((size - 2) & 7)); - } + maskOR_msb |= 0x80 >> ((9 - size) & 7); + } /* get the maskOR_lsb */ maskOR_lsb = 1; @@ -5906,6 +6196,10 @@ error: #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_prime_random_ex.c */ /* Start: bn_mp_radix_size.c */ @@ -5984,6 +6278,10 @@ int mp_radix_size (mp_int * a, int radix, int *size) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_radix_size.c */ /* Start: bn_mp_radix_smap.c */ @@ -6008,6 +6306,10 @@ int mp_radix_size (mp_int * a, int radix, int *size) const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_radix_smap.c */ /* Start: bn_mp_rand.c */ @@ -6063,6 +6365,10 @@ mp_rand (mp_int * a, int digits) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_rand.c */ /* Start: bn_mp_read_radix.c */ @@ -6145,6 +6451,10 @@ int mp_read_radix (mp_int * a, const char *str, int radix) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_read_radix.c */ /* Start: bn_mp_read_signed_bin.c */ @@ -6166,8 +6476,7 @@ int mp_read_radix 
(mp_int * a, const char *str, int radix) */ /* read signed bin, big endian, first byte is 0==positive or 1==negative */ -int -mp_read_signed_bin (mp_int * a, unsigned char *b, int c) +int mp_read_signed_bin (mp_int * a, const unsigned char *b, int c) { int res; @@ -6187,6 +6496,10 @@ mp_read_signed_bin (mp_int * a, unsigned char *b, int c) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_read_signed_bin.c */ /* Start: bn_mp_read_unsigned_bin.c */ @@ -6208,8 +6521,7 @@ mp_read_signed_bin (mp_int * a, unsigned char *b, int c) */ /* reads a unsigned char array, assumes the msb is stored first [big endian] */ -int -mp_read_unsigned_bin (mp_int * a, unsigned char *b, int c) +int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c) { int res; @@ -6243,6 +6555,10 @@ mp_read_unsigned_bin (mp_int * a, unsigned char *b, int c) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_read_unsigned_bin.c */ /* Start: bn_mp_reduce.c */ @@ -6343,6 +6659,10 @@ CLEANUP: } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_reduce.c */ /* Start: bn_mp_reduce_2k.c */ @@ -6404,6 +6724,10 @@ ERR: #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_reduce_2k.c */ /* Start: bn_mp_reduce_2k_l.c */ @@ -6466,6 +6790,10 @@ ERR: #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_reduce_2k_l.c */ /* Start: bn_mp_reduce_2k_setup.c */ @@ -6513,6 +6841,10 @@ int mp_reduce_2k_setup(mp_int *a, mp_digit *d) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_reduce_2k_setup.c */ /* Start: bn_mp_reduce_2k_setup_l.c */ @@ -6557,6 +6889,10 @@ ERR: } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_reduce_2k_setup_l.c */ /* Start: bn_mp_reduce_is_2k.c */ @@ -6609,6 +6945,10 @@ int mp_reduce_is_2k(mp_int *a) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_reduce_is_2k.c */ /* Start: bn_mp_reduce_is_2k_l.c */ @@ -6653,6 +6993,10 @@ 
int mp_reduce_is_2k_l(mp_int *a) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_reduce_is_2k_l.c */ /* Start: bn_mp_reduce_setup.c */ @@ -6687,6 +7031,10 @@ int mp_reduce_setup (mp_int * a, mp_int * b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_reduce_setup.c */ /* Start: bn_mp_rshd.c */ @@ -6759,6 +7107,10 @@ void mp_rshd (mp_int * a, int b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_rshd.c */ /* Start: bn_mp_set.c */ @@ -6788,6 +7140,10 @@ void mp_set (mp_int * a, mp_digit b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_set.c */ /* Start: bn_mp_set_int.c */ @@ -6836,6 +7192,10 @@ int mp_set_int (mp_int * a, unsigned long b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_set_int.c */ /* Start: bn_mp_shrink.c */ @@ -6871,6 +7231,10 @@ int mp_shrink (mp_int * a) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_shrink.c */ /* Start: bn_mp_signed_bin_size.c */ @@ -6898,6 +7262,10 @@ int mp_signed_bin_size (mp_int * a) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_signed_bin_size.c */ /* Start: bn_mp_sqr.c */ @@ -6956,6 +7324,10 @@ if (a->used >= KARATSUBA_SQR_CUTOFF) { } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_sqr.c */ /* Start: bn_mp_sqrmod.c */ @@ -6997,6 +7369,10 @@ mp_sqrmod (mp_int * a, mp_int * b, mp_int * c) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_sqrmod.c */ /* Start: bn_mp_sqrt.c */ @@ -7078,6 +7454,10 @@ E2: mp_clear(&t1); #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_sqrt.c */ /* Start: bn_mp_sub.c */ @@ -7137,6 +7517,10 @@ mp_sub (mp_int * a, mp_int * b, mp_int * c) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_sub.c */ /* Start: bn_mp_sub_d.c */ @@ -7226,6 +7610,10 @@ mp_sub_d (mp_int * a, mp_digit b, mp_int * c) #endif +/* $Source$ */ +/* $Revision$ */ +/* 
$Date$ */ + /* End: bn_mp_sub_d.c */ /* Start: bn_mp_submod.c */ @@ -7268,6 +7656,10 @@ mp_submod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_submod.c */ /* Start: bn_mp_to_signed_bin.c */ @@ -7301,6 +7693,10 @@ int mp_to_signed_bin (mp_int * a, unsigned char *b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_to_signed_bin.c */ /* Start: bn_mp_to_signed_bin_n.c */ @@ -7332,6 +7728,10 @@ int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_to_signed_bin_n.c */ /* Start: bn_mp_to_unsigned_bin.c */ @@ -7380,6 +7780,10 @@ int mp_to_unsigned_bin (mp_int * a, unsigned char *b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_to_unsigned_bin.c */ /* Start: bn_mp_to_unsigned_bin_n.c */ @@ -7411,6 +7815,10 @@ int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_to_unsigned_bin_n.c */ /* Start: bn_mp_toom_mul.c */ @@ -7695,6 +8103,10 @@ ERR: #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_toom_mul.c */ /* Start: bn_mp_toom_sqr.c */ @@ -7921,6 +8333,10 @@ ERR: #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_toom_sqr.c */ /* Start: bn_mp_toradix.c */ @@ -7996,6 +8412,10 @@ int mp_toradix (mp_int * a, char *str, int radix) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_toradix.c */ /* Start: bn_mp_toradix_n.c */ @@ -8085,6 +8505,10 @@ int mp_toradix_n(mp_int * a, char *str, int radix, int maxlen) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_toradix_n.c */ /* Start: bn_mp_unsigned_bin_size.c */ @@ -8113,6 +8537,10 @@ int mp_unsigned_bin_size (mp_int * a) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_unsigned_bin_size.c */ /* Start: 
bn_mp_xor.c */ @@ -8164,6 +8592,10 @@ mp_xor (mp_int * a, mp_int * b, mp_int * c) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_xor.c */ /* Start: bn_mp_zero.c */ @@ -8200,6 +8632,10 @@ void mp_zero (mp_int * a) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_mp_zero.c */ /* Start: bn_prime_tab.c */ @@ -8261,6 +8697,10 @@ const mp_digit ltm_prime_tab[] = { }; #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_prime_tab.c */ /* Start: bn_reverse.c */ @@ -8300,6 +8740,10 @@ bn_reverse (unsigned char *s, int len) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_reverse.c */ /* Start: bn_s_mp_add.c */ @@ -8409,6 +8853,10 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_s_mp_add.c */ /* Start: bn_s_mp_exptmod.c */ @@ -8428,7 +8876,6 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c) * * Tom St Denis, tomstdenis@iahu.ca, http://math.libtomcrypt.org */ - #ifdef MP_LOW_MEM #define TAB_SIZE 32 #else @@ -8662,6 +9109,10 @@ LBL_M: } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_s_mp_exptmod.c */ /* Start: bn_s_mp_mul_digs.c */ @@ -8752,6 +9203,10 @@ int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_s_mp_mul_digs.c */ /* Start: bn_s_mp_mul_high_digs.c */ @@ -8833,6 +9288,10 @@ s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_s_mp_mul_high_digs.c */ /* Start: bn_s_mp_sqr.c */ @@ -8917,6 +9376,10 @@ int s_mp_sqr (mp_int * a, mp_int * b) } #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_s_mp_sqr.c */ /* Start: bn_s_mp_sub.c */ @@ -9006,6 +9469,10 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c) #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bn_s_mp_sub.c */ /* Start: bncore.c */ @@ -9031,17 +9498,21 
@@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c) CPU /Compiler /MUL CUTOFF/SQR CUTOFF ------------------------------------------------------------- Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-) - AMD Athlon64 /GCC v3.4.4 / 74/ 124/LTM 0.34 + AMD Athlon64 /GCC v3.4.4 / 80/ 120/LTM 0.35 */ -int KARATSUBA_MUL_CUTOFF = 74, /* Min. number of digits before Karatsuba multiplication is used. */ - KARATSUBA_SQR_CUTOFF = 124, /* Min. number of digits before Karatsuba squaring is used. */ +int KARATSUBA_MUL_CUTOFF = 80, /* Min. number of digits before Karatsuba multiplication is used. */ + KARATSUBA_SQR_CUTOFF = 120, /* Min. number of digits before Karatsuba squaring is used. */ TOOM_MUL_CUTOFF = 350, /* no optimal values of these are known yet so set em high */ TOOM_SQR_CUTOFF = 400; #endif +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ + /* End: bncore.c */ diff --git a/tommath.h b/tommath.h index bcb9d86..80db43c 100644 --- a/tommath.h +++ b/tommath.h @@ -23,10 +23,13 @@ #include -#undef MIN -#define MIN(x,y) ((x)<(y)?(x):(y)) -#undef MAX -#define MAX(x,y) ((x)>(y)?(x):(y)) +#ifndef MIN + #define MIN(x,y) ((x)<(y)?(x):(y)) +#endif + +#ifndef MAX + #define MAX(x,y) ((x)>(y)?(x):(y)) +#endif #ifdef __cplusplus extern "C" { @@ -112,7 +115,7 @@ extern "C" { #else /* prototypes for our heap functions */ extern void *XMALLOC(size_t n); - extern void *REALLOC(void *p, size_t n); + extern void *XREALLOC(void *p, size_t n); extern void *XCALLOC(size_t n, size_t s); extern void XFREE(void *p); #endif @@ -147,7 +150,6 @@ extern "C" { /* Primality generation flags */ #define LTM_PRIME_BBS 0x0001 /* BBS style prime */ #define LTM_PRIME_SAFE 0x0002 /* Safe prime (p-1)/2 == prime */ -#define LTM_PRIME_2MSB_OFF 0x0004 /* force 2nd MSB to 0 */ #define LTM_PRIME_2MSB_ON 0x0008 /* force 2nd MSB to 1 */ typedef int mp_err; @@ -164,7 +166,7 @@ extern int KARATSUBA_MUL_CUTOFF, /* default precision */ #ifndef MP_PREC #ifndef MP_LOW_MEM - #define MP_PREC 64 /* default digits of precision 
*/ + #define MP_PREC 32 /* default digits of precision */ #else #define MP_PREC 8 /* default digits of precision */ #endif @@ -518,13 +520,13 @@ int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback int mp_count_bits(mp_int *a); int mp_unsigned_bin_size(mp_int *a); -int mp_read_unsigned_bin(mp_int *a, unsigned char *b, int c); +int mp_read_unsigned_bin(mp_int *a, const unsigned char *b, int c); int mp_to_unsigned_bin(mp_int *a, unsigned char *b); int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen); int mp_signed_bin_size(mp_int *a); -int mp_read_signed_bin(mp_int *a, unsigned char *b, int c); -int mp_to_signed_bin(mp_int *a, unsigned char *b); +int mp_read_signed_bin(mp_int *a, const unsigned char *b, int c); +int mp_to_signed_bin(mp_int *a, unsigned char *b); int mp_to_signed_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen); int mp_read_radix(mp_int *a, const char *str, int radix); @@ -576,3 +578,7 @@ extern const char *mp_s_rmap; #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/tommath.pdf b/tommath.pdf index c486d29..08f6a1e 100644 Binary files a/tommath.pdf and b/tommath.pdf differ diff --git a/tommath.src b/tommath.src index 7a53860..b392ead 100644 --- a/tommath.src +++ b/tommath.src @@ -66,7 +66,7 @@ QUALCOMM Australia \\ } } \maketitle -This text has been placed in the public domain. This text corresponds to the v0.35 release of the +This text has been placed in the public domain. This text corresponds to the v0.36 release of the LibTomMath project. \begin{alltt} @@ -2775,26 +2775,25 @@ general purpose multiplication. Given two polynomial basis representations $f(x light algebra \cite{KARAP} that the following polynomial is equivalent to multiplication of the two integers the polynomials represent. 
\begin{equation} -f(x) \cdot g(x) = acx^2 + ((a - b)(c - d) - (ac + bd))x + bd +f(x) \cdot g(x) = acx^2 + ((a + b)(c + d) - (ac + bd))x + bd \end{equation} Using the observation that $ac$ and $bd$ could be re-used only three half sized multiplications would be required to produce the product. Applying this algorithm recursively, the work factor becomes $O(n^{lg(3)})$ which is substantially better than the work factor $O(n^2)$ of the Comba technique. It turns out what Karatsuba did not know or at least did not publish was that this is simply polynomial basis multiplication with the points -$\zeta_0$, $\zeta_{\infty}$ and $-\zeta_{-1}$. Consider the resultant system of equations. +$\zeta_0$, $\zeta_{\infty}$ and $\zeta_{1}$. Consider the resultant system of equations. \begin{center} \begin{tabular}{rcrcrcrc} $\zeta_{0}$ & $=$ & & & & & $w_0$ \\ -$-\zeta_{-1}$ & $=$ & $-w_2$ & $+$ & $w_1$ & $-$ & $w_0$ \\ +$\zeta_{1}$ & $=$ & $w_2$ & $+$ & $w_1$ & $+$ & $w_0$ \\ $\zeta_{\infty}$ & $=$ & $w_2$ & & & & \\ \end{tabular} \end{center} By adding the first and last equation to the equation in the middle the term $w_1$ can be isolated and all three coefficients solved for. The simplicity of this system of equations has made Karatsuba fairly popular. In fact the cutoff point is often fairly low\footnote{With LibTomMath 0.18 it is 70 and 109 digits for the Intel P4 and AMD Athlon respectively.} -making it an ideal algorithm to speed up certain public key cryptosystems such as RSA and Diffie-Hellman. It is worth noting that the point -$\zeta_1$ could be substituted for $-\zeta_{-1}$. In this case the first and third row are subtracted instead of added to the second row. +making it an ideal algorithm to speed up certain public key cryptosystems such as RSA and Diffie-Hellman. \newpage\begin{figure}[!here] \begin{small} @@ -2817,13 +2816,13 @@ Split the input. e.g. $a = x1 \cdot \beta^B + x0$ \\ Calculate the three products. \\ 8. 
$x0y0 \leftarrow x0 \cdot y0$ (\textit{mp\_mul}) \\ 9. $x1y1 \leftarrow x1 \cdot y1$ \\ -10. $t1 \leftarrow x1 - x0$ (\textit{mp\_sub}) \\ -11. $x0 \leftarrow y1 - y0$ \\ +10. $t1 \leftarrow x1 + x0$ (\textit{mp\_add}) \\ +11. $x0 \leftarrow y1 + y0$ \\ 12. $t1 \leftarrow t1 \cdot x0$ \\ \\ Calculate the middle term. \\ 13. $x0 \leftarrow x0y0 + x1y1$ \\ -14. $t1 \leftarrow x0 - t1$ \\ +14. $t1 \leftarrow t1 - x0$ (\textit{s\_mp\_sub}) \\ \\ Calculate the final product. \\ 15. $t1 \leftarrow t1 \cdot \beta^B$ (\textit{mp\_lshd}) \\ @@ -2850,7 +2849,7 @@ smallest input \textbf{used} count. After the radix point is chosen the inputs compute the lower halves. Step 6 and 7 computer the upper halves. After the halves have been computed the three intermediate half-size products must be computed. Step 8 and 9 compute the trivial products -$x0 \cdot y0$ and $x1 \cdot y1$. The mp\_int $x0$ is used as a temporary variable after $x1 - x0$ has been computed. By using $x0$ instead +$x0 \cdot y0$ and $x1 \cdot y1$. The mp\_int $x0$ is used as a temporary variable after $x1 + x0$ has been computed. By using $x0$ instead of an additional temporary variable, the algorithm can avoid an addition memory allocation operation. The remaining steps 13 through 18 compute the Karatsuba polynomial through a variety of digit shifting and addition operations. @@ -3246,10 +3245,10 @@ Let $h(x) = \left ( f(x) \right )^2$ represent the square of the polynomial. Th number with the following equation. \begin{equation} -h(x) = a^2x^2 + \left (a^2 + b^2 - (a - b)^2 \right )x + b^2 +h(x) = a^2x^2 + \left ((a + b)^2 - (a^2 + b^2) \right )x + b^2 \end{equation} -Upon closer inspection this equation only requires the calculation of three half-sized squares: $a^2$, $b^2$ and $(a - b)^2$. As in +Upon closer inspection this equation only requires the calculation of three half-sized squares: $a^2$, $b^2$ and $(a + b)^2$. 
As in Karatsuba multiplication, this algorithm can be applied recursively on the input and will achieve an asymptotic running time of $O \left ( n^{lg(3)} \right )$. @@ -3281,12 +3280,12 @@ Split the input. e.g. $a = x1\beta^B + x0$ \\ Calculate the three squares. \\ 6. $x0x0 \leftarrow x0^2$ (\textit{mp\_sqr}) \\ 7. $x1x1 \leftarrow x1^2$ \\ -8. $t1 \leftarrow x1 - x0$ (\textit{mp\_sub}) \\ +8. $t1 \leftarrow x1 + x0$ (\textit{s\_mp\_add}) \\ 9. $t1 \leftarrow t1^2$ \\ \\ Compute the middle term. \\ 10. $t2 \leftarrow x0x0 + x1x1$ (\textit{s\_mp\_add}) \\ -11. $t1 \leftarrow t2 - t1$ \\ +11. $t1 \leftarrow t1 - t2$ \\ \\ Compute final product. \\ 12. $t1 \leftarrow t1\beta^B$ (\textit{mp\_lshd}) \\ @@ -3309,7 +3308,7 @@ The radix point for squaring is simply placed exactly in the middle of the digit placed just below the middle. Step 3, 4 and 5 compute the two halves required using $B$ as the radix point. The first two squares in steps 6 and 7 are rather straightforward while the last square is of a more compact form. -By expanding $\left (x1 - x0 \right )^2$, the $x1^2$ and $x0^2$ terms in the middle disappear, that is $x1^2 + x0^2 - (x1 - x0)^2 = 2 \cdot x0 \cdot x1$. +By expanding $\left (x1 + x0 \right )^2$, the $x1^2$ and $x0^2$ terms in the middle disappear, that is $(x1 + x0)^2 - (x1^2 + x0^2) = 2 \cdot x0 \cdot x1$. Now if $5n$ single precision additions and a squaring of $n$-digits is faster than multiplying two $n$-digit numbers and doubling then this method is faster. Assuming no further recursions occur, the difference can be estimated with the following inequality. @@ -4035,7 +4034,7 @@ To calculate the variable $\rho$ a relatively simple algorithm will be required. \hline \\ 1. $b \leftarrow n_0$ \\ 2. If $b$ is even return(\textit{MP\_VAL}) \\ -3. $x \leftarrow ((b + 2) \mbox{ AND } 4) << 1) + b$ \\ +3. $x \leftarrow (((b + 2) \mbox{ AND } 4) << 1) + b$ \\ 4. 
for $k$ from 0 to $\lceil lg(lg(\beta)) \rceil - 2$ do \\ \hspace{3mm}4.1 $x \leftarrow x \cdot (2 - bx)$ \\ 5. $\rho \leftarrow \beta - x \mbox{ (mod }\beta\mbox{)}$ \\ diff --git a/tommath.tex b/tommath.tex index b016010..b69421b 100644 --- a/tommath.tex +++ b/tommath.tex @@ -66,7 +66,7 @@ QUALCOMM Australia \\ } } \maketitle -This text has been placed in the public domain. This text corresponds to the v0.35 release of the +This text has been placed in the public domain. This text corresponds to the v0.36 release of the LibTomMath project. \begin{alltt} @@ -814,6 +814,7 @@ decrementally. 039 return MP_OKAY; 040 \} 041 #endif +042 \end{alltt} \end{small} @@ -902,6 +903,7 @@ with the exception of algorithms mp\_init, mp\_init\_copy, mp\_init\_size and mp 037 \} 038 \} 039 #endif +040 \end{alltt} \end{small} @@ -1008,6 +1010,7 @@ assumed to contain undefined values they are initially set to zero. 050 return MP_OKAY; 051 \} 052 #endif +053 \end{alltt} \end{small} @@ -1096,6 +1099,7 @@ correct no further memory re-allocations are required to work with the mp\_int. 041 return MP_OKAY; 042 \} 043 #endif +044 \end{alltt} \end{small} @@ -1183,6 +1187,7 @@ initialization which allows for quick recovery from runtime errors. 052 \} 053 054 #endif +055 \end{alltt} \end{small} @@ -1268,6 +1273,7 @@ when all of the digits are zero to ensure that the mp\_int is valid at all times 037 \} 038 \} 039 #endif +040 \end{alltt} \end{small} @@ -1405,6 +1411,7 @@ implement the pseudo-code. 061 return MP_OKAY; 062 \} 063 #endif +064 \end{alltt} \end{small} @@ -1519,6 +1526,7 @@ such this algorithm will perform two operations in one step. 025 return mp_copy (b, a); 026 \} 027 #endif +028 \end{alltt} \end{small} @@ -1570,6 +1578,7 @@ This algorithm simply resets a mp\_int to the default state. 029 \} 030 \} 031 #endif +032 \end{alltt} \end{small} @@ -1631,6 +1640,7 @@ logic to handle it. 
036 return MP_OKAY; 037 \} 038 #endif +039 \end{alltt} \end{small} @@ -1692,6 +1702,7 @@ zero as negative. 033 return MP_OKAY; 034 \} 035 #endif +036 \end{alltt} \end{small} @@ -1739,6 +1750,7 @@ single digit is set (\textit{modulo $\beta$}) and the \textbf{used} count is adj 022 a->used = (a->dp[0] != 0) ? 1 : 0; 023 \} 024 #endif +025 \end{alltt} \end{small} @@ -1819,6 +1831,7 @@ Excess zero digits are trimmed in steps 2.1 and 3 by using higher level algorith 041 return MP_OKAY; 042 \} 043 #endif +044 \end{alltt} \end{small} @@ -1921,6 +1934,7 @@ the zero'th digit. If after all of the digits have been compared, no difference 048 return MP_EQ; 049 \} 050 #endif +051 \end{alltt} \end{small} @@ -1987,6 +2001,7 @@ $\vert a \vert < \vert b \vert$. Step number four will compare the two when the 036 \} 037 \} 038 #endif +039 \end{alltt} \end{small} @@ -2205,6 +2220,7 @@ The final carry is stored in $c_{max}$ and digits above $max$ upto $oldused$ are 102 return MP_OKAY; 103 \} 104 #endif +105 \end{alltt} \end{small} @@ -2376,6 +2392,7 @@ If $b$ has a smaller magnitude than $a$ then step 9 will force the carry and cop 082 \} 083 084 #endif +085 \end{alltt} \end{small} @@ -2511,6 +2528,7 @@ within algorithm s\_mp\_add will force $-0$ to become $0$. 046 \} 047 048 #endif +049 \end{alltt} \end{small} @@ -2623,6 +2641,7 @@ algorithm from producing $-a - -a = -0$ as a result. 052 \} 053 054 #endif +055 \end{alltt} \end{small} @@ -2757,6 +2776,7 @@ Step 8 clears any leading digits of $b$ in case it originally had a larger magni 075 return MP_OKAY; 076 \} 077 #endif +078 \end{alltt} \end{small} @@ -2857,6 +2877,7 @@ least significant bit not the most significant bit. 061 return MP_OKAY; 062 \} 063 #endif +064 \end{alltt} \end{small} @@ -2977,6 +2998,7 @@ step 8 sets the lower $b$ digits to zero. 
060 return MP_OKAY; 061 \} 062 #endif +063 \end{alltt} \end{small} @@ -3088,6 +3110,7 @@ Once the window copy is complete the upper digits must be zeroed and the \textbf 065 a->used -= b; 066 \} 067 #endif +068 \end{alltt} \end{small} @@ -3221,6 +3244,7 @@ complete. It is possible to optimize this algorithm down to a $O(n)$ algorithm 078 return MP_OKAY; 079 \} 080 #endif +081 \end{alltt} \end{small} @@ -3357,6 +3381,7 @@ by using algorithm mp\_mod\_2d. 090 return MP_OKAY; 091 \} 092 #endif +093 \end{alltt} \end{small} @@ -3448,6 +3473,7 @@ is copied to $b$, leading digits are removed and the remaining leading digit is 048 return MP_OKAY; 049 \} 050 #endif +051 \end{alltt} \end{small} @@ -3687,6 +3713,7 @@ exceed the precision requested. 083 return MP_OKAY; 084 \} 085 #endif +086 \end{alltt} \end{small} @@ -3942,39 +3969,41 @@ and addition operations in the nested loop in parallel. 069 /* execute loop */ 070 for (iz = 0; iz < iy; ++iz) \{ 071 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); -072 \} -073 -074 /* store term */ -075 W[ix] = ((mp_digit)_W) & MP_MASK; -076 -077 /* make next carry */ -078 _W = _W >> ((mp_word)DIGIT_BIT); -079 \} -080 -081 /* store final carry */ -082 W[ix] = (mp_digit)(_W & MP_MASK); -083 -084 /* setup dest */ -085 olduse = c->used; -086 c->used = pa; -087 -088 \{ -089 register mp_digit *tmpc; -090 tmpc = c->dp; -091 for (ix = 0; ix < pa+1; ix++) \{ -092 /* now extract the previous digit [below the carry] */ -093 *tmpc++ = W[ix]; -094 \} -095 -096 /* clear unused digits [that existed in the old copy of c] */ -097 for (; ix < olduse; ix++) \{ -098 *tmpc++ = 0; -099 \} -100 \} -101 mp_clamp (c); -102 return MP_OKAY; -103 \} -104 #endif +072 +073 \} +074 +075 /* store term */ +076 W[ix] = ((mp_digit)_W) & MP_MASK; +077 +078 /* make next carry */ +079 _W = _W >> ((mp_word)DIGIT_BIT); +080 \} +081 +082 /* store final carry */ +083 W[ix] = (mp_digit)(_W & MP_MASK); +084 +085 /* setup dest */ +086 olduse = c->used; +087 c->used = pa; +088 +089 \{ 
+090 register mp_digit *tmpc; +091 tmpc = c->dp; +092 for (ix = 0; ix < pa+1; ix++) \{ +093 /* now extract the previous digit [below the carry] */ +094 *tmpc++ = W[ix]; +095 \} +096 +097 /* clear unused digits [that existed in the old copy of c] */ +098 for (; ix < olduse; ix++) \{ +099 *tmpc++ = 0; +100 \} +101 \} +102 mp_clamp (c); +103 return MP_OKAY; +104 \} +105 #endif +106 \end{alltt} \end{small} @@ -3982,7 +4011,7 @@ As per the pseudo--code we first calculate $pa$ (line 47) as the number of digit to produce the individual columns of the product. We use the two aliases $tmpx$ and $tmpy$ (lines 61, 62) to point inside the two multiplicands quickly. -The inner loop (lines 70 to 72) of this implementation is where the tradeoff come into play. Originally this comba +The inner loop (lines 70 to 73) of this implementation is where the tradeoff come into play. Originally this comba implementation was ``row--major'' which means it adds to each of the columns in each pass. After the outer loop it would then fix the carries. This was very fast except it had an annoying drawback. You had to read a mp\_word and two mp\_digits and write one mp\_word per iteration. On processors such as the Athlon XP and P4 this did not matter much since the cache bandwidth @@ -3990,8 +4019,8 @@ is very high and it can keep the ALU fed with data. It did, however, matter on slower and also often doesn't exist. This new algorithm only performs two reads per iteration under the assumption that the compiler has aliased $\_ \hat W$ to a CPU register. -After the inner loop we store the current accumulator in $W$ and shift $\_ \hat W$ (lines 75, 78) to forward it as -a carry for the next pass. After the outer loop we use the final carry (line 82) as the last digit of the product. +After the inner loop we store the current accumulator in $W$ and shift $\_ \hat W$ (lines 76, 79) to forward it as +a carry for the next pass. 
After the outer loop we use the final carry (line 83) as the last digit of the product. \subsection{Polynomial Basis Multiplication} To break the $O(n^2)$ barrier in multiplication requires a completely different look at integer multiplication. In the following algorithms @@ -4095,26 +4124,25 @@ general purpose multiplication. Given two polynomial basis representations $f(x light algebra \cite{KARAP} that the following polynomial is equivalent to multiplication of the two integers the polynomials represent. \begin{equation} -f(x) \cdot g(x) = acx^2 + ((a - b)(c - d) - (ac + bd))x + bd +f(x) \cdot g(x) = acx^2 + ((a + b)(c + d) - (ac + bd))x + bd \end{equation} Using the observation that $ac$ and $bd$ could be re-used only three half sized multiplications would be required to produce the product. Applying this algorithm recursively, the work factor becomes $O(n^{lg(3)})$ which is substantially better than the work factor $O(n^2)$ of the Comba technique. It turns out what Karatsuba did not know or at least did not publish was that this is simply polynomial basis multiplication with the points -$\zeta_0$, $\zeta_{\infty}$ and $-\zeta_{-1}$. Consider the resultant system of equations. +$\zeta_0$, $\zeta_{\infty}$ and $\zeta_{1}$. Consider the resultant system of equations. \begin{center} \begin{tabular}{rcrcrcrc} $\zeta_{0}$ & $=$ & & & & & $w_0$ \\ -$-\zeta_{-1}$ & $=$ & $-w_2$ & $+$ & $w_1$ & $-$ & $w_0$ \\ +$\zeta_{1}$ & $=$ & $w_2$ & $+$ & $w_1$ & $+$ & $w_0$ \\ $\zeta_{\infty}$ & $=$ & $w_2$ & & & & \\ \end{tabular} \end{center} By adding the first and last equation to the equation in the middle the term $w_1$ can be isolated and all three coefficients solved for. The simplicity of this system of equations has made Karatsuba fairly popular. 
In fact the cutoff point is often fairly low\footnote{With LibTomMath 0.18 it is 70 and 109 digits for the Intel P4 and AMD Athlon respectively.} -making it an ideal algorithm to speed up certain public key cryptosystems such as RSA and Diffie-Hellman. It is worth noting that the point -$\zeta_1$ could be substituted for $-\zeta_{-1}$. In this case the first and third row are subtracted instead of added to the second row. +making it an ideal algorithm to speed up certain public key cryptosystems such as RSA and Diffie-Hellman. \newpage\begin{figure}[!here] \begin{small} @@ -4137,13 +4165,13 @@ Split the input. e.g. $a = x1 \cdot \beta^B + x0$ \\ Calculate the three products. \\ 8. $x0y0 \leftarrow x0 \cdot y0$ (\textit{mp\_mul}) \\ 9. $x1y1 \leftarrow x1 \cdot y1$ \\ -10. $t1 \leftarrow x1 - x0$ (\textit{mp\_sub}) \\ -11. $x0 \leftarrow y1 - y0$ \\ +10. $t1 \leftarrow x1 + x0$ (\textit{mp\_add}) \\ +11. $x0 \leftarrow y1 + y0$ \\ 12. $t1 \leftarrow t1 \cdot x0$ \\ \\ Calculate the middle term. \\ 13. $x0 \leftarrow x0y0 + x1y1$ \\ -14. $t1 \leftarrow x0 - t1$ \\ +14. $t1 \leftarrow t1 - x0$ (\textit{s\_mp\_sub}) \\ \\ Calculate the final product. \\ 15. $t1 \leftarrow t1 \cdot \beta^B$ (\textit{mp\_lshd}) \\ @@ -4170,7 +4198,7 @@ smallest input \textbf{used} count. After the radix point is chosen the inputs compute the lower halves. Step 6 and 7 computer the upper halves. After the halves have been computed the three intermediate half-size products must be computed. Step 8 and 9 compute the trivial products -$x0 \cdot y0$ and $x1 \cdot y1$. The mp\_int $x0$ is used as a temporary variable after $x1 - x0$ has been computed. By using $x0$ instead +$x0 \cdot y0$ and $x1 \cdot y1$. The mp\_int $x0$ is used as a temporary variable after $x1 + x0$ has been computed. By using $x0$ instead of an additional temporary variable, the algorithm can avoid an addition memory allocation operation. 
The remaining steps 13 through 18 compute the Karatsuba polynomial through a variety of digit shifting and addition operations. @@ -4191,12 +4219,12 @@ The remaining steps 13 through 18 compute the Karatsuba polynomial through a var 025 * b = b1 * B**n + b0 026 * 027 * Then, a * b => -028 a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0 +028 a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 029 * 030 * Note that a1b1 and a0b0 are used twice and only need to be 031 * computed once. So in total three half size (half # of 032 * digit) multiplications are performed, a0b0, a1b1 and -033 * (a1-b1)(a0-b0) +033 * (a1+b1)(a0+b0) 034 * 035 * Note that a multiplication of half the digits requires 036 * 1/4th the number of single precision multiplications so in @@ -4287,19 +4315,19 @@ The remaining steps 13 through 18 compute the Karatsuba polynomial through a var 121 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) 122 goto X1Y1; /* x1y1 = x1*y1 */ 123 -124 /* now calc x1-x0 and y1-y0 */ -125 if (mp_sub (&x1, &x0, &t1) != MP_OKAY) +124 /* now calc x1+x0 and y1+y0 */ +125 if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) 126 goto X1Y1; /* t1 = x1 - x0 */ -127 if (mp_sub (&y1, &y0, &x0) != MP_OKAY) +127 if (s_mp_add (&y1, &y0, &x0) != MP_OKAY) 128 goto X1Y1; /* t2 = y1 - y0 */ 129 if (mp_mul (&t1, &x0, &t1) != MP_OKAY) -130 goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */ +130 goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ 131 132 /* add x0y0 */ 133 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) 134 goto X1Y1; /* t2 = x0y0 + x1y1 */ -135 if (mp_sub (&x0, &t1, &t1) != MP_OKAY) -136 goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */ +135 if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY) +136 goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ 137 138 /* shift by B */ 139 if (mp_lshd (&t1, B) != MP_OKAY) @@ -4326,6 +4354,7 @@ The remaining steps 13 through 18 compute the Karatsuba polynomial through a var 160 return err; 161 \} 162 #endif +163 \end{alltt} \end{small} @@ -4729,6 +4758,7 @@ 
result $a \cdot b$ is produced. 277 \} 278 279 #endif +280 \end{alltt} \end{small} @@ -4837,6 +4867,7 @@ s\_mp\_mul\_digs will clear it. 059 return res; 060 \} 061 #endif +062 \end{alltt} \end{small} @@ -5006,6 +5037,7 @@ results calculated so far. This involves expensive carry propagation which will 077 return MP_OKAY; 078 \} 079 #endif +080 \end{alltt} \end{small} @@ -5188,6 +5220,7 @@ only to even outputs and it is the square of the term at the $\lfloor ix / 2 \rf 107 return MP_OKAY; 108 \} 109 #endif +110 \end{alltt} \end{small} @@ -5205,10 +5238,10 @@ Let $h(x) = \left ( f(x) \right )^2$ represent the square of the polynomial. Th number with the following equation. \begin{equation} -h(x) = a^2x^2 + \left (a^2 + b^2 - (a - b)^2 \right )x + b^2 +h(x) = a^2x^2 + \left ((a + b)^2 - (a^2 + b^2) \right )x + b^2 \end{equation} -Upon closer inspection this equation only requires the calculation of three half-sized squares: $a^2$, $b^2$ and $(a - b)^2$. As in +Upon closer inspection this equation only requires the calculation of three half-sized squares: $a^2$, $b^2$ and $(a + b)^2$. As in Karatsuba multiplication, this algorithm can be applied recursively on the input and will achieve an asymptotic running time of $O \left ( n^{lg(3)} \right )$. @@ -5240,12 +5273,12 @@ Split the input. e.g. $a = x1\beta^B + x0$ \\ Calculate the three squares. \\ 6. $x0x0 \leftarrow x0^2$ (\textit{mp\_sqr}) \\ 7. $x1x1 \leftarrow x1^2$ \\ -8. $t1 \leftarrow x1 - x0$ (\textit{mp\_sub}) \\ +8. $t1 \leftarrow x1 + x0$ (\textit{s\_mp\_add}) \\ 9. $t1 \leftarrow t1^2$ \\ \\ Compute the middle term. \\ 10. $t2 \leftarrow x0x0 + x1x1$ (\textit{s\_mp\_add}) \\ -11. $t1 \leftarrow t2 - t1$ \\ +11. $t1 \leftarrow t1 - t2$ \\ \\ Compute final product. \\ 12. $t1 \leftarrow t1\beta^B$ (\textit{mp\_lshd}) \\ @@ -5268,7 +5301,7 @@ The radix point for squaring is simply placed exactly in the middle of the digit placed just below the middle. 
Step 3, 4 and 5 compute the two halves required using $B$ as the radix point. The first two squares in steps 6 and 7 are rather straightforward while the last square is of a more compact form. -By expanding $\left (x1 - x0 \right )^2$, the $x1^2$ and $x0^2$ terms in the middle disappear, that is $x1^2 + x0^2 - (x1 - x0)^2 = 2 \cdot x0 \cdot x1$. +By expanding $\left (x1 + x0 \right )^2$, the $x1^2$ and $x0^2$ terms in the middle disappear, that is $(x0 + x1)^2 - (x1^2 + x0^2) = 2 \cdot x0 \cdot x1$. Now if $5n$ single precision additions and a squaring of $n$-digits is faster than multiplying two $n$-digit numbers and doubling then this method is faster. Assuming no further recursions occur, the difference can be estimated with the following inequality. @@ -5363,8 +5396,8 @@ ratio of 1:7. } than simpler operations such as addition. 079 if (mp_sqr (&x1, &x1x1) != MP_OKAY) 080 goto X1X1; /* x1x1 = x1*x1 */ 081 -082 /* now calc (x1-x0)**2 */ -083 if (mp_sub (&x1, &x0, &t1) != MP_OKAY) +082 /* now calc (x1+x0)**2 */ +083 if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) 084 goto X1X1; /* t1 = x1 - x0 */ 085 if (mp_sqr (&t1, &t1) != MP_OKAY) 086 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ @@ -5372,8 +5405,8 @@ ratio of 1:7. } than simpler operations such as addition. 088 /* add x0y0 */ 089 if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY) 090 goto X1X1; /* t2 = x0x0 + x1x1 */ -091 if (mp_sub (&t2, &t1, &t1) != MP_OKAY) -092 goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */ +091 if (s_mp_sub (&t1, &t2, &t1) != MP_OKAY) +092 goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ 093 094 /* shift by B */ 095 if (mp_lshd (&t1, B) != MP_OKAY) @@ -5398,6 +5431,7 @@ ratio of 1:7. } than simpler operations such as addition.
114 return err; 115 \} 116 #endif +117 \end{alltt} \end{small} @@ -5494,6 +5528,7 @@ neither of the polynomial basis algorithms should be used then either the Comba 051 return res; 052 \} 053 #endif +054 \end{alltt} \end{small} @@ -5827,6 +5862,7 @@ performed at most twice, and on average once. However, if $a \ge b^2$ than it wi 093 return res; 094 \} 095 #endif +096 \end{alltt} \end{small} @@ -5879,6 +5915,7 @@ is equivalent and much faster. The final value is computed by taking the intege 027 return mp_div (a, b, a, NULL); 028 \} 029 #endif +030 \end{alltt} \end{small} @@ -6234,6 +6271,7 @@ multiplications. 111 return MP_OKAY; 112 \} 113 #endif +114 \end{alltt} \end{small} @@ -6478,6 +6516,7 @@ stored in the destination $x$. 165 return MP_OKAY; 166 \} 167 #endif +168 \end{alltt} \end{small} @@ -6505,7 +6544,7 @@ To calculate the variable $\rho$ a relatively simple algorithm will be required. \hline \\ 1. $b \leftarrow n_0$ \\ 2. If $b$ is even return(\textit{MP\_VAL}) \\ -3. $x \leftarrow ((b + 2) \mbox{ AND } 4) << 1) + b$ \\ +3. $x \leftarrow (((b + 2) \mbox{ AND } 4) << 1) + b$ \\ 4. for $k$ from 0 to $\lceil lg(lg(\beta)) \rceil - 2$ do \\ \hspace{3mm}4.1 $x \leftarrow x \cdot (2 - bx)$ \\ 5. $\rho \leftarrow \beta - x \mbox{ (mod }\beta\mbox{)}$ \\ @@ -6564,6 +6603,7 @@ to calculate $1/n_0$ when $\beta$ is a power of two. 052 return MP_OKAY; 053 \} 054 #endif +055 \end{alltt} \end{small} @@ -6830,6 +6870,7 @@ at step 3. 087 return MP_OKAY; 088 \} 089 #endif +090 \end{alltt} \end{small} @@ -6885,6 +6926,7 @@ completeness. 025 \} 026 027 #endif +028 \end{alltt} \end{small} @@ -6943,6 +6985,7 @@ step 3 then $n$ must be of Diminished Radix form. 036 \} 037 038 #endif +039 \end{alltt} \end{small} @@ -7027,6 +7070,7 @@ shift which makes the algorithm fairly inexpensive to use. 054 \} 055 056 #endif +057 \end{alltt} \end{small} @@ -7096,6 +7140,7 @@ is sufficient to solve for $k$. 
Alternatively if $n$ has more than one digit th 040 return MP_OKAY; 041 \} 042 #endif +043 \end{alltt} \end{small} @@ -7172,6 +7217,7 @@ This algorithm quickly determines if a modulus is of the form required for algor 045 \} 046 047 #endif +048 \end{alltt} \end{small} @@ -7381,6 +7427,7 @@ iteration of the loop moves the bits of the exponent $b$ upwards to the most sig 050 return MP_OKAY; 051 \} 052 #endif +053 \end{alltt} \end{small} @@ -7620,7 +7667,8 @@ algorithm since their arguments are essentially the same (\textit{two mp\_ints a 065 \} 066 067 /* modified diminished radix reduction */ -068 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) +068 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && defin + ed(BN_S_MP_EXPTMOD_C) 069 if (mp_reduce_is_2k_l(P) == MP_YES) \{ 070 return s_mp_exptmod(G, X, P, Y, 1); 071 \} @@ -7660,6 +7708,7 @@ algorithm since their arguments are essentially the same (\textit{two mp\_ints a 105 \} 106 107 #endif +108 \end{alltt} \end{small} @@ -7839,251 +7888,251 @@ a Left-to-Right algorithm is used to process the remaining few bits. 
\hspace{-5.1mm}{\bf File}: bn\_s\_mp\_exptmod.c \vspace{-3mm} \begin{alltt} -016 -017 #ifdef MP_LOW_MEM -018 #define TAB_SIZE 32 -019 #else -020 #define TAB_SIZE 256 -021 #endif -022 -023 int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmod +016 #ifdef MP_LOW_MEM +017 #define TAB_SIZE 32 +018 #else +019 #define TAB_SIZE 256 +020 #endif +021 +022 int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmod e) -024 \{ -025 mp_int M[TAB_SIZE], res, mu; -026 mp_digit buf; -027 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; -028 int (*redux)(mp_int*,mp_int*,mp_int*); -029 -030 /* find window size */ -031 x = mp_count_bits (X); -032 if (x <= 7) \{ -033 winsize = 2; -034 \} else if (x <= 36) \{ -035 winsize = 3; -036 \} else if (x <= 140) \{ -037 winsize = 4; -038 \} else if (x <= 450) \{ -039 winsize = 5; -040 \} else if (x <= 1303) \{ -041 winsize = 6; -042 \} else if (x <= 3529) \{ -043 winsize = 7; -044 \} else \{ -045 winsize = 8; -046 \} -047 -048 #ifdef MP_LOW_MEM -049 if (winsize > 5) \{ -050 winsize = 5; -051 \} -052 #endif -053 -054 /* init M array */ -055 /* init first cell */ -056 if ((err = mp_init(&M[1])) != MP_OKAY) \{ -057 return err; -058 \} -059 -060 /* now init the second half of the array */ -061 for (x = 1<<(winsize-1); x < (1 << winsize); x++) \{ -062 if ((err = mp_init(&M[x])) != MP_OKAY) \{ -063 for (y = 1<<(winsize-1); y < x; y++) \{ -064 mp_clear (&M[y]); -065 \} -066 mp_clear(&M[1]); -067 return err; -068 \} -069 \} -070 -071 /* create mu, used for Barrett reduction */ -072 if ((err = mp_init (&mu)) != MP_OKAY) \{ -073 goto LBL_M; -074 \} -075 -076 if (redmode == 0) \{ -077 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) \{ -078 goto LBL_MU; -079 \} -080 redux = mp_reduce; -081 \} else \{ -082 if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) \{ -083 goto LBL_MU; -084 \} -085 redux = mp_reduce_2k_l; -086 \} -087 -088 /* create M table -089 * -090 * The M table contains powers of the 
base, -091 * e.g. M[x] = G**x mod P -092 * -093 * The first half of the table is not -094 * computed though accept for M[0] and M[1] -095 */ -096 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) \{ -097 goto LBL_MU; -098 \} -099 -100 /* compute the value at M[1<<(winsize-1)] by squaring -101 * M[1] (winsize-1) times -102 */ -103 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) \{ -104 goto LBL_MU; -105 \} -106 -107 for (x = 0; x < (winsize - 1); x++) \{ -108 /* square it */ -109 if ((err = mp_sqr (&M[1 << (winsize - 1)], -110 &M[1 << (winsize - 1)])) != MP_OKAY) \{ -111 goto LBL_MU; -112 \} -113 -114 /* reduce modulo P */ -115 if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) \{ -116 goto LBL_MU; -117 \} -118 \} -119 -120 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P) -121 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1) -122 */ -123 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) \{ -124 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) \{ -125 goto LBL_MU; -126 \} -127 if ((err = redux (&M[x], P, &mu)) != MP_OKAY) \{ -128 goto LBL_MU; -129 \} -130 \} -131 -132 /* setup result */ -133 if ((err = mp_init (&res)) != MP_OKAY) \{ -134 goto LBL_MU; -135 \} -136 mp_set (&res, 1); -137 -138 /* set initial mode and bit cnt */ -139 mode = 0; -140 bitcnt = 1; -141 buf = 0; -142 digidx = X->used - 1; -143 bitcpy = 0; -144 bitbuf = 0; -145 -146 for (;;) \{ -147 /* grab next digit as required */ -148 if (--bitcnt == 0) \{ -149 /* if digidx == -1 we are out of digits */ -150 if (digidx == -1) \{ -151 break; -152 \} -153 /* read next digit and reset the bitcnt */ -154 buf = X->dp[digidx--]; -155 bitcnt = (int) DIGIT_BIT; -156 \} -157 -158 /* grab the next msb from the exponent */ -159 y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1; -160 buf <<= (mp_digit)1; -161 -162 /* if the bit is zero and mode == 0 then we ignore it -163 * These represent the leading zero bits before the first 1 bit -164 * in the exponent. 
Technically this opt is not required but it -165 * does lower the # of trivial squaring/reductions used -166 */ -167 if (mode == 0 && y == 0) \{ -168 continue; -169 \} -170 -171 /* if the bit is zero and mode == 1 then we square */ -172 if (mode == 1 && y == 0) \{ -173 if ((err = mp_sqr (&res, &res)) != MP_OKAY) \{ -174 goto LBL_RES; -175 \} -176 if ((err = redux (&res, P, &mu)) != MP_OKAY) \{ -177 goto LBL_RES; -178 \} -179 continue; -180 \} -181 -182 /* else we add it to the window */ -183 bitbuf |= (y << (winsize - ++bitcpy)); -184 mode = 2; -185 -186 if (bitcpy == winsize) \{ -187 /* ok window is filled so square as required and multiply */ -188 /* square first */ -189 for (x = 0; x < winsize; x++) \{ -190 if ((err = mp_sqr (&res, &res)) != MP_OKAY) \{ -191 goto LBL_RES; -192 \} -193 if ((err = redux (&res, P, &mu)) != MP_OKAY) \{ -194 goto LBL_RES; -195 \} -196 \} -197 -198 /* then multiply */ -199 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) \{ -200 goto LBL_RES; -201 \} -202 if ((err = redux (&res, P, &mu)) != MP_OKAY) \{ -203 goto LBL_RES; -204 \} -205 -206 /* empty window and reset */ -207 bitcpy = 0; -208 bitbuf = 0; -209 mode = 1; -210 \} -211 \} -212 -213 /* if bits remain then square/multiply */ -214 if (mode == 2 && bitcpy > 0) \{ -215 /* square then multiply if the bit is set */ -216 for (x = 0; x < bitcpy; x++) \{ -217 if ((err = mp_sqr (&res, &res)) != MP_OKAY) \{ -218 goto LBL_RES; -219 \} -220 if ((err = redux (&res, P, &mu)) != MP_OKAY) \{ -221 goto LBL_RES; -222 \} -223 -224 bitbuf <<= 1; -225 if ((bitbuf & (1 << winsize)) != 0) \{ -226 /* then multiply */ -227 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) \{ -228 goto LBL_RES; -229 \} -230 if ((err = redux (&res, P, &mu)) != MP_OKAY) \{ -231 goto LBL_RES; -232 \} -233 \} -234 \} -235 \} -236 -237 mp_exch (&res, Y); -238 err = MP_OKAY; -239 LBL_RES:mp_clear (&res); -240 LBL_MU:mp_clear (&mu); -241 LBL_M: -242 mp_clear(&M[1]); -243 for (x = 1<<(winsize-1); x < (1 << winsize); 
x++) \{ -244 mp_clear (&M[x]); -245 \} -246 return err; -247 \} -248 #endif +023 \{ +024 mp_int M[TAB_SIZE], res, mu; +025 mp_digit buf; +026 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; +027 int (*redux)(mp_int*,mp_int*,mp_int*); +028 +029 /* find window size */ +030 x = mp_count_bits (X); +031 if (x <= 7) \{ +032 winsize = 2; +033 \} else if (x <= 36) \{ +034 winsize = 3; +035 \} else if (x <= 140) \{ +036 winsize = 4; +037 \} else if (x <= 450) \{ +038 winsize = 5; +039 \} else if (x <= 1303) \{ +040 winsize = 6; +041 \} else if (x <= 3529) \{ +042 winsize = 7; +043 \} else \{ +044 winsize = 8; +045 \} +046 +047 #ifdef MP_LOW_MEM +048 if (winsize > 5) \{ +049 winsize = 5; +050 \} +051 #endif +052 +053 /* init M array */ +054 /* init first cell */ +055 if ((err = mp_init(&M[1])) != MP_OKAY) \{ +056 return err; +057 \} +058 +059 /* now init the second half of the array */ +060 for (x = 1<<(winsize-1); x < (1 << winsize); x++) \{ +061 if ((err = mp_init(&M[x])) != MP_OKAY) \{ +062 for (y = 1<<(winsize-1); y < x; y++) \{ +063 mp_clear (&M[y]); +064 \} +065 mp_clear(&M[1]); +066 return err; +067 \} +068 \} +069 +070 /* create mu, used for Barrett reduction */ +071 if ((err = mp_init (&mu)) != MP_OKAY) \{ +072 goto LBL_M; +073 \} +074 +075 if (redmode == 0) \{ +076 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) \{ +077 goto LBL_MU; +078 \} +079 redux = mp_reduce; +080 \} else \{ +081 if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) \{ +082 goto LBL_MU; +083 \} +084 redux = mp_reduce_2k_l; +085 \} +086 +087 /* create M table +088 * +089 * The M table contains powers of the base, +090 * e.g. 
M[x] = G**x mod P +091 * +092 * The first half of the table is not +093 * computed though accept for M[0] and M[1] +094 */ +095 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) \{ +096 goto LBL_MU; +097 \} +098 +099 /* compute the value at M[1<<(winsize-1)] by squaring +100 * M[1] (winsize-1) times +101 */ +102 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) \{ +103 goto LBL_MU; +104 \} +105 +106 for (x = 0; x < (winsize - 1); x++) \{ +107 /* square it */ +108 if ((err = mp_sqr (&M[1 << (winsize - 1)], +109 &M[1 << (winsize - 1)])) != MP_OKAY) \{ +110 goto LBL_MU; +111 \} +112 +113 /* reduce modulo P */ +114 if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) \{ +115 goto LBL_MU; +116 \} +117 \} +118 +119 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P) +120 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1) +121 */ +122 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) \{ +123 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) \{ +124 goto LBL_MU; +125 \} +126 if ((err = redux (&M[x], P, &mu)) != MP_OKAY) \{ +127 goto LBL_MU; +128 \} +129 \} +130 +131 /* setup result */ +132 if ((err = mp_init (&res)) != MP_OKAY) \{ +133 goto LBL_MU; +134 \} +135 mp_set (&res, 1); +136 +137 /* set initial mode and bit cnt */ +138 mode = 0; +139 bitcnt = 1; +140 buf = 0; +141 digidx = X->used - 1; +142 bitcpy = 0; +143 bitbuf = 0; +144 +145 for (;;) \{ +146 /* grab next digit as required */ +147 if (--bitcnt == 0) \{ +148 /* if digidx == -1 we are out of digits */ +149 if (digidx == -1) \{ +150 break; +151 \} +152 /* read next digit and reset the bitcnt */ +153 buf = X->dp[digidx--]; +154 bitcnt = (int) DIGIT_BIT; +155 \} +156 +157 /* grab the next msb from the exponent */ +158 y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1; +159 buf <<= (mp_digit)1; +160 +161 /* if the bit is zero and mode == 0 then we ignore it +162 * These represent the leading zero bits before the first 1 bit +163 * in the exponent. 
Technically this opt is not required but it +164 * does lower the # of trivial squaring/reductions used +165 */ +166 if (mode == 0 && y == 0) \{ +167 continue; +168 \} +169 +170 /* if the bit is zero and mode == 1 then we square */ +171 if (mode == 1 && y == 0) \{ +172 if ((err = mp_sqr (&res, &res)) != MP_OKAY) \{ +173 goto LBL_RES; +174 \} +175 if ((err = redux (&res, P, &mu)) != MP_OKAY) \{ +176 goto LBL_RES; +177 \} +178 continue; +179 \} +180 +181 /* else we add it to the window */ +182 bitbuf |= (y << (winsize - ++bitcpy)); +183 mode = 2; +184 +185 if (bitcpy == winsize) \{ +186 /* ok window is filled so square as required and multiply */ +187 /* square first */ +188 for (x = 0; x < winsize; x++) \{ +189 if ((err = mp_sqr (&res, &res)) != MP_OKAY) \{ +190 goto LBL_RES; +191 \} +192 if ((err = redux (&res, P, &mu)) != MP_OKAY) \{ +193 goto LBL_RES; +194 \} +195 \} +196 +197 /* then multiply */ +198 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) \{ +199 goto LBL_RES; +200 \} +201 if ((err = redux (&res, P, &mu)) != MP_OKAY) \{ +202 goto LBL_RES; +203 \} +204 +205 /* empty window and reset */ +206 bitcpy = 0; +207 bitbuf = 0; +208 mode = 1; +209 \} +210 \} +211 +212 /* if bits remain then square/multiply */ +213 if (mode == 2 && bitcpy > 0) \{ +214 /* square then multiply if the bit is set */ +215 for (x = 0; x < bitcpy; x++) \{ +216 if ((err = mp_sqr (&res, &res)) != MP_OKAY) \{ +217 goto LBL_RES; +218 \} +219 if ((err = redux (&res, P, &mu)) != MP_OKAY) \{ +220 goto LBL_RES; +221 \} +222 +223 bitbuf <<= 1; +224 if ((bitbuf & (1 << winsize)) != 0) \{ +225 /* then multiply */ +226 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) \{ +227 goto LBL_RES; +228 \} +229 if ((err = redux (&res, P, &mu)) != MP_OKAY) \{ +230 goto LBL_RES; +231 \} +232 \} +233 \} +234 \} +235 +236 mp_exch (&res, Y); +237 err = MP_OKAY; +238 LBL_RES:mp_clear (&res); +239 LBL_MU:mp_clear (&mu); +240 LBL_M: +241 mp_clear(&M[1]); +242 for (x = 1<<(winsize-1); x < (1 << winsize); 
x++) \{ +243 mp_clear (&M[x]); +244 \} +245 return err; +246 \} +247 #endif +248 \end{alltt} \end{small} -Lines 21 through 40 determine the optimal window size based on the length of the exponent in bits. The window divisions are sorted +Lines 31 through 45 determine the optimal window size based on the length of the exponent in bits. The window divisions are sorted from smallest to greatest so that in each \textbf{if} statement only one condition must be tested. For example, by the \textbf{if} statement -on line 32 the value of $x$ is already known to be greater than $140$. +on line 37 the value of $x$ is already known to be greater than $140$. -The conditional piece of code beginning on line 48 allows the window size to be restricted to five bits. This logic is used to ensure +The conditional piece of code beginning on line 47 allows the window size to be restricted to five bits. This logic is used to ensure the table of precomputed powers of $G$ remains relatively small. -The for loop on line 61 initializes the $M$ array while lines 62 and 77 compute the value of $\mu$ required for +The for loop on line 60 initializes the $M$ array while lines 71 and 76 compute the value of $\mu$ required for Barrett reduction. -- More later. @@ -8146,6 +8195,7 @@ equivalent to $m \cdot 2^k$. By this logic when $m = 1$ a quick power of two ca 041 return MP_OKAY; 042 \} 043 #endif +044 \end{alltt} \end{small} @@ -8666,6 +8716,7 @@ respectively be replaced with a zero. 285 #endif 286 287 #endif +288 \end{alltt} \end{small} @@ -8820,6 +8871,7 @@ This algorithm initiates a temporary mp\_int with the value of the single digit 102 \} 103 104 #endif +105 \end{alltt} \end{small} @@ -8929,6 +8981,7 @@ Unlike the full multiplication algorithms this algorithm does not require any si 072 return MP_OKAY; 073 \} 074 #endif +075 \end{alltt} \end{small} @@ -9074,6 +9127,7 @@ from chapter seven. 103 \} 104 105 #endif +106 \end{alltt} \end{small} @@ -9260,6 +9314,7 @@ root.
Ideally this algorithm is meant to find the $n$'th root of an input where 125 return res; 126 \} 127 #endif +128 \end{alltt} \end{small} @@ -9336,6 +9391,7 @@ the integers from $0$ to $\beta - 1$. 048 return MP_OKAY; 049 \} 050 #endif +051 \end{alltt} \end{small} @@ -9480,6 +9536,7 @@ as part of larger input without any significant problem. 075 return MP_OKAY; 076 \} 077 #endif +078 \end{alltt} \end{small} @@ -9599,6 +9656,7 @@ are required instead of a series of $n \times k$ divisions. One design flaw of 068 \} 069 070 #endif +071 \end{alltt} \end{small} @@ -9879,6 +9937,7 @@ must be adjusted by multiplying by the common factors of two ($2^k$) removed ear 106 return res; 107 \} 108 #endif +109 \end{alltt} \end{small} @@ -9974,6 +10033,7 @@ dividing the product of the two inputs by their greatest common divisor. 053 return res; 054 \} 055 #endif +056 \end{alltt} \end{small} @@ -10218,6 +10278,7 @@ $\left ( {p' \over a'} \right )$ which is multiplied against the current Jacobi 098 return res; 099 \} 100 #endif +101 \end{alltt} \end{small} @@ -10366,6 +10427,7 @@ then only a couple of additions or subtractions will be required to adjust the i 036 return MP_VAL; 037 \} 038 #endif +039 \end{alltt} \end{small} @@ -10467,6 +10529,7 @@ This algorithm attempts to determine if a candidate integer $n$ is composite by 043 return MP_OKAY; 044 \} 045 #endif +046 \end{alltt} \end{small} @@ -10518,6 +10581,7 @@ mp\_digit. The table \_\_prime\_tab is defined in the following file. 054 #endif 055 \}; 056 #endif +057 \end{alltt} \end{small} @@ -10606,6 +10670,7 @@ determine the result. 055 return err; 056 \} 057 #endif +058 \end{alltt} \end{small} @@ -10741,6 +10806,7 @@ composite then it is \textit{probably} prime. 
096 return err; 097 \} 098 #endif +099 \end{alltt} \end{small} diff --git a/tommath_class.h b/tommath_class.h index 6d05b7b..68b88b9 100644 --- a/tommath_class.h +++ b/tommath_class.h @@ -687,6 +687,7 @@ #if defined(BN_MP_READ_RADIX_C) #define BN_MP_ZERO_C #define BN_MP_S_RMAP_C + #define BN_MP_RADIX_SMAP_C #define BN_MP_MUL_D_C #define BN_MP_ADD_D_C #define BN_MP_ISZERO_C @@ -992,3 +993,7 @@ #else #define LTM_LAST #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */ diff --git a/tommath_superclass.h b/tommath_superclass.h index b50ecb0..1b26841 100644 --- a/tommath_superclass.h +++ b/tommath_superclass.h @@ -4,7 +4,7 @@ #define LTM_ALL /* RSA only (does not support DH/DSA/ECC) */ -// #define SC_RSA_1 +/* #define SC_RSA_1 */ /* For reference.... On an Athlon64 optimizing for speed... @@ -70,3 +70,7 @@ #endif #endif + +/* $Source$ */ +/* $Revision$ */ +/* $Date$ */