From 410ae3951e16b29d35ddd731456ae6eb83b6e320 Mon Sep 17 00:00:00 2001
From: Steffen Jaeckel <s@jaeckel.eu>
Date: Tue, 14 Oct 2014 13:48:23 +0200
Subject: [PATCH] trim trailing spaces

---
 gen.pl        |   4 +-
 mtest/mpi.c   | 152 +++++++--------
 pre_gen/mpi.c | 514 +++++++++++++++++++++++++-------------------------
 3 files changed, 336 insertions(+), 334 deletions(-)

diff --git a/gen.pl b/gen.pl
index 7236591..57f65ac 100644
--- a/gen.pl
+++ b/gen.pl
@@ -14,4 +14,6 @@ foreach my $filename (glob "bn*.c") {
    close SRC or die "Error closing $filename after reading: $!";
 }
 print OUT "\n/* EOF */\n";
-close OUT or die "Error closing mpi.c after writing: $!";
\ No newline at end of file
+close OUT or die "Error closing mpi.c after writing: $!";
+
+system('perl -pli -e "s/\s*$//" mpi.c');
diff --git a/mtest/mpi.c b/mtest/mpi.c
index bc40bcf..d475c5e 100644
--- a/mtest/mpi.c
+++ b/mtest/mpi.c
@@ -22,7 +22,7 @@
 #define DIAG(T,V)
 #endif
 
-/* 
+/*
    If MP_LOGTAB is not defined, use the math library to compute the
    logarithms on the fly.  Otherwise, use the static table below.
    Pick which works best for your system.
@@ -33,7 +33,7 @@
 
 /*
   A table of the logs of 2 for various bases (the 0 and 1 entries of
-  this table are meaningless and should not be referenced).  
+  this table are meaningless and should not be referenced).
 
   This table is used to compute output lengths for the mp_toradix()
   function.  Since a number n in radix r takes up about log_r(n)
@@ -43,7 +43,7 @@
   log_r(n) = log_2(n) * log_r(2)
 
   This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
-  which are the output bases supported.  
+  which are the output bases supported.
  */
 
 #include "logtab.h"
@@ -104,7 +104,7 @@ static const char *mp_err_string[] = {
 /* Value to digit maps for radix conversion   */
 
 /* s_dmap_1 - standard digits and letters */
-static const char *s_dmap_1 = 
+static const char *s_dmap_1 =
   "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
 
 #if 0
@@ -117,7 +117,7 @@ static const char *s_dmap_2 =
 
 /* {{{ Static function declarations */
 
-/* 
+/*
    If MP_MACRO is false, these will be defined as actual functions;
    otherwise, suitable macro definitions will be used.  This works
    around the fact that ANSI C89 doesn't support an 'inline' keyword
@@ -258,7 +258,7 @@ mp_err mp_init_array(mp_int mp[], int count)
   return MP_OKAY;
 
  CLEANUP:
-  while(--pos >= 0) 
+  while(--pos >= 0)
     mp_clear(&mp[pos]);
 
   return res;
@@ -355,7 +355,7 @@ mp_err mp_copy(mp_int *from, mp_int *to)
     if(ALLOC(to) >= USED(from)) {
       s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
       s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
-      
+
     } else {
       if((tmp = s_mp_alloc(USED(from), sizeof(mp_digit))) == NULL)
 	return MP_MEM;
@@ -445,7 +445,7 @@ void   mp_clear_array(mp_int mp[], int count)
 {
   ARGCHK(mp != NULL && count > 0, MP_BADARG);
 
-  while(--count >= 0) 
+  while(--count >= 0)
     mp_clear(&mp[count]);
 
 } /* end mp_clear_array() */
@@ -455,7 +455,7 @@ void   mp_clear_array(mp_int mp[], int count)
 /* {{{ mp_zero(mp) */
 
 /*
-  mp_zero(mp) 
+  mp_zero(mp)
 
   Set mp to zero.  Does not change the allocated size of the structure,
   and therefore cannot fail (except on a bad argument, which we ignore)
@@ -506,7 +506,7 @@ mp_err mp_set_int(mp_int *mp, long z)
     if((res = s_mp_mul_2d(mp, CHAR_BIT)) != MP_OKAY)
       return res;
 
-    res = s_mp_add_d(mp, 
+    res = s_mp_add_d(mp,
 		     (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
     if(res != MP_OKAY)
       return res;
@@ -841,9 +841,9 @@ mp_err mp_neg(mp_int *a, mp_int *b)
   if((res = mp_copy(a, b)) != MP_OKAY)
     return res;
 
-  if(s_mp_cmp_d(b, 0) == MP_EQ) 
+  if(s_mp_cmp_d(b, 0) == MP_EQ)
     SIGN(b) = MP_ZPOS;
-  else 
+  else
     SIGN(b) = (SIGN(b) == MP_NEG) ? MP_ZPOS : MP_NEG;
 
   return MP_OKAY;
@@ -870,7 +870,7 @@ mp_err mp_add(mp_int *a, mp_int *b, mp_int *c)
   if(SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
 
     /* Commutativity of addition lets us do this in either order,
-       so we avoid having to use a temporary even if the result 
+       so we avoid having to use a temporary even if the result
        is supposed to replace the output
      */
     if(c == b) {
@@ -880,14 +880,14 @@ mp_err mp_add(mp_int *a, mp_int *b, mp_int *c)
       if(c != a && (res = mp_copy(a, c)) != MP_OKAY)
 	return res;
 
-      if((res = s_mp_add(c, b)) != MP_OKAY) 
+      if((res = s_mp_add(c, b)) != MP_OKAY)
 	return res;
     }
 
   } else if((cmp = s_mp_cmp(a, b)) > 0) {  /* different sign: a > b   */
 
     /* If the output is going to be clobbered, we will use a temporary
-       variable; otherwise, we'll do it without touching the memory 
+       variable; otherwise, we'll do it without touching the memory
        allocator at all, if possible
      */
     if(c == b) {
@@ -1019,7 +1019,7 @@ mp_err mp_sub(mp_int *a, mp_int *b, mp_int *c)
       mp_clear(&tmp);
 
     } else {
-      if(c != b && ((res = mp_copy(b, c)) != MP_OKAY)) 
+      if(c != b && ((res = mp_copy(b, c)) != MP_OKAY))
 	return res;
 
       if((res = s_mp_sub(c, a)) != MP_OKAY)
@@ -1066,12 +1066,12 @@ mp_err mp_mul(mp_int *a, mp_int *b, mp_int *c)
     if((res = s_mp_mul(c, b)) != MP_OKAY)
       return res;
   }
-  
+
   if(sgn == MP_ZPOS || s_mp_cmp_d(c, 0) == MP_EQ)
     SIGN(c) = MP_ZPOS;
   else
     SIGN(c) = sgn;
-  
+
   return MP_OKAY;
 
 } /* end mp_mul() */
@@ -1160,7 +1160,7 @@ mp_err mp_div(mp_int *a, mp_int *b, mp_int *q, mp_int *r)
 	return res;
     }
 
-    if(q) 
+    if(q)
       mp_zero(q);
 
     return MP_OKAY;
@@ -1206,10 +1206,10 @@ mp_err mp_div(mp_int *a, mp_int *b, mp_int *q, mp_int *r)
     SIGN(&rtmp) = MP_ZPOS;
 
   /* Copy output, if it is needed      */
-  if(q) 
+  if(q)
     s_mp_exch(&qtmp, q);
 
-  if(r) 
+  if(r)
     s_mp_exch(&rtmp, r);
 
 CLEANUP:
@@ -1286,12 +1286,12 @@ mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
     /* Loop over bits of each non-maximal digit */
     for(bit = 0; bit < DIGIT_BIT; bit++) {
       if(d & 1) {
-	if((res = s_mp_mul(&s, &x)) != MP_OKAY) 
+	if((res = s_mp_mul(&s, &x)) != MP_OKAY)
 	  goto CLEANUP;
       }
 
       d >>= 1;
-      
+
       if((res = s_mp_sqr(&x)) != MP_OKAY)
 	goto CLEANUP;
     }
@@ -1311,7 +1311,7 @@ mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
     if((res = s_mp_sqr(&x)) != MP_OKAY)
       goto CLEANUP;
   }
-  
+
   if(mp_iseven(b))
     SIGN(&s) = SIGN(a);
 
@@ -1362,7 +1362,7 @@ mp_err mp_mod(mp_int *a, mp_int *m, mp_int *c)
 
   /*
      If |a| > m, we need to divide to get the remainder and take the
-     absolute value.  
+     absolute value.
 
      If |a| < m, we don't need to do any division, just copy and adjust
      the sign (if a is negative).
@@ -1376,7 +1376,7 @@ mp_err mp_mod(mp_int *a, mp_int *m, mp_int *c)
   if((mag = s_mp_cmp(a, m)) > 0) {
     if((res = mp_div(a, m, NULL, c)) != MP_OKAY)
       return res;
-    
+
     if(SIGN(c) == MP_NEG) {
       if((res = mp_add(c, m, c)) != MP_OKAY)
 	return res;
@@ -1391,7 +1391,7 @@ mp_err mp_mod(mp_int *a, mp_int *m, mp_int *c)
 	return res;
 
     }
-    
+
   } else {
     mp_zero(c);
 
@@ -1464,9 +1464,9 @@ mp_err mp_sqrt(mp_int *a, mp_int *b)
     return MP_RANGE;
 
   /* Special cases for zero and one, trivial     */
-  if(mp_cmp_d(a, 0) == MP_EQ || mp_cmp_d(a, 1) == MP_EQ) 
+  if(mp_cmp_d(a, 0) == MP_EQ || mp_cmp_d(a, 1) == MP_EQ)
     return mp_copy(a, b);
-    
+
   /* Initialize the temporaries we'll use below  */
   if((res = mp_init_size(&t, USED(a))) != MP_OKAY)
     return res;
@@ -1508,7 +1508,7 @@ mp_add_d(&x, 1, &x);
  CLEANUP:
   mp_clear(&x);
  X:
-  mp_clear(&t); 
+  mp_clear(&t);
 
   return res;
 
@@ -1626,7 +1626,7 @@ mp_err mp_sqrmod(mp_int *a, mp_int *m, mp_int *c)
   Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
   method with modular reductions at each step. (This is basically the
   same code as mp_expt(), except for the addition of the reductions)
-  
+
   The modular reductions are done using Barrett's algorithm (see
   s_mp_reduce() below for details)
  */
@@ -1655,7 +1655,7 @@ mp_err mp_exptmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c)
   mp_set(&s, 1);
 
   /* mu = b^2k / m */
-  s_mp_add_d(&mu, 1); 
+  s_mp_add_d(&mu, 1);
   s_mp_lshd(&mu, 2 * USED(m));
   if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
     goto CLEANUP;
@@ -1866,7 +1866,7 @@ int    mp_cmp_int(mp_int *a, long z)
   int     out;
 
   ARGCHK(a != NULL, MP_EQ);
-  
+
   mp_init(&tmp); mp_set_int(&tmp, z);
   out = mp_cmp(a, &tmp);
   mp_clear(&tmp);
@@ -1953,13 +1953,13 @@ mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c)
   if(mp_isodd(&u)) {
     if((res = mp_copy(&v, &t)) != MP_OKAY)
       goto CLEANUP;
-    
+
     /* t = -v */
     if(SIGN(&v) == MP_ZPOS)
       SIGN(&t) = MP_NEG;
     else
       SIGN(&t) = MP_ZPOS;
-    
+
   } else {
     if((res = mp_copy(&u, &t)) != MP_OKAY)
       goto CLEANUP;
@@ -2152,7 +2152,7 @@ mp_err mp_xgcd(mp_int *a, mp_int *b, mp_int *g, mp_int *x, mp_int *y)
 
       if(y)
 	if((res = mp_copy(&D, y)) != MP_OKAY) goto CLEANUP;
-      
+
       if(g)
 	if((res = mp_mul(&gx, &v, g)) != MP_OKAY) goto CLEANUP;
 
@@ -2255,7 +2255,7 @@ void   mp_print(mp_int *mp, FILE *ofp)
 
 /* {{{ mp_read_signed_bin(mp, str, len) */
 
-/* 
+/*
    mp_read_signed_bin(mp, str, len)
 
    Read in a raw value (base 256) into the given mp_int
@@ -2332,16 +2332,16 @@ mp_err  mp_read_unsigned_bin(mp_int *mp, unsigned char *str, int len)
     if((res = mp_add_d(mp, str[ix], mp)) != MP_OKAY)
       return res;
   }
-  
+
   return MP_OKAY;
-  
+
 } /* end mp_read_unsigned_bin() */
 
 /* }}} */
 
 /* {{{ mp_unsigned_bin_size(mp) */
 
-int     mp_unsigned_bin_size(mp_int *mp) 
+int     mp_unsigned_bin_size(mp_int *mp)
 {
   mp_digit   topdig;
   int        count;
@@ -2440,7 +2440,7 @@ int    mp_count_bits(mp_int *mp)
   }
 
   return len;
-  
+
 } /* end mp_count_bits() */
 
 /* }}} */
@@ -2462,14 +2462,14 @@ mp_err  mp_read_radix(mp_int *mp, unsigned char *str, int radix)
   mp_err  res;
   mp_sign sig = MP_ZPOS;
 
-  ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX, 
+  ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
 	 MP_BADARG);
 
   mp_zero(mp);
 
   /* Skip leading non-digit characters until a digit or '-' or '+' */
-  while(str[ix] && 
-	(s_mp_tovalue(str[ix], radix) < 0) && 
+  while(str[ix] &&
+	(s_mp_tovalue(str[ix], radix) < 0) &&
 	str[ix] != '-' &&
 	str[ix] != '+') {
     ++ix;
@@ -2525,7 +2525,7 @@ int    mp_radix_size(mp_int *mp, int radix)
 /* num = number of digits
    qty = number of bits per digit
    radix = target base
-   
+
    Return the number of digits in the specified radix that would be
    needed to express 'num' digits of 'qty' bits each.
  */
@@ -2594,7 +2594,7 @@ mp_err mp_toradix(mp_int *mp, unsigned char *str, int radix)
       ++ix;
       --pos;
     }
-    
+
     mp_clear(&tmp);
   }
 
@@ -2806,18 +2806,18 @@ void     s_mp_exch(mp_int *a, mp_int *b)
 
 /* {{{ s_mp_lshd(mp, p) */
 
-/* 
+/*
    Shift mp leftward by p digits, growing if needed, and zero-filling
    the in-shifted digits at the right end.  This is a convenient
    alternative to multiplication by powers of the radix
- */   
+ */
 
 mp_err   s_mp_lshd(mp_int *mp, mp_size p)
 {
   mp_err   res;
   mp_size  pos;
   mp_digit *dp;
-  int     ix;
+  int ix;
 
   if(p == 0)
     return MP_OKAY;
@@ -2829,7 +2829,7 @@ mp_err   s_mp_lshd(mp_int *mp, mp_size p)
   dp = DIGITS(mp);
 
   /* Shift all the significant figures over as needed */
-  for(ix = pos - p; ix >= 0; ix--) 
+  for(ix = pos - p; ix >= 0; ix--)
     dp[ix + p] = dp[ix];
 
   /* Fill the bottom digits with zeroes */
@@ -2844,7 +2844,7 @@ mp_err   s_mp_lshd(mp_int *mp, mp_size p)
 
 /* {{{ s_mp_rshd(mp, p) */
 
-/* 
+/*
    Shift mp rightward by p digits.  Maintains the invariant that
    digits above the precision are all zero.  Digits shifted off the
    end are lost.  Cannot fail.
@@ -3054,7 +3054,7 @@ void     s_mp_div_2d(mp_int *mp, mp_digit d)
   end of the division process).
 
   We multiply by the smallest power of 2 that gives us a leading digit
-  at least half the radix.  By choosing a power of 2, we simplify the 
+  at least half the radix.  By choosing a power of 2, we simplify the
   multiplication and division steps to simple shifts.
  */
 mp_digit s_mp_norm(mp_int *a, mp_int *b)
@@ -3066,7 +3066,7 @@ mp_digit s_mp_norm(mp_int *a, mp_int *b)
     t <<= 1;
     ++d;
   }
-    
+
   if(d != 0) {
     s_mp_mul_2d(a, d);
     s_mp_mul_2d(b, d);
@@ -3188,14 +3188,14 @@ mp_err   s_mp_mul_d(mp_int *a, mp_digit d)
      test guarantees we have enough storage to do this safely.
    */
   if(k) {
-    dp[max] = k; 
+    dp[max] = k;
     USED(a) = max + 1;
   }
 
   s_mp_clamp(a);
 
   return MP_OKAY;
-  
+
 } /* end s_mp_mul_d() */
 
 /* }}} */
@@ -3289,7 +3289,7 @@ mp_err   s_mp_add(mp_int *a, mp_int *b)        /* magnitude addition      */
   }
 
   /* If we run out of 'b' digits before we're actually done, make
-     sure the carries get propagated upward...  
+     sure the carries get propagated upward...
    */
   used = USED(a);
   while(w && ix < used) {
@@ -3351,7 +3351,7 @@ mp_err   s_mp_sub(mp_int *a, mp_int *b)        /* magnitude subtract      */
   /* Clobber any leading zeroes we created    */
   s_mp_clamp(a);
 
-  /* 
+  /*
      If there was a borrow out, then |b| > |a| in violation
      of our input invariant.  We've already done the work,
      but we'll at least complain about it...
@@ -3387,7 +3387,7 @@ mp_err   s_mp_reduce(mp_int *x, mp_int *m, mp_int *mu)
   s_mp_mod_2d(&q, (mp_digit)(DIGIT_BIT * (um + 1)));
 #else
   s_mp_mul_dig(&q, m, um + 1);
-#endif  
+#endif
 
   /* x = x - q */
   if((res = mp_sub(x, &q, x)) != MP_OKAY)
@@ -3441,7 +3441,7 @@ mp_err   s_mp_mul(mp_int *a, mp_int *b)
 
   pb = DIGITS(b);
   for(ix = 0; ix < ub; ++ix, ++pb) {
-    if(*pb == 0) 
+    if(*pb == 0)
       continue;
 
     /* Inner product:  Digits of a */
@@ -3480,7 +3480,7 @@ void   s_mp_kmul(mp_digit *a, mp_digit *b, mp_digit *out, mp_size len)
   for(ix = 0; ix < len; ++ix, ++b) {
     if(*b == 0)
       continue;
-    
+
     pa = a;
     for(jx = 0; jx < len; ++jx, ++pa) {
       pt = out + ix + jx;
@@ -3547,7 +3547,7 @@ mp_err   s_mp_sqr(mp_int *a)
      */
     for(jx = ix + 1, pa2 = DIGITS(a) + jx; jx < used; ++jx, ++pa2) {
       mp_word  u = 0, v;
-      
+
       /* Store this in a temporary to avoid indirections later */
       pt = pbt + ix + jx;
 
@@ -3568,7 +3568,7 @@ mp_err   s_mp_sqr(mp_int *a)
       v = *pt + k;
 
       /* If we do not already have an overflow carry, check to see
-	 if the addition will cause one, and set the carry out if so 
+	 if the addition will cause one, and set the carry out if so
        */
       u |= ((MP_WORD_MAX - v) < w);
 
@@ -3592,7 +3592,7 @@ mp_err   s_mp_sqr(mp_int *a)
     /* If we are carrying out, propagate the carry to the next digit
        in the output.  This may cascade, so we have to be somewhat
        circumspect -- but we will have enough precision in the output
-       that we won't overflow 
+       that we won't overflow
      */
     kx = 1;
     while(k) {
@@ -3664,7 +3664,7 @@ mp_err   s_mp_div(mp_int *a, mp_int *b)
   while(ix >= 0) {
     /* Find a partial substring of a which is at least b */
     while(s_mp_cmp(&rem, b) < 0 && ix >= 0) {
-      if((res = s_mp_lshd(&rem, 1)) != MP_OKAY) 
+      if((res = s_mp_lshd(&rem, 1)) != MP_OKAY)
 	goto CLEANUP;
 
       if((res = s_mp_lshd(&quot, 1)) != MP_OKAY)
@@ -3676,8 +3676,8 @@ mp_err   s_mp_div(mp_int *a, mp_int *b)
     }
 
     /* If we didn't find one, we're finished dividing    */
-    if(s_mp_cmp(&rem, b) < 0) 
-      break;    
+    if(s_mp_cmp(&rem, b) < 0)
+      break;
 
     /* Compute a guess for the next quotient digit       */
     q = DIGIT(&rem, USED(&rem) - 1);
@@ -3695,7 +3695,7 @@ mp_err   s_mp_div(mp_int *a, mp_int *b)
     if((res = s_mp_mul_d(&t, q)) != MP_OKAY)
       goto CLEANUP;
 
-    /* 
+    /*
        If it's too big, back it off.  We should not have to do this
        more than once, or, in rare cases, twice.  Knuth describes a
        method by which this could be reduced to a maximum of once, but
@@ -3719,7 +3719,7 @@ mp_err   s_mp_div(mp_int *a, mp_int *b)
   }
 
   /* Denormalize remainder                */
-  if(d != 0) 
+  if(d != 0)
     s_mp_div_2d(&rem, d);
 
   s_mp_clamp(&quot);
@@ -3727,7 +3727,7 @@ mp_err   s_mp_div(mp_int *a, mp_int *b)
 
   /* Copy quotient back to output         */
   s_mp_exch(&quot, a);
-  
+
   /* Copy remainder back to output        */
   s_mp_exch(&rem, b);
 
@@ -3757,7 +3757,7 @@ mp_err   s_mp_2expt(mp_int *a, mp_digit k)
   mp_zero(a);
   if((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
     return res;
-  
+
   DIGIT(a, dig) |= (1 << bit);
 
   return MP_OKAY;
@@ -3815,7 +3815,7 @@ int      s_mp_cmp_d(mp_int *a, mp_digit d)
   if(ua > 1)
     return MP_GT;
 
-  if(*ap < d) 
+  if(*ap < d)
     return MP_LT;
   else if(*ap > d)
     return MP_GT;
@@ -3857,7 +3857,7 @@ int      s_mp_ispow2(mp_int *v)
     }
 
     return ((uv - 1) * DIGIT_BIT) + extra;
-  } 
+  }
 
   return -1;
 
@@ -3901,7 +3901,7 @@ int      s_mp_ispow2d(mp_digit d)
 int      s_mp_tovalue(char ch, int r)
 {
   int    val, xch;
-  
+
   if(r > 36)
     xch = ch;
   else
@@ -3917,7 +3917,7 @@ int      s_mp_tovalue(char ch, int r)
     val = 62;
   else if(xch == '/')
     val = 63;
-  else 
+  else
     return -1;
 
   if(val < 0 || val >= r)
@@ -3939,7 +3939,7 @@ int      s_mp_tovalue(char ch, int r)
   The results may be odd if you use a radix < 2 or > 64, you are
   expected to know what you're doing.
  */
-  
+
 char     s_mp_todigit(int val, int r, int low)
 {
   char   ch;
@@ -3960,7 +3960,7 @@ char     s_mp_todigit(int val, int r, int low)
 
 /* {{{ s_mp_outlen(bits, radix) */
 
-/* 
+/*
    Return an estimate for how long a string is needed to hold a radix
    r representation of a number with 'bits' significant bits.
 
diff --git a/pre_gen/mpi.c b/pre_gen/mpi.c
index c0f860c..0d55d73 100644
--- a/pre_gen/mpi.c
+++ b/pre_gen/mpi.c
@@ -67,10 +67,10 @@ char *mp_error_to_string(int code)
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-/* computes the modular inverse via binary extended euclidean algorithm, 
- * that is c = 1/a mod b 
+/* computes the modular inverse via binary extended euclidean algorithm,
+ * that is c = 1/a mod b
  *
- * Based on slow invmod except this is optimized for the case where b is 
+ * Based on slow invmod except this is optimized for the case where b is
  * odd as per HAC Note 14.64 on pp. 610
  */
 int fast_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
@@ -397,15 +397,15 @@ int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
 
 /* Fast (comba) multiplier
  *
- * This is the fast column-array [comba] multiplier.  It is 
- * designed to compute the columns of the product first 
- * then handle the carries afterwards.  This has the effect 
+ * This is the fast column-array [comba] multiplier.  It is
+ * designed to compute the columns of the product first
+ * then handle the carries afterwards.  This has the effect
  * of making the nested loops that compute the columns very
  * simple and schedulable on super-scalar processors.
  *
- * This has been modified to produce a variable number of 
- * digits of output so if say only a half-product is required 
- * you don't have to compute the upper half (a feature 
+ * This has been modified to produce a variable number of
+ * digits of output so if say only a half-product is required
+ * you don't have to compute the upper half (a feature
  * required for fast Barrett reduction).
  *
  * Based on Algorithm 14.12 on pp.595 of HAC.
@@ -429,7 +429,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 
   /* clear the carry */
   _W = 0;
-  for (ix = 0; ix < pa; ix++) { 
+  for (ix = 0; ix < pa; ix++) {
       int      tx, ty;
       int      iy;
       mp_digit *tmpx, *tmpy;
@@ -442,7 +442,7 @@ int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
       tmpx = a->dp + tx;
       tmpy = b->dp + ty;
 
-      /* this is the number of times the loop will iterrate, essentially 
+      /* this is the number of times the loop will iterrate, essentially
          while (tx++ < a->used && ty-- >= 0) { ... }
        */
       iy = MIN(a->used-tx, ty+1);
@@ -532,7 +532,7 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
   /* number of output digits to produce */
   pa = a->used + b->used;
   _W = 0;
-  for (ix = digs; ix < pa; ix++) { 
+  for (ix = digs; ix < pa; ix++) {
       int      tx, ty, iy;
       mp_digit *tmpx, *tmpy;
 
@@ -544,7 +544,7 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
       tmpx = a->dp + tx;
       tmpy = b->dp + ty;
 
-      /* this is the number of times the loop will iterrate, essentially its 
+      /* this is the number of times the loop will iterrate, essentially its
          while (tx++ < a->used && ty-- >= 0) { ... }
        */
       iy = MIN(a->used-tx, ty+1);
@@ -560,7 +560,7 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
       /* make next carry */
       _W = _W >> ((mp_word)DIGIT_BIT);
   }
-  
+
   /* setup dest */
   olduse  = c->used;
   c->used = pa;
@@ -609,10 +609,10 @@ int fast_s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
  */
 
 /* the jist of squaring...
- * you do like mult except the offset of the tmpx [one that 
- * starts closer to zero] can't equal the offset of tmpy.  
+ * you do like mult except the offset of the tmpx [one that
+ * starts closer to zero] can't equal the offset of tmpy.
  * So basically you set up iy like before then you min it with
- * (ty-tx) so that it never happens.  You double all those 
+ * (ty-tx) so that it never happens.  You double all those
  * you add in the inner loop
 
 After that loop you do the squares and add them in.
@@ -634,7 +634,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
 
   /* number of output digits to produce */
   W1 = 0;
-  for (ix = 0; ix < pa; ix++) { 
+  for (ix = 0; ix < pa; ix++) {
       int      tx, ty, iy;
       mp_word  _W;
       mp_digit *tmpy;
@@ -655,7 +655,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
        */
       iy = MIN(a->used-tx, ty+1);
 
-      /* now for squaring tx can never equal ty 
+      /* now for squaring tx can never equal ty
        * we halve the distance since they approach at a rate of 2x
        * and we have to round because odd cases need to be executed
        */
@@ -726,7 +726,7 @@ int fast_s_mp_sqr (mp_int * a, mp_int * b)
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-/* computes a = 2**b 
+/* computes a = 2**b
  *
  * Simple algorithm which zeroes the int, grows it then just sets one bit
  * as required.
@@ -778,7 +778,7 @@ mp_2expt (mp_int * a, int b)
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-/* b = |a| 
+/* b = |a|
  *
  * Simple function copies the input and fixes the sign to positive
  */
@@ -1104,7 +1104,7 @@ mp_and (mp_int * a, mp_int * b, mp_int * c)
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-/* trim unused digits 
+/* trim unused digits
  *
  * This is used to ensure that leading zero digits are
  * trimed and the leading "used" digit will be non-zero
@@ -1201,7 +1201,7 @@ mp_clear (mp_int * a)
  */
 #include <stdarg.h>
 
-void mp_clear_multi(mp_int *mp, ...) 
+void mp_clear_multi(mp_int *mp, ...)
 {
     mp_int* next_mp = mp;
     va_list args;
@@ -1250,7 +1250,7 @@ mp_cmp (mp_int * a, mp_int * b)
         return MP_GT;
      }
   }
-  
+
   /* compare digits */
   if (a->sign == MP_NEG) {
      /* if negative compare opposite direction */
@@ -1343,7 +1343,7 @@ int mp_cmp_mag (mp_int * a, mp_int * b)
   if (a->used > b->used) {
     return MP_GT;
   }
-  
+
   if (a->used < b->used) {
     return MP_LT;
   }
@@ -1392,7 +1392,7 @@ int mp_cmp_mag (mp_int * a, mp_int * b)
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-static const int lnz[16] = { 
+static const int lnz[16] = {
    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
 };
 
@@ -1535,7 +1535,7 @@ mp_count_bits (mp_int * a)
 
   /* get number of digits and add that */
   r = (a->used - 1) * DIGIT_BIT;
-  
+
   /* take the last digit and count the bits in it */
   q = a->dp[a->used - 1];
   while (q > ((mp_digit) 0)) {
@@ -1605,7 +1605,7 @@ int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
   mp_set(&tq, 1);
   n = mp_count_bits(a) - mp_count_bits(b);
   if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
-      ((res = mp_abs(b, &tb)) != MP_OKAY) || 
+      ((res = mp_abs(b, &tb)) != MP_OKAY) ||
       ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
       ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
       goto LBL_ERR;
@@ -1642,17 +1642,17 @@ LBL_ERR:
 
 #else
 
-/* integer signed division. 
+/* integer signed division.
  * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
  * HAC pp.598 Algorithm 14.20
  *
- * Note that the description in HAC is horribly 
- * incomplete.  For example, it doesn't consider 
- * the case where digits are removed from 'x' in 
- * the inner loop.  It also doesn't consider the 
+ * Note that the description in HAC is horribly
+ * incomplete.  For example, it doesn't consider
+ * the case where digits are removed from 'x' in
+ * the inner loop.  It also doesn't consider the
  * case that y has fewer than three digits, etc..
  *
- * The overall algorithm is as described as 
+ * The overall algorithm is as described as
  * 14.20 from HAC but fixed to treat these cases.
 */
 int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
@@ -1742,7 +1742,7 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
       continue;
     }
 
-    /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 
+    /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
      * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
     if (x.dp[i] == y.dp[t]) {
       q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
@@ -1756,10 +1756,10 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
       q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
     }
 
-    /* while (q{i-t-1} * (yt * b + y{t-1})) > 
-             xi * b**2 + xi-1 * b + xi-2 
-     
-       do q{i-t-1} -= 1; 
+    /* while (q{i-t-1} * (yt * b + y{t-1})) >
+             xi * b**2 + xi-1 * b + xi-2
+
+       do q{i-t-1} -= 1;
     */
     q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
     do {
@@ -1810,10 +1810,10 @@ int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
     }
   }
 
-  /* now q is the quotient and x is the remainder 
-   * [which we have to normalize] 
+  /* now q is the quotient and x is the remainder
+   * [which we have to normalize]
    */
-  
+
   /* get sign before writing to c */
   x.sign = x.used == 0 ? MP_ZPOS : a->sign;
 
@@ -2047,14 +2047,14 @@ mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
   mp_word  w, t;
   mp_digit b;
   int      res, ix;
-  
+
   /* b = 2**DIGIT_BIT / 3 */
   b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
 
   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
      return res;
   }
-  
+
   q.used = a->used;
   q.sign = a->sign;
   w = 0;
@@ -2092,7 +2092,7 @@ mp_div_3 (mp_int * a, mp_int *c, mp_digit * d)
      mp_exch(&q, c);
   }
   mp_clear(&q);
-  
+
   return res;
 }
 
@@ -2186,13 +2186,13 @@ int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
   if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
      return res;
   }
-  
+
   q.used = a->used;
   q.sign = a->sign;
   w = 0;
   for (ix = a->used - 1; ix >= 0; ix--) {
      w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
-     
+
      if (w >= b) {
         t = (mp_digit)(w / b);
         w -= ((mp_word)t) * ((mp_word)b);
@@ -2201,17 +2201,17 @@ int mp_div_d (mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
       }
       q.dp[ix] = (mp_digit)t;
   }
-  
+
   if (d != NULL) {
      *d = (mp_digit)w;
   }
-  
+
   if (c != NULL) {
      mp_clamp(&q);
      mp_exch(&q, c);
   }
   mp_clear(&q);
-  
+
   return res;
 }
 
@@ -2392,7 +2392,7 @@ void mp_dr_setup(mp_int *a, mp_digit *d)
    /* the casts are required if DIGIT_BIT is one less than
     * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
     */
-   *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - 
+   *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
         ((mp_word)a->dp[0]));
 }
 
@@ -2422,7 +2422,7 @@ void mp_dr_setup(mp_int *a, mp_digit *d)
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-/* swap the elements of two integers, for cases where you can't simply swap the 
+/* swap the elements of two integers, for cases where you can't simply swap the
  * mp_int pointers around
  */
 void
@@ -2565,7 +2565,7 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
      err = mp_exptmod(&tmpG, &tmpX, P, Y);
      mp_clear_multi(&tmpG, &tmpX, NULL);
      return err;
-#else 
+#else
      /* no invmod */
      return MP_VAL;
 #endif
@@ -2592,7 +2592,7 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
      dr = mp_reduce_is_2k(P) << 1;
   }
 #endif
-    
+
   /* if the modulus is odd or dr != 0 use the montgomery method */
 #ifdef BN_MP_EXPTMOD_FAST_C
   if (mp_isodd (P) == 1 || dr !=  0) {
@@ -2706,7 +2706,7 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode
 
   /* determine and setup reduction code */
   if (redmode == 0) {
-#ifdef BN_MP_MONTGOMERY_SETUP_C     
+#ifdef BN_MP_MONTGOMERY_SETUP_C
      /* now setup montgomery  */
      if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
         goto LBL_M;
@@ -2721,7 +2721,7 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode
      if (((P->used * 2 + 1) < MP_WARRAY) &&
           P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
         redux = fast_mp_montgomery_reduce;
-     } else 
+     } else
 #endif
      {
 #ifdef BN_MP_MONTGOMERY_REDUCE_C
@@ -2772,7 +2772,7 @@ int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode
      if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
        goto LBL_RES;
      }
-#else 
+#else
      err = MP_VAL;
      goto LBL_RES;
 #endif
@@ -2962,7 +2962,7 @@ LBL_M:
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-/* Extended euclidean algorithm of (a, b) produces 
+/* Extended euclidean algorithm of (a, b) produces
    a*u1 + b*u2 = u3
  */
 int mp_exteuclid(mp_int *a, mp_int *b, mp_int *U1, mp_int *U2, mp_int *U3)
@@ -3052,10 +3052,10 @@ _ERR: mp_clear_multi(&u1, &u2, &u3, &v1, &v2, &v3, &t1, &t2, &t3, &q, &tmp, NULL
 int mp_fread(mp_int *a, int radix, FILE *stream)
 {
    int err, ch, neg, y;
-   
+
    /* clear a */
    mp_zero(a);
-   
+
    /* if first digit is - then set negative */
    ch = fgetc(stream);
    if (ch == '-') {
@@ -3064,7 +3064,7 @@ int mp_fread(mp_int *a, int radix, FILE *stream)
    } else {
       neg = MP_ZPOS;
    }
-   
+
    for (;;) {
       /* find y in the radix map */
       for (y = 0; y < radix; y++) {
@@ -3075,7 +3075,7 @@ int mp_fread(mp_int *a, int radix, FILE *stream)
       if (y == radix) {
          break;
       }
-      
+
       /* shift up and add */
       if ((err = mp_mul_d(a, radix, a)) != MP_OKAY) {
          return err;
@@ -3083,13 +3083,13 @@ int mp_fread(mp_int *a, int radix, FILE *stream)
       if ((err = mp_add_d(a, y, a)) != MP_OKAY) {
          return err;
       }
-      
+
       ch = fgetc(stream);
    }
    if (mp_cmp_d(a, 0) != MP_EQ) {
       a->sign = neg;
    }
-   
+
    return MP_OKAY;
 }
 
@@ -3123,7 +3123,7 @@ int mp_fwrite(mp_int *a, int radix, FILE *stream)
 {
    char *buf;
    int err, len, x;
-   
+
    if ((err = mp_radix_size(a, radix, &len)) != MP_OKAY) {
       return err;
    }
@@ -3132,19 +3132,19 @@ int mp_fwrite(mp_int *a, int radix, FILE *stream)
    if (buf == NULL) {
       return MP_MEM;
    }
-   
+
    if ((err = mp_toradix(a, buf, radix)) != MP_OKAY) {
       XFREE (buf);
       return err;
    }
-   
+
    for (x = 0; x < len; x++) {
        if (fputc(buf[x], stream) == EOF) {
           XFREE (buf);
           return MP_VAL;
        }
    }
-   
+
    XFREE (buf);
    return MP_OKAY;
 }
@@ -3236,17 +3236,17 @@ int mp_gcd (mp_int * a, mp_int * b, mp_int * c)
         /* swap u and v to make sure v is >= u */
         mp_exch(&u, &v);
      }
-     
+
      /* subtract smallest from largest */
      if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
         goto LBL_V;
      }
-     
+
      /* Divide out all factors of two */
      if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
         goto LBL_V;
-     } 
-  } 
+     }
+  }
 
   /* multiply by 2**k which we divided out at the beginning */
   if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
@@ -3285,7 +3285,7 @@ LBL_U:mp_clear (&v);
  */
 
 /* get the lower 32-bits of an mp_int */
-unsigned long mp_get_int(mp_int * a) 
+unsigned long mp_get_int(mp_int * a)
 {
   int i;
   unsigned long res;
@@ -3299,7 +3299,7 @@ unsigned long mp_get_int(mp_int * a)
 
   /* get most significant digit of result */
   res = DIGIT(a,i);
-   
+
   while (--i >= 0) {
     res = (res << DIGIT_BIT) | DIGIT(a,i);
   }
@@ -3481,7 +3481,7 @@ int mp_init_copy (mp_int * a, mp_int * b)
  */
 #include <stdarg.h>
 
-int mp_init_multi(mp_int *mp, ...) 
+int mp_init_multi(mp_int *mp, ...)
 {
     mp_err res = MP_OKAY;      /* Assume ok until proven otherwise */
     int n = 0;                 /* Number of ok inits */
@@ -3495,11 +3495,11 @@ int mp_init_multi(mp_int *mp, ...)
                succeeded in init-ing, then return error.
             */
             va_list clean_args;
-            
+
             /* end the current list */
             va_end(args);
-            
-            /* now start cleaning up */            
+
+            /* now start cleaning up */
             cur_arg = mp;
             va_start(clean_args, mp);
             while (n--) {
@@ -3621,7 +3621,7 @@ int mp_init_size (mp_int * a, int size)
 
   /* pad size so there are always extra digits */
   size += (MP_PREC * 2) - (size % MP_PREC);	
-  
+
   /* alloc mem */
   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size);
   if (a->dp == NULL) {
@@ -3725,7 +3725,7 @@ int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
   }
 
   /* init temps */
-  if ((res = mp_init_multi(&x, &y, &u, &v, 
+  if ((res = mp_init_multi(&x, &y, &u, &v,
                            &A, &B, &C, &D, NULL)) != MP_OKAY) {
      return res;
   }
@@ -3852,14 +3852,14 @@ top:
          goto LBL_ERR;
       }
   }
-  
+
   /* too big */
   while (mp_cmp_mag(&C, b) != MP_LT) {
       if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
          goto LBL_ERR;
       }
   }
-  
+
   /* C is now the inverse */
   mp_exch (&C, c);
   res = MP_OKAY;
@@ -3915,7 +3915,7 @@ static const char rem_105[105] = {
 };
 
 /* Store non-zero to ret if arg is square, and zero if not */
-int mp_is_square(mp_int *arg,int *ret) 
+int mp_is_square(mp_int *arg,int *ret)
 {
   int           res;
   mp_digit      c;
@@ -3923,7 +3923,7 @@ int mp_is_square(mp_int *arg,int *ret)
   unsigned long r;
 
   /* Default to Non-square :) */
-  *ret = MP_NO; 
+  *ret = MP_NO;
 
   if (arg->sign == MP_NEG) {
     return MP_VAL;
@@ -3957,8 +3957,8 @@ int mp_is_square(mp_int *arg,int *ret)
   r = mp_get_int(&t);
   /* Check for other prime modules, note it's not an ERROR but we must
    * free "t" so the easiest way is to goto ERR.  We know that res
-   * is already equal to MP_OKAY from the mp_mod call 
-   */ 
+   * is already equal to MP_OKAY from the mp_mod call
+   */
   if ( (1L<<(r%11)) & 0x5C4L )             goto ERR;
   if ( (1L<<(r%13)) & 0x9E4L )             goto ERR;
   if ( (1L<<(r%17)) & 0x5CE8L )            goto ERR;
@@ -4114,33 +4114,33 @@ LBL_A1:mp_clear (&a1);
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-/* c = |a| * |b| using Karatsuba Multiplication using 
+/* c = |a| * |b| using Karatsuba Multiplication using
  * three half size multiplications
  *
- * Let B represent the radix [e.g. 2**DIGIT_BIT] and 
- * let n represent half of the number of digits in 
+ * Let B represent the radix [e.g. 2**DIGIT_BIT] and
+ * let n represent half of the number of digits in
  * the min(a,b)
  *
  * a = a1 * B**n + a0
  * b = b1 * B**n + b0
  *
- * Then, a * b => 
+ * Then, a * b =>
    a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0
  *
- * Note that a1b1 and a0b0 are used twice and only need to be 
- * computed once.  So in total three half size (half # of 
- * digit) multiplications are performed, a0b0, a1b1 and 
+ * Note that a1b1 and a0b0 are used twice and only need to be
+ * computed once.  So in total three half size (half # of
+ * digit) multiplications are performed, a0b0, a1b1 and
  * (a1+b1)(a0+b0)
  *
  * Note that a multiplication of half the digits requires
- * 1/4th the number of single precision multiplications so in 
- * total after one call 25% of the single precision multiplications 
- * are saved.  Note also that the call to mp_mul can end up back 
- * in this function if the a0, a1, b0, or b1 are above the threshold.  
- * This is known as divide-and-conquer and leads to the famous 
- * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than 
- * the standard O(N**2) that the baseline/comba methods use.  
- * Generally though the overhead of this method doesn't pay off 
+ * 1/4th the number of single precision multiplications so in
+ * total after one call 25% of the single precision multiplications
+ * are saved.  Note also that the call to mp_mul can end up back
+ * in this function if the a0, a1, b0, or b1 are above the threshold.
+ * This is known as divide-and-conquer and leads to the famous
+ * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
+ * the standard O(N**2) that the baseline/comba methods use.
+ * Generally though the overhead of this method doesn't pay off
  * until a certain size (N ~ 80) is reached.
  */
 int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
@@ -4208,7 +4208,7 @@ int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
     }
   }
 
-  /* only need to clamp the lower words since by definition the 
+  /* only need to clamp the lower words since by definition the
    * upper words x1/y1 must have a known number of digits
    */
   mp_clamp (&x0);
@@ -4216,7 +4216,7 @@ int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c)
 
   /* now calc the products x0y0 and x1y1 */
   /* after this x0 is no longer required, free temp [x0==t2]! */
-  if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)  
+  if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
     goto X1Y1;          /* x0y0 = x0*y0 */
   if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
     goto X1Y1;          /* x1y1 = x1*y1 */
@@ -4285,11 +4285,11 @@ ERR:
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-/* Karatsuba squaring, computes b = a*a using three 
+/* Karatsuba squaring, computes b = a*a using three
  * half size squarings
  *
- * See comments of karatsuba_mul for details.  It 
- * is essentially the same algorithm but merely 
+ * See comments of karatsuba_mul for details.  It
+ * is essentially the same algorithm but merely
  * tuned to perform recursive squarings.
  */
 int mp_karatsuba_sqr (mp_int * a, mp_int * b)
@@ -4945,29 +4945,29 @@ int mp_mul (mp_int * a, mp_int * b, mp_int * c)
 #ifdef BN_MP_TOOM_MUL_C
   if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
     res = mp_toom_mul(a, b, c);
-  } else 
+  } else
 #endif
 #ifdef BN_MP_KARATSUBA_MUL_C
   /* use Karatsuba? */
   if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
     res = mp_karatsuba_mul (a, b, c);
-  } else 
+  } else
 #endif
   {
     /* can we use the fast multiplier?
      *
-     * The fast multiplier can be used if the output will 
-     * have less than MP_WARRAY digits and the number of 
+     * The fast multiplier can be used if the output will
+     * have less than MP_WARRAY digits and the number of
      * digits won't affect carry propagation
      */
     int     digs = a->used + b->used + 1;
 
 #ifdef BN_FAST_S_MP_MUL_DIGS_C
     if ((digs < MP_WARRAY) &&
-        MIN(a->used, b->used) <= 
+        MIN(a->used, b->used) <=
         (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
       res = fast_s_mp_mul_digs (a, b, c, digs);
-    } else 
+    } else
 #endif
 #ifdef BN_S_MP_MUL_DIGS_C
       res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
@@ -5025,24 +5025,24 @@ int mp_mul_2(mp_int * a, mp_int * b)
 
     /* alias for source */
     tmpa = a->dp;
-    
+
     /* alias for dest */
     tmpb = b->dp;
 
     /* carry */
     r = 0;
     for (x = 0; x < a->used; x++) {
-    
-      /* get what will be the *next* carry bit from the 
-       * MSB of the current digit 
+
+      /* get what will be the *next* carry bit from the
+       * MSB of the current digit
        */
       rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
-      
+
       /* now shift up this digit, add in the carry [from the previous] */
       *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
-      
-      /* copy the carry that would be from the source 
-       * digit into the next iteration 
+
+      /* copy the carry that would be from the source
+       * digit into the next iteration
        */
       r = rr;
     }
@@ -5054,8 +5054,8 @@ int mp_mul_2(mp_int * a, mp_int * b)
       ++(b->used);
     }
 
-    /* now zero any excess digits on the destination 
-     * that we didn't write to 
+    /* now zero any excess digits on the destination
+     * that we didn't write to
      */
     tmpb = b->dp + b->used;
     for (x = b->used; x < oldused; x++) {
@@ -5145,7 +5145,7 @@ int mp_mul_2d (mp_int * a, int b, mp_int * c)
       /* set the carry to the carry bits of the current word */
       r = rr;
     }
-    
+
     /* set final carry */
     if (r != 0) {
        c->dp[(c->used)++] = r;
@@ -5307,14 +5307,14 @@ int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-/* find the n'th root of an integer 
+/* find the n'th root of an integer
  *
- * Result found such that (c)**b <= a and (c+1)**b > a 
+ * Result found such that (c)**b <= a and (c+1)**b > a
  *
- * This algorithm uses Newton's approximation 
- * x[i+1] = x[i] - f(x[i])/f'(x[i]) 
- * which will find the root in log(N) time where 
- * each step involves a fair bit.  This is not meant to 
+ * This algorithm uses Newton's approximation
+ * x[i+1] = x[i] - f(x[i])/f'(x[i])
+ * which will find the root in log(N) time where
+ * each step involves a fair bit.  This is not meant to
  * find huge roots [square and cube, etc].
  */
 int mp_n_root (mp_int * a, mp_digit b, mp_int * c)
@@ -5353,31 +5353,31 @@ int mp_n_root (mp_int * a, mp_digit b, mp_int * c)
     }
 
     /* t2 = t1 - ((t1**b - a) / (b * t1**(b-1))) */
-    
+
     /* t3 = t1**(b-1) */
-    if ((res = mp_expt_d (&t1, b - 1, &t3)) != MP_OKAY) {   
+    if ((res = mp_expt_d (&t1, b - 1, &t3)) != MP_OKAY) {
       goto LBL_T3;
     }
 
     /* numerator */
     /* t2 = t1**b */
-    if ((res = mp_mul (&t3, &t1, &t2)) != MP_OKAY) {    
+    if ((res = mp_mul (&t3, &t1, &t2)) != MP_OKAY) {
       goto LBL_T3;
     }
 
     /* t2 = t1**b - a */
-    if ((res = mp_sub (&t2, a, &t2)) != MP_OKAY) {  
+    if ((res = mp_sub (&t2, a, &t2)) != MP_OKAY) {
       goto LBL_T3;
     }
 
     /* denominator */
     /* t3 = t1**(b-1) * b  */
-    if ((res = mp_mul_d (&t3, b, &t3)) != MP_OKAY) {    
+    if ((res = mp_mul_d (&t3, b, &t3)) != MP_OKAY) {
       goto LBL_T3;
     }
 
     /* t3 = (t1**b - a)/(b * t1**(b-1)) */
-    if ((res = mp_div (&t2, &t3, &t3, NULL)) != MP_OKAY) {  
+    if ((res = mp_div (&t2, &t3, &t3, NULL)) != MP_OKAY) {
       goto LBL_T3;
     }
 
@@ -5542,7 +5542,7 @@ int mp_or (mp_int * a, mp_int * b, mp_int * c)
  */
 
 /* performs one Fermat test.
- * 
+ *
  * If "a" were prime then b**a == b (mod a) since the order of
  * the multiplicative sub-group would be phi(a) = a-1.  That means
  * it would be the same as b**(a mod (a-1)) == b**1 == b (mod a).
@@ -5607,7 +5607,7 @@ LBL_T:mp_clear (&t);
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-/* determines if an integers is divisible by one 
+/* determines if an integers is divisible by one
  * of the first PRIME_SIZE primes or not
  *
  * sets result to 0 if not, 1 if yes
@@ -5748,11 +5748,11 @@ LBL_B:mp_clear (&b);
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-/* Miller-Rabin test of "a" to the base of "b" as described in 
+/* Miller-Rabin test of "a" to the base of "b" as described in
  * HAC pp. 139 Algorithm 4.24
  *
  * Sets result to 0 if definitely composite or 1 if probably prime.
- * Randomly the chance of error is no more than 1/4 and often 
+ * Randomly the chance of error is no more than 1/4 and often
  * very much lower.
  */
 int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
@@ -5766,7 +5766,7 @@ int mp_prime_miller_rabin (mp_int * a, mp_int * b, int *result)
   /* ensure b > 1 */
   if (mp_cmp_d(b, 1) != MP_GT) {
      return MP_VAL;
-  }     
+  }
 
   /* get n1 = a - 1 */
   if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
@@ -6088,7 +6088,7 @@ int mp_prime_rabin_miller_trials(int size)
 /* makes a truly random prime of a given size (bits),
  *
  * Flags are as follows:
- * 
+ *
  *   LTM_PRIME_BBS      - make prime congruent to 3 mod 4
  *   LTM_PRIME_SAFE     - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
  *   LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero
@@ -6133,7 +6133,7 @@ int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback
    maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
    if (flags & LTM_PRIME_2MSB_ON) {
       maskOR_msb       |= 0x80 >> ((9 - size) & 7);
-   }  
+   }
 
    /* get the maskOR_lsb */
    maskOR_lsb         = 1;
@@ -6147,7 +6147,7 @@ int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback
          err = MP_VAL;
          goto error;
       }
- 
+
       /* work over the MSbyte */
       tmp[0]    &= maskAND;
       tmp[0]    |= 1 << ((size - 1) & 7);
@@ -6161,7 +6161,7 @@ int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback
 
       /* is it prime? */
       if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY)           { goto error; }
-      if (res == MP_NO) {  
+      if (res == MP_NO) {
          continue;
       }
 
@@ -6169,7 +6169,7 @@ int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback
          /* see if (a-1)/2 is prime */
          if ((err = mp_sub_d(a, 1, a)) != MP_OKAY)                    { goto error; }
          if ((err = mp_div_2(a, a)) != MP_OKAY)                       { goto error; }
- 
+
          /* is it prime? */
          if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY)        { goto error; }
       }
@@ -6253,7 +6253,7 @@ int mp_radix_size (mp_int * a, int radix, int *size)
   }
 
   /* force temp to positive */
-  t.sign = MP_ZPOS; 
+  t.sign = MP_ZPOS;
 
   /* fetch out all of the digits */
   while (mp_iszero (&t) == MP_NO) {
@@ -6397,8 +6397,8 @@ int mp_read_radix (mp_int * a, const char *str, int radix)
     return MP_VAL;
   }
 
-  /* if the leading digit is a 
-   * minus set the sign to negative. 
+  /* if the leading digit is a
+   * minus set the sign to negative.
    */
   if (*str == '-') {
     ++str;
@@ -6409,7 +6409,7 @@ int mp_read_radix (mp_int * a, const char *str, int radix)
 
   /* set the integer to the default of zero */
   mp_zero (a);
-  
+
   /* process each digit of the string */
   while (*str) {
     /* if the radix < 36 the conversion is case insensitive
@@ -6423,9 +6423,9 @@ int mp_read_radix (mp_int * a, const char *str, int radix)
       }
     }
 
-    /* if the char was found in the map 
+    /* if the char was found in the map
      * and is less than the given radix add it
-     * to the number, otherwise exit the loop. 
+     * to the number, otherwise exit the loop.
      */
     if (y < radix) {
       if ((res = mp_mul_d (a, (mp_digit) radix, a)) != MP_OKAY) {
@@ -6439,7 +6439,7 @@ int mp_read_radix (mp_int * a, const char *str, int radix)
     }
     ++str;
   }
-  
+
   /* set the sign only if a != 0 */
   if (mp_iszero(a) != 1) {
      a->sign = neg;
@@ -6576,7 +6576,7 @@ int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-/* reduces x mod m, assumes 0 < x < m**2, mu is 
+/* reduces x mod m, assumes 0 < x < m**2, mu is
  * precomputed via mp_reduce_setup.
  * From HAC pp.604 Algorithm 14.42
  */
@@ -6591,7 +6591,7 @@ int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
   }
 
   /* q1 = x / b**(k-1)  */
-  mp_rshd (&q, um - 1);         
+  mp_rshd (&q, um - 1);
 
   /* according to HAC this optimization is ok */
   if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
@@ -6607,8 +6607,8 @@ int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
     if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
       goto CLEANUP;
     }
-#else 
-    { 
+#else
+    {
       res = MP_VAL;
       goto CLEANUP;
     }
@@ -6616,7 +6616,7 @@ int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
   }
 
   /* q3 = q2 / b**(k+1) */
-  mp_rshd (&q, um + 1);         
+  mp_rshd (&q, um + 1);
 
   /* x = x mod b**(k+1), quick (no division) */
   if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
@@ -6648,7 +6648,7 @@ int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
       goto CLEANUP;
     }
   }
-  
+
 CLEANUP:
   mp_clear (&q);
 
@@ -6685,35 +6685,35 @@ int mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
 {
    mp_int q;
    int    p, res;
-   
+
    if ((res = mp_init(&q)) != MP_OKAY) {
       return res;
    }
-   
-   p = mp_count_bits(n);    
+
+   p = mp_count_bits(n);
 top:
    /* q = a/2**p, a = a mod 2**p */
    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
       goto ERR;
    }
-   
+
    if (d != 1) {
       /* q = q * d */
-      if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) { 
+      if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
          goto ERR;
       }
    }
-   
+
    /* a = a + q */
    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
       goto ERR;
    }
-   
+
    if (mp_cmp_mag(a, n) != MP_LT) {
       s_mp_sub(a, n, a);
       goto top;
    }
-   
+
 ERR:
    mp_clear(&q);
    return res;
@@ -6745,7 +6745,7 @@ ERR:
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-/* reduces a modulo n where n is of the form 2**p - d 
+/* reduces a modulo n where n is of the form 2**p - d
    This differs from reduce_2k since "d" can be larger
    than a single digit.
 */
@@ -6753,33 +6753,33 @@ int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
 {
    mp_int q;
    int    p, res;
-   
+
    if ((res = mp_init(&q)) != MP_OKAY) {
       return res;
    }
-   
-   p = mp_count_bits(n);    
+
+   p = mp_count_bits(n);
 top:
    /* q = a/2**p, a = a mod 2**p */
    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
       goto ERR;
    }
-   
+
    /* q = q * d */
-   if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { 
+   if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
       goto ERR;
    }
-   
+
    /* a = a + q */
    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
       goto ERR;
    }
-   
+
    if (mp_cmp_mag(a, n) != MP_LT) {
       s_mp_sub(a, n, a);
       goto top;
    }
-   
+
 ERR:
    mp_clear(&q);
    return res;
@@ -6816,22 +6816,22 @@ int mp_reduce_2k_setup(mp_int *a, mp_digit *d)
 {
    int res, p;
    mp_int tmp;
-   
+
    if ((res = mp_init(&tmp)) != MP_OKAY) {
       return res;
    }
-   
+
    p = mp_count_bits(a);
    if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
       mp_clear(&tmp);
       return res;
    }
-   
+
    if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
       mp_clear(&tmp);
       return res;
    }
-   
+
    *d = tmp.dp[0];
    mp_clear(&tmp);
    return MP_OKAY;
@@ -6867,19 +6867,19 @@ int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
 {
    int    res;
    mp_int tmp;
-   
+
    if ((res = mp_init(&tmp)) != MP_OKAY) {
       return res;
    }
-   
+
    if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
       goto ERR;
    }
-   
+
    if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
       goto ERR;
    }
-   
+
 ERR:
    mp_clear(&tmp);
    return res;
@@ -6915,7 +6915,7 @@ int mp_reduce_is_2k(mp_int *a)
 {
    int ix, iy, iw;
    mp_digit iz;
-   
+
    if (a->used == 0) {
       return MP_NO;
    } else if (a->used == 1) {
@@ -6924,7 +6924,7 @@ int mp_reduce_is_2k(mp_int *a)
       iy = mp_count_bits(a);
       iz = 1;
       iw = 1;
-    
+
       /* Test every bit from the second digit up, must be 1 */
       for (ix = DIGIT_BIT; ix < iy; ix++) {
           if ((a->dp[iw] & iz) == 0) {
@@ -6970,7 +6970,7 @@ int mp_reduce_is_2k(mp_int *a)
 int mp_reduce_is_2k_l(mp_int *a)
 {
    int ix, iy;
-   
+
    if (a->used == 0) {
       return MP_NO;
    } else if (a->used == 1) {
@@ -6983,7 +6983,7 @@ int mp_reduce_is_2k_l(mp_int *a)
           }
       }
       return (iy >= (a->used/2)) ? MP_YES : MP_NO;
-      
+
    }
    return MP_NO;
 }
@@ -7020,7 +7020,7 @@ int mp_reduce_is_2k_l(mp_int *a)
 int mp_reduce_setup (mp_int * a, mp_int * b)
 {
   int     res;
-  
+
   if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
     return res;
   }
@@ -7079,8 +7079,8 @@ void mp_rshd (mp_int * a, int b)
     /* top [offset into digits] */
     top = a->dp + b;
 
-    /* this is implemented as a sliding window where 
-     * the window is b-digits long and digits from 
+    /* this is implemented as a sliding window where
+     * the window is b-digits long and digits from
      * the top of the window are copied to the bottom
      *
      * e.g.
@@ -7098,7 +7098,7 @@ void mp_rshd (mp_int * a, int b)
       *bottom++ = 0;
     }
   }
-  
+
   /* remove excess digits */
   a->used -= b;
 }
@@ -7167,7 +7167,7 @@ int mp_set_int (mp_int * a, unsigned long b)
   int     x, res;
 
   mp_zero (a);
-  
+
   /* set four bits at a time */
   for (x = 0; x < 8; x++) {
     /* shift the number up four bits */
@@ -7218,10 +7218,10 @@ int mp_shrink (mp_int * a)
 {
   mp_digit *tmp;
   int used = 1;
-  
+
   if(a->used > 0)
     used = a->used;
-  
+
   if (a->alloc != used) {
     if ((tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * used)) == NULL) {
       return MP_MEM;
@@ -7299,18 +7299,18 @@ mp_sqr (mp_int * a, mp_int * b)
   if (a->used >= TOOM_SQR_CUTOFF) {
     res = mp_toom_sqr(a, b);
   /* Karatsuba? */
-  } else 
+  } else
 #endif
 #ifdef BN_MP_KARATSUBA_SQR_C
 if (a->used >= KARATSUBA_SQR_CUTOFF) {
     res = mp_karatsuba_sqr (a, b);
-  } else 
+  } else
 #endif
   {
 #ifdef BN_FAST_S_MP_SQR_C
     /* can we use the fast comba multiplier? */
-    if ((a->used * 2 + 1) < MP_WARRAY && 
-         a->used < 
+    if ((a->used * 2 + 1) < MP_WARRAY &&
+         a->used <
          (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
       res = fast_s_mp_sqr (a, b);
     } else
@@ -7396,7 +7396,7 @@ mp_sqrmod (mp_int * a, mp_int * b, mp_int * c)
  */
 
 /* this function is less generic than mp_n_root, simpler and faster */
-int mp_sqrt(mp_int *arg, mp_int *ret) 
+int mp_sqrt(mp_int *arg, mp_int *ret)
 {
   int res;
   mp_int t1,t2;
@@ -7423,7 +7423,7 @@ int mp_sqrt(mp_int *arg, mp_int *ret)
   /* First approx. (not very bad for large arg) */
   mp_rshd (&t1,t1.used/2);
 
-  /* t1 > 0  */ 
+  /* t1 > 0  */
   if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) {
     goto E1;
   }
@@ -7434,7 +7434,7 @@ int mp_sqrt(mp_int *arg, mp_int *ret)
     goto E1;
   }
   /* And now t1 > sqrt(arg) */
-  do { 
+  do {
     if ((res = mp_div(arg,&t1,&t2,NULL)) != MP_OKAY) {
       goto E1;
     }
@@ -7845,28 +7845,28 @@ int mp_to_unsigned_bin_n (mp_int * a, unsigned char *b, unsigned long *outlen)
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-/* multiplication using the Toom-Cook 3-way algorithm 
+/* multiplication using the Toom-Cook 3-way algorithm
  *
- * Much more complicated than Karatsuba but has a lower 
- * asymptotic running time of O(N**1.464).  This algorithm is 
- * only particularly useful on VERY large inputs 
+ * Much more complicated than Karatsuba but has a lower
+ * asymptotic running time of O(N**1.464).  This algorithm is
+ * only particularly useful on VERY large inputs
  * (we're talking 1000s of digits here...).
 */
 int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
 {
     mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
     int res, B;
-        
+
     /* init temps */
-    if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, 
-                             &a0, &a1, &a2, &b0, &b1, 
+    if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4,
+                             &a0, &a1, &a2, &b0, &b1,
                              &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
        return res;
     }
-    
+
     /* B */
     B = MIN(a->used, b->used) / 3;
-    
+
     /* a = a2 * B**2 + a1 * B + a0 */
     if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
        goto ERR;
@@ -7882,7 +7882,7 @@ int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
        goto ERR;
     }
     mp_rshd(&a2, B*2);
-    
+
     /* b = b2 * B**2 + b1 * B + b0 */
     if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
        goto ERR;
@@ -7898,17 +7898,17 @@ int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
        goto ERR;
     }
     mp_rshd(&b2, B*2);
-    
+
     /* w0 = a0*b0 */
     if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) {
        goto ERR;
     }
-    
+
     /* w4 = a2 * b2 */
     if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) {
        goto ERR;
     }
-    
+
     /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
     if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
        goto ERR;
@@ -7922,7 +7922,7 @@ int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
     if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
        goto ERR;
     }
-    
+
     if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) {
        goto ERR;
     }
@@ -7935,11 +7935,11 @@ int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
     if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
        goto ERR;
     }
-    
+
     if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) {
        goto ERR;
     }
-    
+
     /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
     if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
        goto ERR;
@@ -7953,7 +7953,7 @@ int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
     if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
        goto ERR;
     }
-    
+
     if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) {
        goto ERR;
     }
@@ -7966,11 +7966,11 @@ int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
     if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
        goto ERR;
     }
-    
+
     if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) {
        goto ERR;
     }
-    
+
 
     /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
     if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
@@ -7988,19 +7988,19 @@ int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
     if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) {
        goto ERR;
     }
-    
-    /* now solve the matrix 
-    
+
+    /* now solve the matrix
+
        0  0  0  0  1
        1  2  4  8  16
        1  1  1  1  1
        16 8  4  2  1
        1  0  0  0  0
-       
-       using 12 subtractions, 4 shifts, 
-              2 small divisions and 1 small multiplication 
+
+       using 12 subtractions, 4 shifts,
+              2 small divisions and 1 small multiplication
      */
-     
+
      /* r1 - r4 */
      if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
         goto ERR;
@@ -8072,7 +8072,7 @@ int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
      if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
         goto ERR;
      }
-     
+
      /* at this point shift W[n] by B*n */
      if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
         goto ERR;
@@ -8085,8 +8085,8 @@ int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
      }
      if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
         goto ERR;
-     }     
-     
+     }
+
      if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) {
         goto ERR;
      }
@@ -8098,15 +8098,15 @@ int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
      }
      if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) {
         goto ERR;
-     }     
-     
+     }
+
 ERR:
-     mp_clear_multi(&w0, &w1, &w2, &w3, &w4, 
-                    &a0, &a1, &a2, &b0, &b1, 
+     mp_clear_multi(&w0, &w1, &w2, &w3, &w4,
+                    &a0, &a1, &a2, &b0, &b1,
                     &b2, &tmp1, &tmp2, NULL);
      return res;
-}     
-     
+}
+
 #endif
 
 /* $Source$ */
@@ -8442,9 +8442,9 @@ int mp_toradix (mp_int * a, char *str, int radix)
  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
  */
 
-/* stores a bignum as a ASCII string in a given radix (2..64) 
+/* stores a bignum as a ASCII string in a given radix (2..64)
  *
- * Stores upto maxlen-1 chars and always a NULL byte 
+ * Stores upto maxlen-1 chars and always a NULL byte
  */
 int mp_toradix_n(mp_int * a, char *str, int radix, int maxlen)
 {
@@ -8477,7 +8477,7 @@ int mp_toradix_n(mp_int * a, char *str, int radix, int maxlen)
     /* store the flag and mark the number as positive */
     *str++ = '-';
     t.sign = MP_ZPOS;
- 
+
     /* subtract a char */
     --maxlen;
   }
@@ -8828,8 +8828,8 @@ s_mp_add (mp_int * a, mp_int * b, mp_int * c)
       *tmpc++ &= MP_MASK;
     }
 
-    /* now copy higher words if any, that is in A+B 
-     * if A or B has more digits add those in 
+    /* now copy higher words if any, that is in A+B
+     * if A or B has more digits add those in
      */
     if (min != max) {
       for (; i < max; i++) {
@@ -8921,7 +8921,7 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
   /* init M array */
   /* init first cell */
   if ((err = mp_init(&M[1])) != MP_OKAY) {
-     return err; 
+     return err;
   }
 
   /* now init the second half of the array */
@@ -8939,7 +8939,7 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
   if ((err = mp_init (&mu)) != MP_OKAY) {
     goto LBL_M;
   }
-  
+
   if (redmode == 0) {
      if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
         goto LBL_MU;
@@ -8950,22 +8950,22 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
         goto LBL_MU;
      }
      redux = mp_reduce_2k_l;
-  }    
+  }
 
   /* create M table
    *
-   * The M table contains powers of the base, 
+   * The M table contains powers of the base,
    * e.g. M[x] = G**x mod P
    *
-   * The first half of the table is not 
+   * The first half of the table is not
    * computed though accept for M[0] and M[1]
    */
   if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
     goto LBL_MU;
   }
 
-  /* compute the value at M[1<<(winsize-1)] by squaring 
-   * M[1] (winsize-1) times 
+  /* compute the value at M[1<<(winsize-1)] by squaring
+   * M[1] (winsize-1) times
    */
   if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
     goto LBL_MU;
@@ -8973,7 +8973,7 @@ int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
 
   for (x = 0; x < (winsize - 1); x++) {
     /* square it */
-    if ((err = mp_sqr (&M[1 << (winsize - 1)], 
+    if ((err = mp_sqr (&M[1 << (winsize - 1)],
                        &M[1 << (winsize - 1)])) != MP_OKAY) {
       goto LBL_MU;
     }
@@ -9139,7 +9139,7 @@ LBL_M:
  */
 
 /* multiplies |a| * |b| and only computes upto digs digits of result
- * HAC pp. 595, Algorithm 14.12  Modified so you can control how 
+ * HAC pp. 595, Algorithm 14.12  Modified so you can control how
  * many digits of output are created.
  */
 int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
@@ -9152,7 +9152,7 @@ int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
 
   /* can we use the fast multiplier? */
   if (((digs) < MP_WARRAY) &&
-      MIN (a->used, b->used) < 
+      MIN (a->used, b->used) <
           (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
     return fast_s_mp_mul_digs (a, b, c, digs);
   }
@@ -9174,10 +9174,10 @@ int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
     /* setup some aliases */
     /* copy of the digit from a used within the nested loop */
     tmpx = a->dp[ix];
-    
+
     /* an alias for the destination shifted ix places */
     tmpt = t.dp + ix;
-    
+
     /* an alias for the digits of b */
     tmpy = b->dp;
 
@@ -9350,7 +9350,7 @@ int s_mp_sqr (mp_int * a, mp_int * b)
 
     /* alias for where to store the results */
     tmpt        = t.dp + (2*ix + 1);
-    
+
     for (iy = ix + 1; iy < pa; iy++) {
       /* first calculate the product */
       r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
@@ -9504,14 +9504,14 @@ s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
 -------------------------------------------------------------
  Intel P4 Northwood     /GCC v3.4.1   /        88/       128/LTM 0.32 ;-)
  AMD Athlon64           /GCC v3.4.4   /        80/       120/LTM 0.35
- 
+
 */
 
 int     KARATSUBA_MUL_CUTOFF = 80,      /* Min. number of digits before Karatsuba multiplication is used. */
         KARATSUBA_SQR_CUTOFF = 120,     /* Min. number of digits before Karatsuba squaring is used. */
-        
+
         TOOM_MUL_CUTOFF      = 350,      /* no optimal values of these are known yet so set em high */
-        TOOM_SQR_CUTOFF      = 400; 
+        TOOM_SQR_CUTOFF      = 400;
 #endif
 
 /* $Source$ */