diff --git a/bn.c b/bn.c
index 0debe07..33d027f 100644
--- a/bn.c
+++ b/bn.c
@@ -2888,11 +2888,6 @@ int mp_exptmod(mp_int *G, mp_int *X, mp_int *P, mp_int *Y)
     *
     * The M table contains powers of the input base, e.g. M[x] = G^x mod P
     *
-    * This table is not made in the straight forward manner of a for loop with only
-    * multiplications.  Since squaring is faster than multiplication we use as many
-    * squarings as possible.  As a result about half of the steps to make the M 
-    * table are squarings.  
-    *
     * The first half of the table is not computed though accept for M[0] and M[1]
     */
    mp_set(&M[0], 1);
@@ -2914,7 +2909,6 @@ int mp_exptmod(mp_int *G, mp_int *X, mp_int *P, mp_int *Y)
        }
    }  
    
-     
    /* create upper table */
    for (x = (1<<(winsize-1))+1; x < (1 << winsize); x++) {
        if ((err = mp_mul(&M[x-1], &M[1], &M[x])) != MP_OKAY) {
@@ -3132,6 +3126,104 @@ __T1:  mp_clear(&t1);
    return res;
 }
 
+/* computes the jacobi c = (a | n) (or Legendre if b is prime) 
+ * HAC pp. 73 Algorithm 2.149 
+ */
+int mp_jacobi(mp_int *a, mp_int *n, int *c)
+{
+   mp_int a1, n1, e;
+   int s, r, res;
+   mp_digit residue;
+   
+   /* step 1.  if a == 0, return 0 */
+   if (mp_iszero(a) == 1) {
+      *c = 0;
+      return MP_OKAY;
+   }
+   
+   /* step 2.  if a == 1, return 1 */
+   if (mp_cmp_d(a, 1) == MP_EQ) {
+      *c = 1;
+      return MP_OKAY;
+   }
+   
+   /* default */
+   s = 0;
+   
+   /* step 3.  write a = a1 * 2^e  */
+   if ((res = mp_init_copy(&a1, a)) != MP_OKAY) {
+      return res;
+   }
+   
+   if ((res = mp_init(&n1)) != MP_OKAY) {
+      goto __A1;
+   }
+   
+   if ((res = mp_init(&e)) != MP_OKAY) {
+      goto __N1;
+   }
+   
+   while (mp_iseven(&a1) == 1) {
+       if ((res = mp_add_d(&e, 1, &e)) != MP_OKAY) {
+          goto __E;
+       }
+       
+       if ((res = mp_div_2(&a1, &a1)) != MP_OKAY) {
+          goto __E;
+       }
+   }
+   
+   /* step 4.  if e is even set s=1 */
+   if (mp_iseven(&e) == 1) {
+      s = 1;
+   } else {
+      /* else set s=1 if n = 1/7 (mod 8) or s=-1 if n = 3/5 (mod 8) */
+      if ((res = mp_mod_d(n, 8, &residue)) != MP_OKAY) {
+         goto __E;
+      }
+      
+      if (residue == 1 || residue == 7) {
+         s = 1;
+      } else if (residue == 3 || residue == 5) {
+         s = -1;
+      }
+   }
+   
+   /* step 5.  if n == 3 (mod 4) *and* a1 == 3 (mod 4) then s = -s */
+   if ((res = mp_mod_d(n, 4, &residue)) != MP_OKAY) {
+      goto __E;
+   }
+   if (residue == 3) {
+      if ((res = mp_mod_d(&a1, 4, &residue)) != MP_OKAY) {
+         goto __E;
+      }
+      if (residue == 3) {
+         s = -s;
+      }
+   }
+   
+   /* if a1 == 1 we're done */
+   if (mp_cmp_d(&a1, 1) == MP_EQ) {
+      *c = s;
+   } else {
+      /* n1 = n mod a1 */
+      if ((res = mp_mod(n, &a1, &n1)) != MP_OKAY) {
+         goto __E;
+      }
+      if ((res = mp_jacobi(&n1, &a1, &r)) != MP_OKAY) {
+         goto __E;
+      }
+      *c = s * r;
+   }
+   
+   /* done */
+   res = MP_OKAY;
+__E:   mp_clear(&e);
+__N1:  mp_clear(&n1);
+__A1:  mp_clear(&a1);
+   return res;
+}
+
 /* --> radix conversion <-- */
 /* reverse an array, used for radix code */
 static void reverse(unsigned char *s, int len)
diff --git a/bn.h b/bn.h
index 4493c65..903e7d6 100644
--- a/bn.h
+++ b/bn.h
@@ -21,6 +21,11 @@
 #include <ctype.h>
 #include <limits.h>
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
 /* some default configurations.  
  *
  * A "mp_digit" must be able to hold DIGIT_BIT + 1 bits 
@@ -239,6 +244,9 @@ int mp_n_root(mp_int *a, mp_digit b, mp_int *c);
 /* shortcut for square root */
 #define mp_sqrt(a, b) mp_n_root(a, 2, b)
 
+/* computes the jacobi c = (a | n) (or Legendre if b is prime)  */
+int mp_jacobi(mp_int *a, mp_int *n, int *c);
+
 /* used to setup the Barrett reduction for a given modulus b */
 int mp_reduce_setup(mp_int *a, mp_int *b);
 
@@ -280,5 +288,10 @@ int mp_radix_size(mp_int *a, int radix);
 #define mp_todecimal(M, S) mp_toradix((M), (S), 10)
 #define mp_tohex(M, S)     mp_toradix((M), (S), 16)
 
+#ifdef __cplusplus
+   }
+#endif
+
+
 #endif
 
diff --git a/bn.pdf b/bn.pdf
index 38011e2..3bea8f2 100644
Binary files a/bn.pdf and b/bn.pdf differ
diff --git a/bn.tex b/bn.tex
index cdf0213..ed2a46d 100644
--- a/bn.tex
+++ b/bn.tex
@@ -1,7 +1,7 @@
 \documentclass{article}
 \begin{document}
 
-\title{LibTomMath v0.08 \\ A Free Multiple Precision Integer Library}
+\title{LibTomMath v0.09 \\ A Free Multiple Precision Integer Library}
 \author{Tom St Denis \\ tomstdenis@iahu.ca}
 \maketitle
 \newpage
@@ -23,8 +23,8 @@ LibTomMath was designed with the following goals in mind:
 \item Be written entirely in portable C.
 \end{enumerate}
 
-All three goals have been achieved.  Particularly the speed increase goal.  For example, a 512-bit modular exponentiation is
-four times faster\footnote{On an Athlon XP with GCC 3.2} with LibTomMath compared to MPI.
+All three goals have been achieved.  Particularly the speed increase goal.  For example, a 512-bit modular exponentiation 
+is eight times faster\footnote{On an Athlon XP with GCC 3.2} with LibTomMath compared to MPI.
 
 Being compatible with MPI means that applications that already use it can be ported fairly quickly.  Currently there are 
 a few differences but there are many similarities.  In fact the average MPI based application can be ported in under 15
@@ -51,9 +51,7 @@ with
 #include "bn.h"
 \end{verbatim}
 
-Remove ``mpi.c'' from your project and replace it with ``bn.c''.  Note that currently MPI has a few more functions than
-LibTomMath has (e.g. no square-root code and a few others).  Those are planned for future releases.  In the interim work 
-arounds can be sought.  Note that LibTomMath doesn't lack any functions required to build a cryptosystem.
+Remove ``mpi.c'' from your project and replace it with ``bn.c''.
 
 \section{Programming with LibTomMath}
 
@@ -278,6 +276,9 @@ int mp_lcm(mp_int *a, mp_int *b, mp_int *c);
 /* find the b'th root of a  */
 int mp_n_root(mp_int *a, mp_digit b, mp_int *c);
 
+/* computes the jacobi c = (a | n) (or Legendre if b is prime)  */
+int mp_jacobi(mp_int *a, mp_int *n, int *c);
+
 /* d = a^b (mod c) */
 int mp_exptmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d);
 \end{verbatim}
@@ -444,6 +445,14 @@ requires $b$ multiplications and one division for a total work of $O(6N^2 \cdot
 If the input $a$ is negative and $b$ is even the function returns an error.  Otherwise the function will return a root
 that has a sign that agrees with the sign of $a$.
 
+\subsubsection{mp\_jacobi(mp\_int *a, mp\_int *n, int *c)}
+Computes $c = \left ( {a \over n} \right )$ or the Jacobi function of $(a, n)$ and stores the result in an integer addressed
+by $c$.  Since the result of the Jacobi function $\left ( {a \over n} \right ) \in \lbrace -1, 0, 1 \rbrace$ it seemed
+natural to store the result in a simple C style \textbf{int}.  If $n$ is prime then the Jacobi function produces
+the same results as the Legendre function\footnote{Source: Handbook of Applied Cryptography, pp. 73}.  This means if
+$n$ is prime then $\left ( {a \over n} \right )$ is equal to $1$ if $a$ is a quadratic residue modulo $n$ or $-1$ if 
+it is not.  
+
 \subsubsection{mp\_exptmod(mp\_int *a, mp\_int *b, mp\_int *c, mp\_int *d)}
 Computes $d = a^b \mbox{ (mod }c\mbox{)}$ using a sliding window $k$-ary exponentiation algorithm.  For an $\alpha$-bit
 exponent it performs $\alpha$ squarings and at most $\lfloor \alpha/k \rfloor + k$ multiplications.  The value of $k$ is
diff --git a/changes.txt b/changes.txt
index 87773a2..2d84db9 100644
--- a/changes.txt
+++ b/changes.txt
@@ -1,3 +1,8 @@
+Jan 6th, 2003
+v0.09  -- Updated the manual to reflect recent changes.  :-)
+       -- Added Jacobi function (mp_jacobi) to supplement the number theory side of the lib
+       -- Added a Mersenne prime finder demo in ./etc/mersenne.c
+
 Jan 2nd, 2003
 v0.08  -- Sped up the multipliers by moving the inner loop variables into a smaller scope
        -- Corrected a bunch of small "warnings"
diff --git a/demo.c b/demo.c
index 0bf5aac..f671758 100644
--- a/demo.c
+++ b/demo.c
@@ -94,7 +94,6 @@ int main(void)
    mp_init(&d);
    mp_init(&e);
    mp_init(&f);
-   
  
    mp_read_radix(&a, "V//////////////////////////////////////////////////////////////////////////////////////", 64);
    mp_reduce_setup(&b, &a);
diff --git a/etc/makefile b/etc/makefile
index f38ed47..ed8f915 100644
--- a/etc/makefile
+++ b/etc/makefile
@@ -1 +1 @@
-CFLAGS += -I../ -Wall -W -O3 -fomit-frame-pointer -funroll-loops ../bn.c 
\ No newline at end of file
+CFLAGS += -I../ -Wall -W -Wshadow -ansi -O3 -fomit-frame-pointer -funroll-loops ../bn.c 
\ No newline at end of file
diff --git a/etc/mersenne.c b/etc/mersenne.c
new file mode 100644
index 0000000..b6bbd51
--- /dev/null
+++ b/etc/mersenne.c
@@ -0,0 +1,150 @@
+/* Finds Mersenne primes using the Lucas-Lehmer test 
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+#include <time.h>
+#include <bn.h> 
+
+int is_mersenne(long s, int *pp)
+{
+  mp_int n, u, mu;
+  int res, k;
+  long ss;
+  
+  *pp = 0;
+  
+  if ((res = mp_init(&n)) != MP_OKAY) {
+     return res;
+  }
+  
+  if ((res = mp_init(&u)) != MP_OKAY) {
+     goto __N;
+  }
+  
+  if ((res = mp_init(&mu)) != MP_OKAY) {
+     goto __U;
+  }
+  
+  /* n = 2^s - 1 */
+  mp_set(&n, 1);
+  ss = s;
+  while (ss--) {
+     if ((res = mp_mul_2(&n, &n)) != MP_OKAY) {
+        goto __MU;
+     }
+  }
+  if ((res = mp_sub_d(&n, 1, &n)) != MP_OKAY) {
+     goto __MU;
+  }
+  
+  /* setup mu */
+  if ((res = mp_reduce_setup(&mu, &n)) != MP_OKAY) {
+     goto __MU;
+  }
+  
+  /* set u=4 */
+  mp_set(&u, 4);
+  
+  /* for k=1 to s-2 do */
+  for (k = 1; k <= s - 2; k++) {
+      /* u = u^2 - 2 mod n */
+      if ((res = mp_sqr(&u, &u)) != MP_OKAY) {
+         goto __MU;
+      }
+      if ((res = mp_sub_d(&u, 2, &u)) != MP_OKAY) {
+         goto __MU;
+      }
+      
+      /* make sure u is positive */
+      if (u.sign == MP_NEG) {
+         if ((res = mp_add(&u, &n, &u)) != MP_OKAY) {
+            goto __MU;
+         }
+      }
+      
+      /* reduce */
+      if ((res = mp_reduce(&u, &n, &mu)) != MP_OKAY) {
+         goto __MU;
+      }
+  }
+  
+  /* if u == 0 then its prime */
+  if (mp_iszero(&u) == 1) {
+     *pp = 1;
+  }
+  
+  res = MP_OKAY;
+__MU:  mp_clear(&mu);
+__U:   mp_clear(&u);
+__N:   mp_clear(&n);
+  return res;
+}
+
+/* square root of a long < 65536 */
+long i_sqrt(long x)
+{
+   long x1, x2;
+   
+   x2 = 16;
+   do {
+      x1 = x2;
+      x2 = x1 - ((x1 * x1) - x)/(2*x1);
+   } while (x1 != x2);
+   
+   if (x1*x1 > x) {
+      --x1;
+   }
+   
+   return x1;
+}
+
+/* is the long prime by brute force */
+int isprime(long k)
+{
+   long y, z;
+   
+   y = i_sqrt(k);
+   for (z = 2; z <= y; z++) {
+       if ((k % z) == 0) return 0;
+   }
+   return 1;
+}   
+   
+
+int main(void)
+{
+   int pp;
+   long k;
+   clock_t tt;
+  
+   k = 3;
+   
+   for (;;) {
+      /* start time */
+      tt = clock();
+      
+      /* test if 2^k - 1 is prime */
+      if (is_mersenne(k, &pp) != MP_OKAY) {
+         printf("Whoa error\n");
+         return -1;
+      }
+      
+      if (pp == 1) {
+         /* count time */
+         tt = clock() - tt;
+         
+         /* display if prime */
+         printf("2^%-5ld - 1 is prime, test took %ld ticks\n", k, tt);
+      }
+      
+      /* goto next odd exponent */
+      k += 2;
+      
+      /* but make sure its prime */
+      while (isprime(k) == 0) {
+         k += 2;
+      }
+   }
+   return 0;
+}
+
diff --git a/etc/pprime.c b/etc/pprime.c
index 84cf79c..fb987e3 100644
--- a/etc/pprime.c
+++ b/etc/pprime.c
@@ -56,7 +56,7 @@ static mp_digit prime_digit()
          ++y;
          next = (y+1)*(y+1);
       }
-      
+
       /* loop if divisible by 3,5,7,11,13,17,19,23,29  */
       if ((r % 3) == 0) { x = 0; continue; }
       if ((r % 5) == 0) { x = 0; continue; }
@@ -138,7 +138,7 @@ int pprime(int k, mp_int *p, mp_int *q)
 
    /* now loop making the single digit */
    while (mp_count_bits(&a) < k) {
-      printf("prime is %4d bits left\r", k - mp_count_bits(&a)); fflush(stdout);
+      printf("prime has %4d bits left\r", k - mp_count_bits(&a)); fflush(stdout);
    top: 
       mp_set(&b, prime_digit());
       
diff --git a/makefile b/makefile
index cbb5ac7..d448933 100644
--- a/makefile
+++ b/makefile
@@ -1,13 +1,13 @@
 CC = gcc
-CFLAGS  += -Wall -W -O3 -fomit-frame-pointer -funroll-loops 
+CFLAGS  +=  -Wall -W -Wshadow -ansi -O3 -fomit-frame-pointer -funroll-loops 
 
-VERSION=0.08
+VERSION=0.09
 
 default: test
 
 test: bn.o demo.o
 	$(CC) bn.o demo.o -o demo
-	cd mtest ; gcc -O3 -fomit-frame-pointer -funroll-loops mtest.c -o mtest.exe -s
+	cd mtest ; gcc $(CFLAGS) mtest.c -o mtest.exe -s
 
 # builds the x86 demo
 test86:
diff --git a/mtest/mtest.c b/mtest/mtest.c
index de04e2b..df5422f 100644
--- a/mtest/mtest.c
+++ b/mtest/mtest.c
@@ -41,7 +41,7 @@ void rand_num(mp_int *a)
    unsigned char buf[512];
 
 top:
-   size = 1 + ((fgetc(rng)*fgetc(rng)) % 32);
+   size = 1 + ((fgetc(rng)*fgetc(rng)) % 96);
    buf[0] = (fgetc(rng)&1)?1:0;
    fread(buf+1, 1, size, rng);
    for (n = 0; n < size; n++) {
@@ -57,7 +57,7 @@ void rand_num2(mp_int *a)
    unsigned char buf[512];
 
 top:
-   size = 1 + ((fgetc(rng)*fgetc(rng)) % 32);
+   size = 1 + ((fgetc(rng)*fgetc(rng)) % 96);
    buf[0] = (fgetc(rng)&1)?1:0;
    fread(buf+1, 1, size, rng);
    for (n = 0; n < size; n++) {