| 
									
										
										
										
											2018-05-02 21:43:17 +02:00
										 |  |  | #include "tommath_private.h"
 | 
					
						
							| 
									
										
										
										
											2004-10-29 22:07:18 +00:00
										 |  |  | #ifdef BN_MP_KARATSUBA_MUL_C
 | 
					
						
							| 
									
										
										
										
											2003-02-28 16:08:34 +00:00
										 |  |  | /* LibTomMath, multiple-precision integer library -- Tom St Denis
 | 
					
						
							|  |  |  |  * | 
					
						
							| 
									
										
										
										
											2003-08-05 01:24:44 +00:00
										 |  |  |  * LibTomMath is a library that provides multiple-precision | 
					
						
							| 
									
										
										
										
											2003-02-28 16:08:34 +00:00
										 |  |  |  * integer arithmetic as well as number theoretic functionality. | 
					
						
							|  |  |  |  * | 
					
						
							| 
									
										
										
										
											2003-08-05 01:24:44 +00:00
										 |  |  |  * The library was designed directly after the MPI library by | 
					
						
							| 
									
										
										
										
											2003-02-28 16:08:34 +00:00
										 |  |  |  * Michael Fromberger but has been written from scratch with | 
					
						
							|  |  |  |  * additional optimizations in place. | 
					
						
							|  |  |  |  * | 
					
						
							| 
									
										
										
										
											2018-12-29 17:56:20 +01:00
										 |  |  |  * SPDX-License-Identifier: Unlicense | 
					
						
							| 
									
										
										
										
											2003-02-28 16:08:34 +00:00
										 |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-08-30 05:51:11 +02:00
										 |  |  | /* c = |a| * |b| using Karatsuba Multiplication using
 | 
					
						
							| 
									
										
										
										
											2003-05-29 13:35:26 +00:00
										 |  |  |  * three half size multiplications | 
					
						
							| 
									
										
										
										
											2003-02-28 16:08:34 +00:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2017-08-30 05:51:11 +02:00
										 |  |  |  * Let B represent the radix [e.g. 2**DIGIT_BIT] and | 
					
						
							|  |  |  |  * let n represent half of the number of digits in | 
					
						
							| 
									
										
										
										
											2003-05-29 13:35:26 +00:00
										 |  |  |  * the min(a,b) | 
					
						
							| 
									
										
										
										
											2003-02-28 16:08:34 +00:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2003-05-29 13:35:26 +00:00
										 |  |  |  * a = a1 * B**n + a0 | 
					
						
							|  |  |  |  * b = b1 * B**n + b0 | 
					
						
							| 
									
										
										
										
											2003-02-28 16:08:34 +00:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2017-08-30 05:51:11 +02:00
										 |  |  |  * Then, a * b => | 
					
						
							| 
									
										
										
										
											2005-08-01 16:37:28 +00:00
										 |  |  |    a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 | 
					
						
							| 
									
										
										
										
											2003-02-28 16:08:34 +00:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2017-08-30 05:51:11 +02:00
										 |  |  |  * Note that a1b1 and a0b0 are used twice and only need to be | 
					
						
							|  |  |  |  * computed once.  So in total three half size (half # of | 
					
						
							|  |  |  |  * digit) multiplications are performed, a0b0, a1b1 and | 
					
						
							| 
									
										
										
										
											2005-08-01 16:37:28 +00:00
										 |  |  |  * (a1+b1)(a0+b0) | 
					
						
							| 
									
										
										
										
											2003-02-28 16:08:34 +00:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2003-05-29 13:35:26 +00:00
										 |  |  |  * Note that a multiplication of half the digits requires | 
					
						
							| 
									
										
										
										
											2017-08-30 05:51:11 +02:00
										 |  |  |  * 1/4th the number of single precision multiplications so in | 
					
						
							|  |  |  |  * total after one call 25% of the single precision multiplications | 
					
						
							|  |  |  |  * are saved.  Note also that the call to mp_mul can end up back | 
					
						
							|  |  |  |  * in this function if the a0, a1, b0, or b1 are above the threshold. | 
					
						
							|  |  |  |  * This is known as divide-and-conquer and leads to the famous | 
					
						
							|  |  |  |  * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than | 
					
						
							|  |  |  |  * the standard O(N**2) that the baseline/comba methods use. | 
					
						
							|  |  |  |  * Generally though the overhead of this method doesn't pay off | 
					
						
							| 
									
										
										
										
											2003-05-29 13:35:26 +00:00
										 |  |  |  * until a certain size (N ~ 80) is reached. | 
					
						
							| 
									
										
										
										
											2003-02-28 16:08:34 +00:00
										 |  |  |  */ | 
					
						
							| 
									
										
										
										
											2017-09-20 16:59:43 +02:00
										 |  |  | int mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c) | 
					
						
							| 
									
										
										
										
											2003-02-28 16:08:34 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2017-08-30 19:11:35 +02:00
										 |  |  |    mp_int  x0, x1, y0, y1, t1, x0y0, x1y1; | 
					
						
							|  |  |  |    int     B, err; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    /* default the return code to an error */ | 
					
						
							|  |  |  |    err = MP_MEM; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    /* min # of digits */ | 
					
						
							|  |  |  |    B = MIN(a->used, b->used); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    /* now divide in two */ | 
					
						
							|  |  |  |    B = B >> 1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    /* init copy all the temps */ | 
					
						
							|  |  |  |    if (mp_init_size(&x0, B) != MP_OKAY) | 
					
						
							| 
									
										
										
										
											2018-04-11 13:46:35 -07:00
										 |  |  |       goto LBL_ERR; | 
					
						
							| 
									
										
										
										
											2017-08-30 19:11:35 +02:00
										 |  |  |    if (mp_init_size(&x1, a->used - B) != MP_OKAY) | 
					
						
							|  |  |  |       goto X0; | 
					
						
							|  |  |  |    if (mp_init_size(&y0, B) != MP_OKAY) | 
					
						
							|  |  |  |       goto X1; | 
					
						
							|  |  |  |    if (mp_init_size(&y1, b->used - B) != MP_OKAY) | 
					
						
							|  |  |  |       goto Y0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    /* init temps */ | 
					
						
							|  |  |  |    if (mp_init_size(&t1, B * 2) != MP_OKAY) | 
					
						
							|  |  |  |       goto Y1; | 
					
						
							|  |  |  |    if (mp_init_size(&x0y0, B * 2) != MP_OKAY) | 
					
						
							|  |  |  |       goto T1; | 
					
						
							|  |  |  |    if (mp_init_size(&x1y1, B * 2) != MP_OKAY) | 
					
						
							|  |  |  |       goto X0Y0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    /* now shift the digits */ | 
					
						
							|  |  |  |    x0.used = y0.used = B; | 
					
						
							|  |  |  |    x1.used = a->used - B; | 
					
						
							|  |  |  |    y1.used = b->used - B; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    { | 
					
						
							|  |  |  |       int x; | 
					
						
							|  |  |  |       mp_digit *tmpa, *tmpb, *tmpx, *tmpy; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       /* we copy the digits directly instead of using higher level functions
 | 
					
						
							|  |  |  |        * since we also need to shift the digits | 
					
						
							|  |  |  |        */ | 
					
						
							|  |  |  |       tmpa = a->dp; | 
					
						
							|  |  |  |       tmpb = b->dp; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       tmpx = x0.dp; | 
					
						
							|  |  |  |       tmpy = y0.dp; | 
					
						
							|  |  |  |       for (x = 0; x < B; x++) { | 
					
						
							|  |  |  |          *tmpx++ = *tmpa++; | 
					
						
							|  |  |  |          *tmpy++ = *tmpb++; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       tmpx = x1.dp; | 
					
						
							|  |  |  |       for (x = B; x < a->used; x++) { | 
					
						
							|  |  |  |          *tmpx++ = *tmpa++; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       tmpy = y1.dp; | 
					
						
							|  |  |  |       for (x = B; x < b->used; x++) { | 
					
						
							|  |  |  |          *tmpy++ = *tmpb++; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |    } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    /* only need to clamp the lower words since by definition the
 | 
					
						
							|  |  |  |     * upper words x1/y1 must have a known number of digits | 
					
						
							|  |  |  |     */ | 
					
						
							|  |  |  |    mp_clamp(&x0); | 
					
						
							|  |  |  |    mp_clamp(&y0); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    /* now calc the products x0y0 and x1y1 */ | 
					
						
							|  |  |  |    /* after this x0 is no longer required, free temp [x0==t2]! */ | 
					
						
							|  |  |  |    if (mp_mul(&x0, &y0, &x0y0) != MP_OKAY) | 
					
						
							|  |  |  |       goto X1Y1;          /* x0y0 = x0*y0 */ | 
					
						
							|  |  |  |    if (mp_mul(&x1, &y1, &x1y1) != MP_OKAY) | 
					
						
							|  |  |  |       goto X1Y1;          /* x1y1 = x1*y1 */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    /* now calc x1+x0 and y1+y0 */ | 
					
						
							|  |  |  |    if (s_mp_add(&x1, &x0, &t1) != MP_OKAY) | 
					
						
							|  |  |  |       goto X1Y1;          /* t1 = x1 - x0 */ | 
					
						
							|  |  |  |    if (s_mp_add(&y1, &y0, &x0) != MP_OKAY) | 
					
						
							|  |  |  |       goto X1Y1;          /* t2 = y1 - y0 */ | 
					
						
							|  |  |  |    if (mp_mul(&t1, &x0, &t1) != MP_OKAY) | 
					
						
							|  |  |  |       goto X1Y1;          /* t1 = (x1 + x0) * (y1 + y0) */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    /* add x0y0 */ | 
					
						
							|  |  |  |    if (mp_add(&x0y0, &x1y1, &x0) != MP_OKAY) | 
					
						
							|  |  |  |       goto X1Y1;          /* t2 = x0y0 + x1y1 */ | 
					
						
							|  |  |  |    if (s_mp_sub(&t1, &x0, &t1) != MP_OKAY) | 
					
						
							|  |  |  |       goto X1Y1;          /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    /* shift by B */ | 
					
						
							|  |  |  |    if (mp_lshd(&t1, B) != MP_OKAY) | 
					
						
							|  |  |  |       goto X1Y1;          /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ | 
					
						
							|  |  |  |    if (mp_lshd(&x1y1, B * 2) != MP_OKAY) | 
					
						
							|  |  |  |       goto X1Y1;          /* x1y1 = x1y1 << 2*B */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    if (mp_add(&x0y0, &t1, &t1) != MP_OKAY) | 
					
						
							|  |  |  |       goto X1Y1;          /* t1 = x0y0 + t1 */ | 
					
						
							|  |  |  |    if (mp_add(&t1, &x1y1, c) != MP_OKAY) | 
					
						
							|  |  |  |       goto X1Y1;          /* t1 = x0y0 + t1 + x1y1 */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |    /* Algorithm succeeded set the return code to MP_OKAY */ | 
					
						
							|  |  |  |    err = MP_OKAY; | 
					
						
							| 
									
										
										
										
											2003-02-28 16:08:34 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-08-28 22:34:46 +02:00
										 |  |  | X1Y1: | 
					
						
							| 
									
										
										
										
											2017-08-30 19:11:35 +02:00
										 |  |  |    mp_clear(&x1y1); | 
					
						
							| 
									
										
										
										
											2017-08-28 22:34:46 +02:00
										 |  |  | X0Y0: | 
					
						
							| 
									
										
										
										
											2017-08-30 19:11:35 +02:00
										 |  |  |    mp_clear(&x0y0); | 
					
						
							| 
									
										
										
										
											2017-08-28 22:34:46 +02:00
										 |  |  | T1: | 
					
						
							| 
									
										
										
										
											2017-08-30 19:11:35 +02:00
										 |  |  |    mp_clear(&t1); | 
					
						
							| 
									
										
										
										
											2017-08-28 22:34:46 +02:00
										 |  |  | Y1: | 
					
						
							| 
									
										
										
										
											2017-08-30 19:11:35 +02:00
										 |  |  |    mp_clear(&y1); | 
					
						
							| 
									
										
										
										
											2017-08-28 22:34:46 +02:00
										 |  |  | Y0: | 
					
						
							| 
									
										
										
										
											2017-08-30 19:11:35 +02:00
										 |  |  |    mp_clear(&y0); | 
					
						
							| 
									
										
										
										
											2017-08-28 22:34:46 +02:00
										 |  |  | X1: | 
					
						
							| 
									
										
										
										
											2017-08-30 19:11:35 +02:00
										 |  |  |    mp_clear(&x1); | 
					
						
							| 
									
										
										
										
											2017-08-28 22:34:46 +02:00
										 |  |  | X0: | 
					
						
							| 
									
										
										
										
											2017-08-30 19:11:35 +02:00
										 |  |  |    mp_clear(&x0); | 
					
						
							| 
									
										
										
										
											2018-04-11 13:46:35 -07:00
										 |  |  | LBL_ERR: | 
					
						
							| 
									
										
										
										
											2017-08-30 19:11:35 +02:00
										 |  |  |    return err; | 
					
						
							| 
									
										
										
										
											2003-02-28 16:08:34 +00:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2004-10-29 22:07:18 +00:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2005-08-01 16:37:28 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-08-28 16:27:26 +02:00
										 |  |  | /* ref:         $Format:%D$ */ | 
					
						
							|  |  |  | /* git commit:  $Format:%H$ */ | 
					
						
							|  |  |  | /* commit time: $Format:%ai$ */ |