1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #define big_div_pos_fast big_div_pos 29 30 #include "bignum.h" 31 32 /* 33 * Configuration guide 34 * ------------------- 35 * 36 * There are 4 preprocessor symbols used to configure the bignum 37 * implementation. This file contains no logic to configure based on 38 * processor; we leave that to the Makefiles to specify. 39 * 40 * USE_FLOATING_POINT 41 * Meaning: There is support for a fast floating-point implementation of 42 * Montgomery multiply. 43 * 44 * PSR_MUL 45 * Meaning: There are processor-specific versions of the low level 46 * functions to implement big_mul. Those functions are: big_mul_set_vec, 47 * big_mul_add_vec, big_mul_vec, and big_sqr_vec. PSR_MUL implies support 48 * for all 4 functions. You cannot pick and choose which subset of these 49 * functions to support; that would lead to a rat's nest of #ifdefs. 50 * 51 * HWCAP 52 * Meaning: Call multiply support functions through a function pointer. 53 * On x86, there are multiple implementations for differnt hardware 54 * capabilities, such as MMX, SSE2, etc. Tests are made at run-time, when 55 * a function is first used. So, the support functions are called through 56 * a function pointer. There is no need for that on Sparc, because there 57 * is only one implementation; support functions are called directly. 58 * Later, if there were some new VIS instruction, or something, and a 59 * run-time test were needed, rather than variant kernel modules and 60 * libraries, then HWCAP would be defined for Sparc, as well. 61 * 62 * UMUL64 63 * Meaning: It is safe to use generic C code that assumes the existence 64 * of a 32 x 32 --> 64 bit unsigned multiply. If this is not defined, 65 * then the generic code for big_mul_add_vec() must necessarily be very slow, 66 * because it must fall back to using 16 x 16 --> 32 bit multiplication. 67 * 68 */ 69 70 71 #ifdef _KERNEL 72 #include <sys/ddi.h> 73 #include <sys/mdesc.h> 74 #include <sys/crypto/common.h> 75 76 #include <sys/types.h> 77 #include <sys/kmem.h> 78 #include <sys/param.h> 79 #include <sys/sunddi.h> 80 81 #define big_malloc(size) kmem_alloc(size, KM_NOSLEEP) 82 #define big_free(ptr, size) kmem_free(ptr, size) 83 84 void * 85 big_realloc(void *from, size_t oldsize, size_t newsize) 86 { 87 void *rv; 88 89 rv = kmem_alloc(newsize, KM_NOSLEEP); 90 if (rv != NULL) 91 bcopy(from, rv, oldsize); 92 kmem_free(from, oldsize); 93 return (rv); 94 } 95 96 #else /* _KERNEL */ 97 98 #include <stdlib.h> 99 #include <stdio.h> 100 #include <assert.h> 101 #define ASSERT assert 102 103 #ifndef MALLOC_DEBUG 104 105 #define big_malloc(size) malloc(size) 106 #define big_free(ptr, size) free(ptr) 107 108 #else 109 110 void 111 big_free(void *ptr, size_t size) 112 { 113 printf("freed %d bytes at %p\n", size, ptr); 114 free(ptr); 115 } 116 117 void * 118 big_malloc(size_t size) 119 { 120 void *rv; 121 rv = malloc(size); 122 printf("malloced %d bytes, addr:%p\n", size, rv); 123 return (rv); 124 } 125 #endif /* MALLOC_DEBUG */ 126 127 #define big_realloc(x, y, z) realloc((x), (z)) 128 129 void 130 printbignum(char *aname, BIGNUM *a) 131 { 132 int i; 133 134 (void) printf("\n%s\n%d\n", aname, a->sign*a->len); 135 for (i = a->len - 1; i >= 0; i--) { 136 #ifdef BIGNUM_CHUNK_32 137 (void) printf("%08x ", a->value[i]); 138 if ((i % 8 == 0) && (i != 0)) { 139 (void) printf("\n"); 140 } 141 #else 142 (void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32), 143 (uint32_t)((a->value[i]) & 0xffffffff)); 144 if ((i % 4 == 0) && (i != 0)) { 145 (void) printf("\n"); 146 } 147 #endif 148 } 149 (void) printf("\n"); 150 } 151 152 #endif /* _KERNEL */ 153 154 155 /* size in BIG_CHUNK_SIZE-bit words */ 156 BIG_ERR_CODE 157 big_init(BIGNUM *number, int size) 158 { 159 number->value = big_malloc(sizeof (BIG_CHUNK_TYPE) * size); 160 if (number->value == NULL) { 161 return (BIG_NO_MEM); 162 } 163 number->size = size; 164 number->len = 0; 165 number->sign = 1; 166 number->malloced = 1; 167 return (BIG_OK); 168 } 169 170 /* size in BIG_CHUNK_SIZE-bit words */ 171 BIG_ERR_CODE 172 big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize) 173 { 174 if ((buf == NULL) || (size > bufsize)) { 175 number->value = big_malloc(sizeof (BIG_CHUNK_TYPE) * size); 176 if (number->value == NULL) { 177 return (BIG_NO_MEM); 178 } 179 number->size = size; 180 number->malloced = 1; 181 } else { 182 number->value = buf; 183 number->size = bufsize; 184 number->malloced = 0; 185 } 186 number->len = 0; 187 number->sign = 1; 188 189 return (BIG_OK); 190 } 191 192 void 193 big_finish(BIGNUM *number) 194 { 195 if (number->malloced == 1) { 196 big_free(number->value, 197 sizeof (BIG_CHUNK_TYPE) * number->size); 198 number->malloced = 0; 199 } 200 } 201 202 203 /* 204 * bn->size should be at least 205 * (len + sizeof (BIG_CHUNK_TYPE) - 1) / sizeof (BIG_CHUNK_TYPE) bytes 206 * converts from byte-big-endian format to bignum format (words in 207 * little endian order, but bytes within the words big endian) 208 */ 209 void 210 bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len) 211 { 212 int i, j, offs; 213 BIG_CHUNK_TYPE word; 214 uchar_t *knwordp; 215 216 #ifdef _LP64 217 offs = (uint32_t)len % sizeof (BIG_CHUNK_TYPE); 218 bn->len = (uint32_t)len / sizeof (BIG_CHUNK_TYPE); 219 220 for (i = 0; i < (uint32_t)len / sizeof (BIG_CHUNK_TYPE); i++) { 221 #else /* !_LP64 */ 222 offs = len % sizeof (BIG_CHUNK_TYPE); 223 bn->len = len / sizeof (BIG_CHUNK_TYPE); 224 for (i = 0; i < len / sizeof (BIG_CHUNK_TYPE); i++) { 225 #endif /* _LP64 */ 226 knwordp = &(kn[len - sizeof (BIG_CHUNK_TYPE) * (i + 1)]); 227 word = knwordp[0]; 228 for (j = 1; j < sizeof (BIG_CHUNK_TYPE); j++) { 229 word = (word << 8)+ knwordp[j]; 230 } 231 bn->value[i] = word; 232 } 233 if (offs > 0) { 234 word = kn[0]; 235 for (i = 1; i < offs; i++) word = (word << 8) + kn[i]; 236 bn->value[bn->len++] = word; 237 } 238 while ((bn->len > 1) && (bn->value[bn->len-1] == 0)) { 239 bn->len --; 240 } 241 } 242 243 /* 244 * copies the least significant len bytes if 245 * len < bn->len * sizeof (BIG_CHUNK_TYPE) 246 * converts from bignum format to byte-big-endian format. 247 * bignum format is words of type BIG_CHUNK_TYPE in little endian order. 248 */ 249 void 250 bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len) 251 { 252 int i, j, offs; 253 BIG_CHUNK_TYPE word; 254 255 if (len < sizeof (BIG_CHUNK_TYPE) * bn->len) { 256 #ifdef _LP64 257 for (i = 0; i < (uint32_t)len / sizeof (BIG_CHUNK_TYPE); i++) { 258 #else /* !_LP64 */ 259 for (i = 0; i < len / sizeof (BIG_CHUNK_TYPE); i++) { 260 #endif /* _LP64 */ 261 word = bn->value[i]; 262 for (j = 0; j < sizeof (BIG_CHUNK_TYPE); j++) { 263 kn[len - sizeof (BIG_CHUNK_TYPE) * i - j - 1] = 264 word & 0xff; 265 word = word >> 8; 266 } 267 } 268 #ifdef _LP64 269 offs = (uint32_t)len % sizeof (BIG_CHUNK_TYPE); 270 #else /* !_LP64 */ 271 offs = len % sizeof (BIG_CHUNK_TYPE); 272 #endif /* _LP64 */ 273 if (offs > 0) { 274 word = bn->value[len / sizeof (BIG_CHUNK_TYPE)]; 275 #ifdef _LP64 276 for (i = (uint32_t)len % sizeof (BIG_CHUNK_TYPE); 277 i > 0; i --) { 278 #else /* !_LP64 */ 279 for (i = len % sizeof (BIG_CHUNK_TYPE); 280 i > 0; i --) { 281 #endif /* _LP64 */ 282 kn[i - 1] = word & 0xff; 283 word = word >> 8; 284 } 285 } 286 } else { 287 for (i = 0; i < bn->len; i++) { 288 word = bn->value[i]; 289 for (j = 0; j < sizeof (BIG_CHUNK_TYPE); j++) { 290 kn[len - sizeof (BIG_CHUNK_TYPE) * i - j - 1] = 291 word & 0xff; 292 word = word >> 8; 293 } 294 } 295 #ifdef _LP64 296 for (i = 0; 297 i < (uint32_t)len - sizeof (BIG_CHUNK_TYPE) * bn->len; 298 i++) { 299 #else /* !_LP64 */ 300 for (i = 0; i < len - sizeof (BIG_CHUNK_TYPE) * bn->len; i++) { 301 #endif /* _LP64 */ 302 kn[i] = 0; 303 } 304 } 305 } 306 307 308 int 309 big_bitlength(BIGNUM *a) 310 { 311 int l = 0, b = 0; 312 BIG_CHUNK_TYPE c; 313 314 l = a->len - 1; 315 while ((l > 0) && (a->value[l] == 0)) { 316 l--; 317 } 318 b = sizeof (BIG_CHUNK_TYPE) * BITSINBYTE; 319 c = a->value[l]; 320 while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) { 321 c = c << 1; 322 b--; 323 } 324 325 return (l * sizeof (BIG_CHUNK_TYPE) * BITSINBYTE + b); 326 } 327 328 329 BIG_ERR_CODE 330 big_copy(BIGNUM *dest, BIGNUM *src) 331 { 332 BIG_CHUNK_TYPE *newptr; 333 int i, len; 334 335 len = src->len; 336 while ((len > 1) && (src->value[len - 1] == 0)) { 337 len--; 338 } 339 src->len = len; 340 if (dest->size < len) { 341 if (dest->malloced == 1) { 342 newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value, 343 sizeof (BIG_CHUNK_TYPE) * dest->size, 344 sizeof (BIG_CHUNK_TYPE) * len); 345 } else { 346 newptr = (BIG_CHUNK_TYPE *) 347 big_malloc(sizeof (BIG_CHUNK_TYPE) * len); 348 if (newptr != NULL) { 349 dest->malloced = 1; 350 } 351 } 352 if (newptr == NULL) { 353 return (BIG_NO_MEM); 354 } 355 dest->value = newptr; 356 dest->size = len; 357 } 358 dest->len = len; 359 dest->sign = src->sign; 360 for (i = 0; i < len; i++) { 361 dest->value[i] = src->value[i]; 362 } 363 364 return (BIG_OK); 365 } 366 367 368 BIG_ERR_CODE 369 big_extend(BIGNUM *number, int size) 370 { 371 BIG_CHUNK_TYPE *newptr; 372 int i; 373 374 if (number->size >= size) 375 return (BIG_OK); 376 if (number->malloced) { 377 number->value = big_realloc(number->value, 378 sizeof (BIG_CHUNK_TYPE) * number->size, 379 sizeof (BIG_CHUNK_TYPE) * size); 380 } else { 381 newptr = big_malloc(sizeof (BIG_CHUNK_TYPE) * size); 382 if (newptr != NULL) { 383 for (i = 0; i < number->size; i++) { 384 newptr[i] = number->value[i]; 385 } 386 } 387 number->value = newptr; 388 } 389 390 if (number->value == NULL) { 391 return (BIG_NO_MEM); 392 } 393 394 number->size = size; 395 number->malloced = 1; 396 return (BIG_OK); 397 } 398 399 400 /* returns 1 if n == 0 */ 401 int 402 big_is_zero(BIGNUM *n) 403 { 404 int i, result; 405 406 result = 1; 407 for (i = 0; i < n->len; i++) { 408 if (n->value[i] != 0) { 409 result = 0; 410 } 411 } 412 return (result); 413 } 414 415 416 BIG_ERR_CODE 417 big_add_abs(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 418 { 419 int i, shorter, longer; 420 BIG_CHUNK_TYPE cy, ai; 421 BIG_CHUNK_TYPE *r, *a, *b, *c; 422 BIG_ERR_CODE err; 423 BIGNUM *longerarg; 424 425 if (aa->len > bb->len) { 426 shorter = bb->len; 427 longer = aa->len; 428 longerarg = aa; 429 } else { 430 shorter = aa->len; 431 longer = bb->len; 432 longerarg = bb; 433 } 434 if (result->size < longer + 1) { 435 err = big_extend(result, longer + 1); 436 if (err != BIG_OK) { 437 return (err); 438 } 439 } 440 441 r = result->value; 442 a = aa->value; 443 b = bb->value; 444 c = longerarg->value; 445 cy = 0; 446 for (i = 0; i < shorter; i++) { 447 ai = a[i]; 448 r[i] = ai + b[i] + cy; 449 if (r[i] > ai) { 450 cy = 0; 451 } else if (r[i] < ai) { 452 cy = 1; 453 } 454 } 455 for (; i < longer; i++) { 456 ai = c[i]; 457 r[i] = ai + cy; 458 if (r[i] >= ai) { 459 cy = 0; 460 } 461 } 462 if (cy == 1) { 463 r[i] = cy; 464 result->len = longer + 1; 465 } else { 466 result->len = longer; 467 } 468 result->sign = 1; 469 return (BIG_OK); 470 } 471 472 473 /* caller must make sure that result has at least len words allocated */ 474 void 475 big_sub_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, BIG_CHUNK_TYPE *b, int len) 476 { 477 int i; 478 BIG_CHUNK_TYPE cy, ai; 479 480 cy = 1; 481 for (i = 0; i < len; i++) { 482 ai = a[i]; 483 r[i] = ai + (~b[i]) + cy; 484 if (r[i] > ai) { 485 cy = 0; 486 } else if (r[i] < ai) { 487 cy = 1; 488 } 489 } 490 } 491 492 493 /* result=aa-bb it is assumed that aa>=bb */ 494 BIG_ERR_CODE 495 big_sub_pos(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 496 { 497 int i, shorter; 498 BIG_CHUNK_TYPE cy = 1, ai; 499 BIG_CHUNK_TYPE *r, *a, *b; 500 BIG_ERR_CODE err = BIG_OK; 501 502 if (aa->len > bb->len) { 503 shorter = bb->len; 504 } else { 505 shorter = aa->len; 506 } 507 if (result->size < aa->len) { 508 err = big_extend(result, aa->len); 509 if (err != BIG_OK) { 510 return (err); 511 } 512 } 513 514 r = result->value; 515 a = aa->value; 516 b = bb->value; 517 result->len = aa->len; 518 cy = 1; 519 for (i = 0; i < shorter; i++) { 520 ai = a[i]; 521 r[i] = ai + (~b[i]) + cy; 522 if (r[i] > ai) { 523 cy = 0; 524 } else if (r[i] < ai) { 525 cy = 1; 526 } 527 } 528 for (; i < aa->len; i++) { 529 ai = a[i]; 530 r[i] = ai + (~0) + cy; 531 if (r[i] < ai) { 532 cy = 1; 533 } 534 } 535 result->sign = 1; 536 537 if (cy == 0) { 538 return (BIG_INVALID_ARGS); 539 } else { 540 return (BIG_OK); 541 } 542 } 543 544 545 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */ 546 int 547 big_cmp_abs(BIGNUM *aa, BIGNUM *bb) 548 { 549 int i; 550 551 if (aa->len > bb->len) { 552 for (i = aa->len - 1; i > bb->len - 1; i--) { 553 if (aa->value[i] > 0) { 554 return (1); 555 } 556 } 557 } else if (aa->len < bb->len) { 558 for (i = bb->len - 1; i > aa->len - 1; i--) { 559 if (bb->value[i] > 0) { 560 return (-1); 561 } 562 } 563 } else { 564 i = aa->len-1; 565 } 566 for (; i >= 0; i--) { 567 if (aa->value[i] > bb->value[i]) { 568 return (1); 569 } else if (aa->value[i] < bb->value[i]) { 570 return (-1); 571 } 572 } 573 574 return (0); 575 } 576 577 578 BIG_ERR_CODE 579 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 580 { 581 BIG_ERR_CODE err; 582 583 if ((bb->sign == -1) && (aa->sign == 1)) { 584 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) { 585 return (err); 586 } 587 result->sign = 1; 588 } else if ((aa->sign == -1) && (bb->sign == 1)) { 589 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) { 590 return (err); 591 } 592 result->sign = -1; 593 } else if ((aa->sign == 1) && (bb->sign == 1)) { 594 if (big_cmp_abs(aa, bb) >= 0) { 595 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) { 596 return (err); 597 } 598 result->sign = 1; 599 } else { 600 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) { 601 return (err); 602 } 603 result->sign = -1; 604 } 605 } else { 606 if (big_cmp_abs(aa, bb) >= 0) { 607 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) { 608 return (err); 609 } 610 result->sign = -1; 611 } else { 612 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) { 613 return (err); 614 } 615 result->sign = 1; 616 } 617 } 618 return (BIG_OK); 619 } 620 621 622 BIG_ERR_CODE 623 big_add(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 624 { 625 BIG_ERR_CODE err; 626 627 if ((bb->sign == -1) && (aa->sign == -1)) { 628 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) { 629 return (err); 630 } 631 result->sign = -1; 632 } else if ((aa->sign == 1) && (bb->sign == 1)) { 633 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) { 634 return (err); 635 } 636 result->sign = 1; 637 } else if ((aa->sign == 1) && (bb->sign == -1)) { 638 if (big_cmp_abs(aa, bb) >= 0) { 639 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) { 640 return (err); 641 } 642 result->sign = 1; 643 } else { 644 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) { 645 return (err); 646 } 647 result->sign = -1; 648 } 649 } else { 650 if (big_cmp_abs(aa, bb) >= 0) { 651 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) { 652 return (err); 653 } 654 result->sign = -1; 655 } else { 656 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) { 657 return (err); 658 } 659 result->sign = 1; 660 } 661 } 662 return (BIG_OK); 663 } 664 665 666 /* result = aa/2 */ 667 BIG_ERR_CODE 668 big_half_pos(BIGNUM *result, BIGNUM *aa) 669 { 670 BIG_ERR_CODE err; 671 int i; 672 BIG_CHUNK_TYPE cy, cy1; 673 BIG_CHUNK_TYPE *a, *r; 674 675 if (result->size < aa->len) { 676 err = big_extend(result, aa->len); 677 if (err != BIG_OK) { 678 return (err); 679 } 680 } 681 682 result->len = aa->len; 683 a = aa->value; 684 r = result->value; 685 cy = 0; 686 for (i = aa->len - 1; i >= 0; i--) { 687 cy1 = a[i] << (BIG_CHUNK_SIZE - 1); 688 r[i] = (cy | (a[i] >> 1)); 689 cy = cy1; 690 } 691 if (r[result->len - 1] == 0) { 692 result->len--; 693 } 694 695 return (BIG_OK); 696 } 697 698 /* result = aa*2 */ 699 BIG_ERR_CODE 700 big_double(BIGNUM *result, BIGNUM *aa) 701 { 702 BIG_ERR_CODE err; 703 int i, rsize; 704 BIG_CHUNK_TYPE cy, cy1; 705 BIG_CHUNK_TYPE *a, *r; 706 707 if ((aa->len > 0) && 708 ((aa->value[aa->len - 1] & BIG_CHUNK_HIGHBIT) != 0)) { 709 rsize = aa->len + 1; 710 } else { 711 rsize = aa->len; 712 } 713 714 if (result->size < rsize) { 715 err = big_extend(result, rsize); 716 if (err != BIG_OK) 717 return (err); 718 } 719 720 a = aa->value; 721 r = result->value; 722 if (rsize == aa->len + 1) { 723 r[rsize - 1] = 1; 724 } 725 cy = 0; 726 for (i = 0; i < aa->len; i++) { 727 cy1 = a[i] >> (BIG_CHUNK_SIZE - 1); 728 r[i] = (cy | (a[i] << 1)); 729 cy = cy1; 730 } 731 result->len = rsize; 732 return (BIG_OK); 733 } 734 735 736 /* 737 * returns aa mod b, aa must be nonneg, b must be a max 738 * (BIG_CHUNK_SIZE / 2)-bit integer 739 */ 740 static uint32_t 741 big_modhalf_pos(BIGNUM *aa, uint32_t b) 742 { 743 int i; 744 BIG_CHUNK_TYPE rem; 745 746 if (aa->len == 0) { 747 return (0); 748 } 749 rem = aa->value[aa->len - 1] % b; 750 for (i = aa->len - 2; i >= 0; i--) { 751 rem = ((rem << (BIG_CHUNK_SIZE / 2)) | 752 (aa->value[i] >> (BIG_CHUNK_SIZE / 2))) % b; 753 rem = ((rem << (BIG_CHUNK_SIZE / 2)) | 754 (aa->value[i] & BIG_CHUNK_LOWHALFBITS)) % b; 755 } 756 757 return ((uint32_t)rem); 758 } 759 760 761 /* 762 * result = aa - (2^BIG_CHUNK_SIZE)^lendiff * bb 763 * result->size should be at least aa->len at entry 764 * aa, bb, and result should be positive 765 */ 766 void 767 big_sub_pos_high(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 768 { 769 int i, lendiff; 770 BIGNUM res1, aa1; 771 772 lendiff = aa->len - bb->len; 773 res1.size = result->size - lendiff; 774 res1.malloced = 0; 775 res1.value = result->value + lendiff; 776 aa1.size = aa->size - lendiff; 777 aa1.value = aa->value + lendiff; 778 aa1.len = bb->len; 779 aa1.sign = 1; 780 (void) big_sub_pos(&res1, &aa1, bb); 781 if (result->value != aa->value) { 782 for (i = 0; i < lendiff; i++) { 783 result->value[i] = aa->value[i]; 784 } 785 } 786 result->len = aa->len; 787 } 788 789 790 /* 791 * returns 1, 0, or -1 depending on whether |aa| > , ==, or < 792 * (2^BIG_CHUNK_SIZE)^lendiff * |bb| 793 * aa->len should be >= bb->len 794 */ 795 int 796 big_cmp_abs_high(BIGNUM *aa, BIGNUM *bb) 797 { 798 int lendiff; 799 BIGNUM aa1; 800 801 lendiff = aa->len - bb->len; 802 aa1.len = bb->len; 803 aa1.size = aa->size - lendiff; 804 aa1.malloced = 0; 805 aa1.value = aa->value + lendiff; 806 return (big_cmp_abs(&aa1, bb)); 807 } 808 809 810 /* 811 * result = aa * b where b is a max. (BIG_CHUNK_SIZE / 2)-bit positive integer. 812 * result should have enough space allocated. 813 */ 814 static void 815 big_mulhalf_low(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b) 816 { 817 int i; 818 BIG_CHUNK_TYPE t1, t2, ai, cy; 819 BIG_CHUNK_TYPE *a, *r; 820 821 a = aa->value; 822 r = result->value; 823 cy = 0; 824 for (i = 0; i < aa->len; i++) { 825 ai = a[i]; 826 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy; 827 t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b + 828 (t1 >> (BIG_CHUNK_SIZE / 2)); 829 r[i] = (t1 & BIG_CHUNK_LOWHALFBITS) | 830 (t2 << (BIG_CHUNK_SIZE / 2)); 831 cy = t2 >> (BIG_CHUNK_SIZE / 2); 832 } 833 r[i] = cy; 834 result->len = aa->len + 1; 835 result->sign = aa->sign; 836 } 837 838 839 /* 840 * result = aa * b * 2^(BIG_CHUNK_SIZE / 2) where b is a max. 841 * (BIG_CHUNK_SIZE / 2)-bit positive integer. 842 * result should have enough space allocated. 843 */ 844 static void 845 big_mulhalf_high(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b) 846 { 847 int i; 848 BIG_CHUNK_TYPE t1, t2, ai, cy, ri; 849 BIG_CHUNK_TYPE *a, *r; 850 851 a = aa->value; 852 r = result->value; 853 cy = 0; 854 ri = 0; 855 for (i = 0; i < aa->len; i++) { 856 ai = a[i]; 857 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy; 858 t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b + 859 (t1 >> (BIG_CHUNK_SIZE / 2)); 860 r[i] = (t1 << (BIG_CHUNK_SIZE / 2)) + ri; 861 ri = t2 & BIG_CHUNK_LOWHALFBITS; 862 cy = t2 >> (BIG_CHUNK_SIZE / 2); 863 } 864 r[i] = (cy << (BIG_CHUNK_SIZE / 2)) + ri; 865 result->len = aa->len + 1; 866 result->sign = aa->sign; 867 } 868 869 870 /* it is assumed that result->size is big enough */ 871 void 872 big_shiftleft(BIGNUM *result, BIGNUM *aa, int offs) 873 { 874 int i; 875 BIG_CHUNK_TYPE cy, ai; 876 877 if (offs == 0) { 878 if (result != aa) { 879 (void) big_copy(result, aa); 880 } 881 return; 882 } 883 cy = 0; 884 for (i = 0; i < aa->len; i++) { 885 ai = aa->value[i]; 886 result->value[i] = (ai << offs) | cy; 887 cy = ai >> (BIG_CHUNK_SIZE - offs); 888 } 889 if (cy != 0) { 890 result->len = aa->len + 1; 891 result->value[result->len - 1] = cy; 892 } else { 893 result->len = aa->len; 894 } 895 result->sign = aa->sign; 896 } 897 898 899 /* it is assumed that result->size is big enough */ 900 void 901 big_shiftright(BIGNUM *result, BIGNUM *aa, int offs) 902 { 903 int i; 904 BIG_CHUNK_TYPE cy, ai; 905 906 if (offs == 0) { 907 if (result != aa) { 908 (void) big_copy(result, aa); 909 } 910 return; 911 } 912 cy = aa->value[0] >> offs; 913 for (i = 1; i < aa->len; i++) { 914 ai = aa->value[i]; 915 result->value[i-1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy; 916 cy = ai >> offs; 917 } 918 result->len = aa->len; 919 result->value[result->len - 1] = cy; 920 result->sign = aa->sign; 921 } 922 923 924 /* 925 * result = aa/bb remainder = aa mod bb 926 * it is assumed that aa and bb are positive 927 */ 928 BIG_ERR_CODE 929 big_div_pos_fast(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb) 930 { 931 BIG_ERR_CODE err = BIG_OK; 932 int i, alen, blen, tlen, rlen, offs; 933 BIG_CHUNK_TYPE higha, highb, coeff; 934 BIG_CHUNK_TYPE *a, *b; 935 BIGNUM bbhigh, bblow, tresult, tmp1, tmp2; 936 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE]; 937 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE]; 938 BIG_CHUNK_TYPE tresultvalue[BIGTMPSIZE]; 939 BIG_CHUNK_TYPE bblowvalue[BIGTMPSIZE]; 940 BIG_CHUNK_TYPE bbhighvalue[BIGTMPSIZE]; 941 942 a = aa->value; 943 b = bb->value; 944 alen = aa->len; 945 blen = bb->len; 946 while ((alen > 1) && (a[alen - 1] == 0)) { 947 alen = alen - 1; 948 } 949 aa->len = alen; 950 while ((blen > 1) && (b[blen - 1] == 0)) { 951 blen = blen - 1; 952 } 953 bb->len = blen; 954 if ((blen == 1) && (b[0] == 0)) { 955 return (BIG_DIV_BY_0); 956 } 957 958 if (big_cmp_abs(aa, bb) < 0) { 959 if ((remainder != NULL) && 960 ((err = big_copy(remainder, aa)) != BIG_OK)) { 961 return (err); 962 } 963 if (result != NULL) { 964 result->len = 1; 965 result->sign = 1; 966 result->value[0] = 0; 967 } 968 return (BIG_OK); 969 } 970 971 if ((err = big_init1(&bblow, blen + 1, 972 bblowvalue, arraysize(bblowvalue))) != BIG_OK) 973 return (err); 974 975 if ((err = big_init1(&bbhigh, blen + 1, 976 bbhighvalue, arraysize(bbhighvalue))) != BIG_OK) 977 goto ret1; 978 979 if ((err = big_init1(&tmp1, alen + 2, 980 tmp1value, arraysize(tmp1value))) != BIG_OK) 981 goto ret2; 982 983 if ((err = big_init1(&tmp2, blen + 2, 984 tmp2value, arraysize(tmp2value))) != BIG_OK) 985 goto ret3; 986 987 if ((err = big_init1(&tresult, alen - blen + 2, 988 tresultvalue, arraysize(tresultvalue))) != BIG_OK) 989 goto ret4; 990 991 offs = 0; 992 highb = b[blen - 1]; 993 if (highb >= (BIG_CHUNK_HALF_HIGHBIT << 1)) { 994 highb = highb >> (BIG_CHUNK_SIZE / 2); 995 offs = (BIG_CHUNK_SIZE / 2); 996 } 997 while ((highb & BIG_CHUNK_HALF_HIGHBIT) == 0) { 998 highb = highb << 1; 999 offs++; 1000 } 1001 1002 big_shiftleft(&bblow, bb, offs); 1003 1004 if (offs <= (BIG_CHUNK_SIZE / 2 - 1)) { 1005 big_shiftleft(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2); 1006 } else { 1007 big_shiftright(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2); 1008 } 1009 if (bbhigh.value[bbhigh.len - 1] == 0) { 1010 bbhigh.len--; 1011 } else { 1012 bbhigh.value[bbhigh.len] = 0; 1013 } 1014 1015 highb = bblow.value[bblow.len - 1]; 1016 1017 big_shiftleft(&tmp1, aa, offs); 1018 rlen = tmp1.len - bblow.len + 1; 1019 tresult.len = rlen; 1020 1021 tmp1.len++; 1022 tlen = tmp1.len; 1023 tmp1.value[tmp1.len - 1] = 0; 1024 for (i = 0; i < rlen; i++) { 1025 higha = (tmp1.value[tlen - 1] << (BIG_CHUNK_SIZE / 2)) + 1026 (tmp1.value[tlen - 2] >> (BIG_CHUNK_SIZE / 2)); 1027 coeff = higha / (highb + 1); 1028 big_mulhalf_high(&tmp2, &bblow, coeff); 1029 big_sub_pos_high(&tmp1, &tmp1, &tmp2); 1030 bbhigh.len++; 1031 while (tmp1.value[tlen - 1] > 0) { 1032 big_sub_pos_high(&tmp1, &tmp1, &bbhigh); 1033 coeff++; 1034 } 1035 bbhigh.len--; 1036 tlen--; 1037 tmp1.len--; 1038 while (big_cmp_abs_high(&tmp1, &bbhigh) >= 0) { 1039 big_sub_pos_high(&tmp1, &tmp1, &bbhigh); 1040 coeff++; 1041 } 1042 tresult.value[rlen - i - 1] = coeff << (BIG_CHUNK_SIZE / 2); 1043 higha = tmp1.value[tlen - 1]; 1044 coeff = higha / (highb + 1); 1045 big_mulhalf_low(&tmp2, &bblow, coeff); 1046 tmp2.len--; 1047 big_sub_pos_high(&tmp1, &tmp1, &tmp2); 1048 while (big_cmp_abs_high(&tmp1, &bblow) >= 0) { 1049 big_sub_pos_high(&tmp1, &tmp1, &bblow); 1050 coeff++; 1051 } 1052 tresult.value[rlen - i - 1] = 1053 tresult.value[rlen - i - 1] + coeff; 1054 } 1055 1056 big_shiftright(&tmp1, &tmp1, offs); 1057 1058 err = BIG_OK; 1059 1060 if ((remainder != NULL) && 1061 ((err = big_copy(remainder, &tmp1)) != BIG_OK)) 1062 goto ret; 1063 1064 if (result != NULL) 1065 err = big_copy(result, &tresult); 1066 1067 ret: 1068 big_finish(&tresult); 1069 ret4: 1070 big_finish(&tmp1); 1071 ret3: 1072 big_finish(&tmp2); 1073 ret2: 1074 big_finish(&bbhigh); 1075 ret1: 1076 big_finish(&bblow); 1077 return (err); 1078 } 1079 1080 /* 1081 * If there is no processor-specific integer implementation of 1082 * the lower level multiply functions, then this code is provided 1083 * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and 1084 * big_sqr_vec(). 1085 * 1086 * There are two generic implementations. One that assumes that 1087 * there is hardware and C compiler support for a 32 x 32 --> 64 1088 * bit unsigned multiply, but otherwise is not specific to any 1089 * processor, platform, or ISA. 1090 * 1091 * The other makes very few assumptions about hardware capabilities. 1092 * It does not even assume that there is an