📄 bignumber.cpp
字号:
// BigNumber.cpp: implementation of the CBigNumber class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "RSAUtil.h"
#include "BigNumber.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
//#define __USE_BIG_MEM__
#if defined(__USE_BIG_MEM__)
#include "BigMem.h"
static CBigMem* s_pmem = NULL;
#endif
/* The sixteen hexadecimal digit characters, upper case, indexed by digit value. */
const char c_str_HEX[] = "0123456789ABCDEF";
/* Word types used by the bni* primitives in this file. */
typedef unsigned __int64 BNWORD64;
typedef unsigned long BNWORD32;
/*
 * Defining the name as itself makes the typedef visible to the
 * preprocessor, so code further down can test "#ifdef BNWORD64" to
 * find out whether a double-width word is available.
 */
#define BNWORD64 BNWORD64
/* This build selects the little-endian variants of the macros below. */
#define BN_LITTLE_ENDIAN 1
/*
 * Macros to choose between big and little endian.
 *
 * BIG(x)         expands to x only in the big-endian configuration.
 * LITTLE(x)      expands to x only in the little-endian configuration.
 * BIGLITTLE(b,l) expands to b in the big-endian configuration, l otherwise.
 *
 * In the big-endian configuration the number pointers point at the *end*
 * of the word array and are walked with pre-decrement; in the
 * little-endian configuration they point at the start and are walked
 * with post-increment.
 */
#if BN_BIG_ENDIAN
#define BIG(b) b
#define LITTLE(l) /*nothing*/
#define BIGLITTLE(b,l) b
#elif BN_LITTLE_ENDIAN
#define BIG(b) /*nothing*/
#define LITTLE(l) l
#define BIGLITTLE(b,l) l
#else
#error One of BN_BIG_ENDIAN or BN_LITTLE_ENDIAN must be defined as 1
#endif
/*
 * Raw memory primitives used by the BNI* allocation macros.
 *
 * With __USE_BIG_MEM__ defined, all three operations are forwarded to the
 * shared CBigMem object s_pmem.  Otherwise allocation and release map
 * straight onto malloc/free and bniRealloc is the function defined below.
 *
 * In both variants the byte-count argument of bniMemFree (and the old
 * length argument of the macro form of bniRealloc) is accepted but not
 * used in the expansion.
 */
#if defined(__USE_BIG_MEM__)
#define bniMemAlloc(size) s_pmem->Alloc(size)
#define bniRealloc(ptr, oldlen, newlen) s_pmem->Realloc(ptr, newlen)
#define bniMemFree(ptr, size) s_pmem->Free(ptr)
#else
#define bniMemAlloc(size) malloc(size)
#define bniMemFree(ptr, size) free(ptr)
/* True when the pointer expression a compares equal to NULL. */
#define IsNull(a) (NULL == (a))
/*
 * bniRealloc: move a number's storage into a freshly allocated buffer
 * of newbytes bytes.
 *
 *   oldptr   - current number pointer, or NULL if there is nothing to move.
 *              In the big-endian configuration this points one past the
 *              end of the old buffer; otherwise at its start.
 *   oldbytes - size of the old buffer in bytes.
 *   newbytes - size of the new buffer in bytes.
 *
 * Returns the new number pointer (again end-of-buffer in the big-endian
 * configuration, start-of-buffer otherwise), or NULL if the allocation
 * failed.  On failure the old buffer is left untouched and is NOT freed.
 * On success the old buffer is released.
 *
 * Only min(oldbytes, newbytes) bytes are carried over.  In the big-endian
 * configuration those are the bytes adjacent to the end of each buffer,
 * so both the source and destination are addressed backwards from the
 * end pointers.
 */
void *
bniRealloc(void* oldptr, unsigned oldbytes, unsigned newbytes)
{
	char *newbase = (char *)bniMemAlloc(newbytes);

	if ( IsNull( newbase ) )
		return newbase;

	/* Nothing to migrate: just hand back the new number pointer. */
	if ( IsNull( oldptr ) )
		return BIGLITTLE(newbase + newbytes, newbase);

	/* Number of bytes that survive the resize. */
	unsigned const keep = (newbytes > oldbytes) ? oldbytes : newbytes;

	/* Start of the old allocation, which is what must be freed. */
	char *oldbase = BIGLITTLE((char *)oldptr - oldbytes, (char *)oldptr);

	memcpy(BIGLITTLE(newbase + newbytes - keep, newbase),
	       BIGLITTLE((char *)oldptr - keep, oldbase),
	       keep);

	bniMemFree(oldbase, oldbytes);
	return BIGLITTLE(newbase + newbytes, newbase);
}
#endif
/*
 * BNIALLOC(p, type, words): allocate room for `words` elements of *p and
 * store the result in p, cast to (type *).  p is 0 if the allocation
 * failed.  In the big-endian configuration p is additionally advanced by
 * `words` on success, so that it points one past the end of the buffer.
 * NOTE(review): the big-endian expansion is a bare "if" statement, so the
 * macro cannot safely be followed by an "else" in that configuration.
 */
#define BNIALLOC(p,type,words) BIGLITTLE( \
if ( ((p) = (type *)bniMemAlloc((words)*sizeof*(p))) != 0) \
(p) += (words), \
(p) = (type *)bniMemAlloc((words) * sizeof*(p)) \
)
/*
 * BNIFREE(p, words): release a buffer obtained through BNIALLOC.  In the
 * big-endian configuration p is first moved back by `words` to recover
 * the start of the allocation.  The byte count is passed on to
 * bniMemFree.
 */
#define BNIFREE(p,words) bniMemFree((p) BIG(-(words)), (words) * sizeof*(p))
/*
 * BNIREALLOC(p, old, newP): resize from `old` to `newP` elements of *p by
 * calling bniRealloc with the counts converted to bytes.  The expansion
 * does not assign or cast the result; the caller has to store it.
 */
#define BNIREALLOC(p,old,newP) bniRealloc(p, (old) * sizeof*(p), (newP) * sizeof*(p))
/*
 * Construct an empty number: no buffer attached, and both the size and
 * the allocated count set to zero.
 *
 * When __USE_BIG_MEM__ is defined, the shared CBigMem object is created
 * on first use and Refrence() is called on it once per constructed
 * number (the destructor makes the matching Release() call).
 */
CBigNumber::CBigNumber()
{
	allocated = 0;
	size = 0;
	ptr = NULL;

#if defined(__USE_BIG_MEM__)
	/* Lazily create the shared memory object the first time through. */
	if (s_pmem == NULL)
		s_pmem = CBigMem::Create();

	ASSERT(s_pmem);
	s_pmem->Refrence();
#endif
}
/*
 * Release the word buffer, if any, and reset the object to the empty
 * state.  When __USE_BIG_MEM__ is defined, also call Release() on the
 * shared CBigMem object, pairing with the Refrence() call made in the
 * constructor.
 *
 * NOTE(review): BNIFREE is given -1 as the word count.  With the
 * bniMemFree definitions in this file the size argument is discarded,
 * and BIG() expands to nothing in the little-endian configuration
 * selected here, so only the pointer value matters.  A big-endian build
 * would need the real allocated word count instead.
 */
CBigNumber::~CBigNumber()
{
	if (ptr != NULL) {
		BNIFREE(ptr, -1);
	}

	allocated = 0;
	size = 0;
	ptr = NULL;

#if defined(__USE_BIG_MEM__)
	ASSERT(s_pmem);
	s_pmem->Release();
#endif
}
/////////////////////////////////////////////////////////////////////////////////////////////////
//bni xxx static functions
/*
* Most of the multiply (and Montgomery reduce) routines use an outer
* loop that iterates over one of the operands - a so-called operand
* scanning approach. One big advantage of this is that the assembly
* support routines are simpler. The loops can be rearranged to have
* an outer loop that iterates over the product, a so-called product
* scanning approach. This has the advantage of writing less data
* and doing fewer adds to memory, so is supposedly faster. Some
* code has been written using a product-scanning approach, but
* it appears to be slower, so it is turned off by default. Some
* experimentation would be appreciated.
*
* (The code is also annoying to get right and not very well commented,
* one of my pet peeves about math libraries. I'm sorry.)
*/
#ifndef PRODUCT_SCAN
#define PRODUCT_SCAN 0
#endif
/* Function prototypes for the inline asm routines */
/*
 * Each prototype is followed by a #define of the routine's name to
 * itself.  This file guards its generic C routines with "#ifndef name"
 * (see bniCopy_32, bniZero_32, bniNeg_32, bniAdd1_32), so the define
 * presumably marks the routine as already provided in assembly.
 *
 * NOTE(review): the prototypes carry CDECL but the definitions further
 * down do not repeat it; they agree only if CDECL matches the compiler's
 * default calling convention.
 */
BNWORD32 CDECL
bniMulAdd1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k);
#define bniMulAdd1_32 bniMulAdd1_32
BNWORD32 CDECL
bniMulSub1_32(BNWORD32 *out, BNWORD32 const *in, unsigned len, BNWORD32 k);
#define bniMulSub1_32 bniMulSub1_32
/* Disable warning for no return value, typical of asm functions */
#pragma warning( disable : 4035 )
/*
 * bniMulAdd1_32: multiply the len words at inParam by the single word k
 * and add the product, word by word, into the len words at outParam.
 * Returns the final carry word.
 *
 *   outParam - accumulator, updated in place (lowest-addressed word first).
 *   inParam  - multiplicand words, read only.
 *   len      - number of words to process.  The first word is handled
 *              unconditionally before any length test, so len must be
 *              at least 1.
 *   k        - single-word multiplier.
 *
 * Register use inside the asm block:
 *   esi = read pointer into inParam
 *   edi = read/write pointer into outParam
 *   ecx = k
 *   ebp = remaining word count (saved and restored by hand with push/pop)
 *   edx = carry word handed from one step to the next
 *   eax = low word of the current product, ebx = current output word
 *
 * len is loaded into ebp last ("must be last" in the original comment).
 * NOTE(review): presumably because the named parameters are addressed
 * through ebp and become unreachable once it is overwritten - confirm.
 *
 * After the first word, the remaining words go through a loop that is
 * unrolled four times.  The entry point ma32_case0..ma32_case3 is picked
 * from ((len - 1) & 3), and esi/edi are advanced by that many words up
 * front so each unrolled body can use fixed offsets.  A chain of compares
 * stands in for the jump table that the inline assembler cannot express.
 *
 * There is no C return statement: the result is left in eax at the end
 * of the asm block, which is why warning 4035 is disabled around this
 * function.
 */
BNWORD32
bniMulAdd1_32(
BNWORD32 *outParam, BNWORD32 const *inParam, unsigned len, BNWORD32 k)
{
__asm
{
mov esi, inParam ; load inParam
mov edi, outParam ; load outParam
mov ecx, k ; load k
push ebp ; preserve ebp for return block
mov ebp, len ; load len (must be last)
;; First multiply step has no carry in.
mov eax,[esi] ; U
mov ebx,[edi] ; V
mul ecx ; NP first multiply
add ebx,eax ; U
lea eax,[ebp*4-4] ; V loop unrolling
adc edx,0 ; U
and eax,12 ; V loop unrolling
mov [edi],ebx ; U
add esi,eax ; V loop unrolling
add edi,eax ; U loop unrolling
; inline assembler won't do tables, so use simple compare code
; jmp DWORD PTR ma32_jumptable[eax] ; NP loop unrolling
;
; align 4
;ma32_jumptable:
; dd ma32_case0
; dd ma32_case1
; dd ma32_case2
; dd ma32_case3
;
; nop
cmp eax, 0
je ma32_case0
cmp eax, 4
je ma32_case1
cmp eax, 8
je ma32_case2
jmp ma32_case3
align 8
nop
nop
nop ; To align loop properly
ma32_case0:
sub ebp,4 ; U
jbe SHORT ma32_done ; V
ma32_loop:
mov eax,[esi+4] ; U
mov ebx,edx ; V Remember carry for later
add esi,16 ; U
add edi,16 ; V
mul ecx ; NP
add eax,ebx ; U Add carry in from previous word
mov ebx,[edi-12] ; V
adc edx,0 ; U
add ebx,eax ; V
adc edx,0 ; U
mov [edi-12],ebx ; V
ma32_case3:
mov eax,[esi-8] ; U
mov ebx,edx ; V Remember carry for later
mul ecx ; NP
add eax,ebx ; U Add carry in from previous word
mov ebx,[edi-8] ; V
adc edx,0 ; U
add ebx,eax ; V
adc edx,0 ; U
mov [edi-8],ebx ; V
ma32_case2:
mov eax,[esi-4] ; U
mov ebx,edx ; V Remember carry for later
mul ecx ; NP
add eax,ebx ; U Add carry in from previous word
mov ebx,[edi-4] ; V
adc edx,0 ; U
add ebx,eax ; V
adc edx,0 ; U
mov [edi-4],ebx ; V
ma32_case1:
mov eax,[esi] ; U
mov ebx,edx ; V Remember carry for later
mul ecx ; NP
add eax,ebx ; U Add carry in from previous word
mov ebx,[edi] ; V
adc edx,0 ; U
add ebx,eax ; V
adc edx,0 ; U
mov [edi],ebx ; V
sub ebp,4 ; U
ja SHORT ma32_loop ; V
ma32_done:
mov eax,edx ; U
pop ebp
}
}
/*
 * bniMulSub1_32: multiply the len words at inParam by the single word k
 * and subtract the product, word by word, from the len words at
 * outParam.  Returns the final borrow word.
 *
 *   outParam - value being reduced, updated in place (lowest-addressed
 *              word first).
 *   inParam  - multiplicand words, read only.
 *   len      - number of words to process.  The first word is handled
 *              unconditionally before any length test, so len must be
 *              at least 1.
 *   k        - single-word multiplier.
 *
 * The structure mirrors bniMulAdd1_32; the only arithmetic difference is
 * that each output word has the product's low word subtracted from it
 * ("sub ebx,eax"), and the resulting borrow is folded into the running
 * carry word in edx with "adc edx,0".
 *
 * Register use inside the asm block:
 *   esi = read pointer into inParam
 *   edi = read/write pointer into outParam
 *   ecx = k
 *   ebp = remaining word count (saved and restored by hand with push/pop)
 *   edx = carry/borrow word handed from one step to the next
 *   eax = low word of the current product, ebx = current output word
 *
 * After the first word, the remaining words go through a loop that is
 * unrolled four times.  The entry point ms32_case0..ms32_case3 is picked
 * from ((len - 1) & 3), and esi/edi are advanced by that many words up
 * front so each unrolled body can use fixed offsets.
 *
 * There is no C return statement: the result is left in eax at the end
 * of the asm block, which is why warning 4035 is disabled around this
 * function.
 */
BNWORD32
bniMulSub1_32(
BNWORD32 *outParam, BNWORD32 const *inParam, unsigned len, BNWORD32 k)
{
__asm
{
mov esi, inParam ; load inParam
mov edi, outParam ; load outParam
mov ecx, k ; load k
push ebp ; preserve ebp for return block
mov ebp, len ; load len (must be last)
;; First multiply step has no carry in.
mov eax,[esi] ; V
mov ebx,[edi] ; U
mul ecx ; NP first multiply
sub ebx,eax ; U
lea eax,[ebp*4-4] ; V loop unrolling
adc edx,0 ; U
and eax,12 ; V loop unrolling
mov [edi],ebx ; U
add esi,eax ; V loop unrolling
add edi,eax ; U loop unrolling
; inline assembler won't do tables, so use simple compare code
; jmp DWORD PTR ms32_jumptable[eax] ; NP loop unrolling
;
; align 4
;ms32_jumptable:
; dd ms32_case0
; dd ms32_case1
; dd ms32_case2
; dd ms32_case3
;
; nop
cmp eax, 0
je ms32_case0
cmp eax, 4
je ms32_case1
cmp eax, 8
je ms32_case2
jmp ms32_case3
align 8
nop
nop
nop
ms32_case0:
sub ebp,4 ; U
jbe SHORT ms32_done ; V
ms32_loop:
mov eax,[esi+4] ; U
mov ebx,edx ; V Remember carry for later
add esi,16 ; U
add edi,16 ; V
mul ecx ; NP
add eax,ebx ; U Add carry in from previous word
mov ebx,[edi-12] ; V
adc edx,0 ; U
sub ebx,eax ; V
adc edx,0 ; U
mov [edi-12],ebx ; V
ms32_case3:
mov eax,[esi-8] ; U
mov ebx,edx ; V Remember carry for later
mul ecx ; NP
add eax,ebx ; U Add carry in from previous word
mov ebx,[edi-8] ; V
adc edx,0 ; U
sub ebx,eax ; V
adc edx,0 ; U
mov [edi-8],ebx ; V
ms32_case2:
mov eax,[esi-4] ; U
mov ebx,edx ; V Remember carry for later
mul ecx ; NP
add eax,ebx ; U Add carry in from previous word
mov ebx,[edi-4] ; V
adc edx,0 ; U
sub ebx,eax ; V
adc edx,0 ; U
mov [edi-4],ebx ; V
ms32_case1:
mov eax,[esi] ; U
mov ebx,edx ; V Remember carry for later
mul ecx ; NP
add eax,ebx ; U Add carry in from previous word
mov ebx,[edi] ; V
adc edx,0 ; U
sub ebx,eax ; V
adc edx,0 ; U
mov [edi],ebx ; V
sub ebp,4 ; U
ja SHORT ms32_loop ; V
ms32_done:
mov eax,edx ; U
pop ebp
}
}
/* Reenable missing return value warning */
#pragma warning( default : 4035 )
/*
* Copy an array of words. <Marvin mode on> Thrilling, isn't it? </Marvin>
* This is a good example of how the byte offsets and BIGLITTLE() macros work.
* Another alternative would have been
* memcpy(dest BIG(-len), src BIG(-len), len*sizeof(BNWORD32)), but I find that
* putting operators into conditional macros is confusing.
*/
#ifndef bniCopy_32
/*
 * bniCopy_32: copy len words from src to dest with memcpy.
 *
 * In the big-endian configuration both pointers mark the end of their
 * arrays, so each is stepped back by len words to find the first word
 * to copy; in the little-endian configuration they are used as given.
 */
void
bniCopy_32(BNWORD32 *dest, BNWORD32 const *src, unsigned len)
{
	BNWORD32 *to = BIGLITTLE(dest - len, dest);
	BNWORD32 const *from = BIGLITTLE(src - len, src);

	memcpy(to, from, len * sizeof(BNWORD32));
}
#endif /* !bniCopy_32 */
/*
* Fill n words with zero. This does it manually rather than calling
* memset because it can assume alignment to make things faster while
* memset can't. Note how big-endian numbers are naturally addressed
* using predecrement, while little-endian is postincrement.
*/
#ifndef bniZero_32
/*
 * bniZero_32: store zero into len consecutive words of num.
 *
 * The pointer is stepped before the store in the big-endian
 * configuration (pre-decrement) and after it in the little-endian
 * configuration (post-increment).  A len of 0 writes nothing.
 */
void
bniZero_32(BNWORD32 *num, unsigned len)
{
	for (; len != 0; --len) {
		BIG(--num;)
		*num = 0;
		LITTLE(++num;)
	}
}
#endif /* !bniZero_32 */
/*
* Negate an array of words.
* Negation is subtraction from zero. Negating low-order words
* entails doing nothing until a non-zero word is hit. Once that
* is negated, a borrow is generated and never dies until the end
* of the number is hit. Negation with borrow, -x-1, is the same as ~x.
* Repeat that until the end of the number.
*
* Doesn't return borrow out because that's pretty useless - it's
* always set unless the input is 0, which is easy to notice in
* normalized form.
*/
#ifndef bniNeg_32
/*
 * bniNeg_32: negate the len-word number at num in place.
 *
 *   num - number pointer (end of the array in the big-endian
 *         configuration, start of it otherwise).
 *   len - number of words; expected to be non-zero.
 *
 * Low-order zero words are left alone, the first non-zero word is
 * negated, and every higher-order word is complemented.  An all-zero
 * input is returned unchanged.  No borrow is returned.
 *
 * A zero len is still reported through ASSERT in debug builds, but it is
 * now also rejected explicitly: ASSERT is compiled out of release
 * builds, and without the check the skip loop would read a word that is
 * not part of the number and could wrap len around to a huge value.
 */
void
bniNeg_32(BNWORD32 *num, unsigned len)
{
	ASSERT(len);
	if (len == 0)
		return;

	/* Skip low-order zero words */
	while (BIGLITTLE(*--num,*num) == 0) {
		if (!--len)
			return;	/* The whole number is zero; nothing to negate */
		LITTLE(num++;)
	}

	/*
	 * Negate the lowest-order non-zero word.  Written as a subtraction
	 * from zero rather than a unary minus on an unsigned value; the
	 * result is the same.
	 */
	*num = 0 - *num;

	/* Complement all the higher-order words */
	while (--len) {
		BIGLITTLE(--num,++num);
		*num = ~*num;
	}
}
#endif /* !bniNeg_32 */
/*
* bniAdd1_32: add the single-word "carry" to the given number.
* Used for minor increments and propagating the carry after
* adding in a shorter bignum.
*
* Technique: If we have a double-width word, presumably the compiler
* can add using its carry in inline code, so we just use a larger
* accumulator to compute the carry from the first addition.
* If not, it's more complex. After adding the first carry, which may
* be > 1, compare the sum and the carry. If the sum wraps (causing a
* carry out from the addition), the result will be less than each of the
* inputs, since the wrap subtracts a number (2^32) which is larger than
* the other input can possibly be. If the sum is >= the carry input,
* return success immediately.
* In either case, if there is a carry, enter a loop incrementing words
* until one does not wrap. Since we are adding 1 each time, the wrap
* will be to 0 and we can test for equality.
*/
#ifndef bniAdd1_32 /* If defined, it's provided as an asm subroutine */
#ifdef BNWORD64
BNWORD32
bniAdd1_32(BNWORD32 *num, unsigned len, BNWORD32 carry)
{
BNWORD64 t;
ASSERT(len > 0); /* Alternative: if (!len) return carry */
t = (BNWORD64)BIGLITTLE(*--num,*num) + carry;
BIGLITTLE(*num,*num++) = (BNWORD32)t;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -