integer.cpp
来自「这个文件是windows mysql源码」· C++ 代码 · 共 2,603 行 · 第 1/5 页
CPP
2,603 行
// NOTE(review): the lines down to the closing "#endif // SSE2_INTRINSICS_AVAILABLE"
// are the tail of a function whose beginning lies before this chunk.  What is
// visible: it executes the SSE2 instruction "xorpd" under setjmp(s_env), reports
// false when control comes back through setjmp with a non-zero value, and
// re-installs oldHandler for SIGILL before returning.
        return false;

    bool result = true;
    if (setjmp(s_env))
        result = false;
    else
        __asm __volatile ("xorpd %xmm0, %xmm0");

    signal(SIGILL, oldHandler);
    return result;
#endif
}
#endif // SSE2_INTRINSICS_AVAILABLE


// Returns true when IsPentium() holds and bits 8..11 of cpuid[0], as filled in
// by CpuId(1, ...), are all set (value 0xf).
// NOTE(review): presumably cpuid[0] is EAX of CPUID leaf 1, which would make
// this the processor "family" field -- confirm against CpuId().
static bool IsP4()
{
    if (!IsPentium())
        return false;

    word32 cpuid[4];

    CpuId(1, cpuid);

    return ((cpuid[0] >> 8) & 0xf) == 0xf;
}

// ************** Pentium/P4 optimizations ***************

// Assembly implementations of the low-level word-array primitives; derives
// from Portable, so anything not declared here is inherited from it.
class PentiumOptimized : public Portable
{
public:
    static word TAOCRYPT_CDECL Add(word *C, const word *A, const word *B, unsigned int N);
    static word TAOCRYPT_CDECL Subtract(word *C, const word *A, const word *B, unsigned int N);
    static void TAOCRYPT_CDECL Multiply4(word *C, const word *A, const word *B);
    static void TAOCRYPT_CDECL Multiply8(word *C, const word *A, const word *B);
    static void TAOCRYPT_CDECL Multiply8Bottom(word *C, const word *A, const word *B);
};

// Variants selected by SetPentiumFunctionPointers() below.  The multiply
// routines are only declared when SSE2_INTRINSICS_AVAILABLE is defined.
class P4Optimized
{
public:
    static word TAOCRYPT_CDECL Add(word *C, const word *A, const word *B, unsigned int N);
    static word TAOCRYPT_CDECL Subtract(word *C, const word *A, const word *B, unsigned int N);
#ifdef SSE2_INTRINSICS_AVAILABLE
    static void TAOCRYPT_CDECL Multiply4(word *C, const word *A, const word *B);
    static void TAOCRYPT_CDECL Multiply8(word *C, const word *A, const word *B);
    static void TAOCRYPT_CDECL Multiply8Bottom(word *C, const word *A, const word *B);
#endif
};

// Function-pointer types matching the Add/Subtract and Multiply* signatures.
typedef word (TAOCRYPT_CDECL * PAddSub)(word *C, const word *A, const word *B, unsigned int N);
typedef void (TAOCRYPT_CDECL * PMul)(word *C, const word *A, const word *B);

// Dispatch targets, filled in by SetPentiumFunctionPointers().
static PAddSub s_pAdd, s_pSub;

#ifdef SSE2_INTRINSICS_AVAILABLE
static PMul s_pMul4, s_pMul8, s_pMul8B;
#endif

// Chooses the implementations behind s_pAdd/s_pSub (and, when
// SSE2_INTRINSICS_AVAILABLE is defined, s_pMul4/s_pMul8/s_pMul8B):
//   - !IsPentium()            -> Portable
//   - IsP4() (add/subtract)   -> P4Optimized
//   - HasSSE2() (multiply)    -> P4Optimized
//   - otherwise               -> PentiumOptimized
// Note that add/subtract key on IsP4() while multiply keys on HasSSE2().
static void SetPentiumFunctionPointers()
{
    if (!IsPentium())
    {
        s_pAdd = &Portable::Add;
        s_pSub = &Portable::Subtract;
    }
    else if (IsP4())
    {
        s_pAdd = &P4Optimized::Add;
        s_pSub = &P4Optimized::Subtract;
    }
    else
    {
        s_pAdd = &PentiumOptimized::Add;
        s_pSub = &PentiumOptimized::Subtract;
    }

#ifdef SSE2_INTRINSICS_AVAILABLE
    if (!IsPentium())
    {
        s_pMul4 = &Portable::Multiply4;
        s_pMul8 = &Portable::Multiply8;
        s_pMul8B = &Portable::Multiply8Bottom;
    }
    else if (HasSSE2())
    {
        s_pMul4 = &P4Optimized::Multiply4;
        s_pMul8 = &P4Optimized::Multiply8;
        s_pMul8B = &P4Optimized::Multiply8Bottom;
    }
    else
    {
        s_pMul4 = &PentiumOptimized::Multiply4;
        s_pMul8 = &PentiumOptimized::Multiply8;
        s_pMul8B = &PentiumOptimized::Multiply8Bottom;
    }
#endif
}

// The initializer uses the comma operator to call SetPentiumFunctionPointers()
// as a side effect of initializing this otherwise unused variable; its value
// is always 0.
static const char s_RunAtStartupSetPentiumFunctionPointers = (SetPentiumFunctionPointers(), 0);

// Front end for the low-level primitives: Add/Subtract (and, with
// SSE2_INTRINSICS_AVAILABLE, the Multiply* routines) forward through the
// s_p* pointers; everything else comes from PentiumOptimized.
class LowLevel : public PentiumOptimized
{
public:
    inline static word Add(word *C, const word *A, const word *B, unsigned int N)
        {return s_pAdd(C, A, B, N);}
    inline static word Subtract(word *C, const word *A, const word *B, unsigned int N)
        {return s_pSub(C, A, B, N);}
    // Squaring is expressed as multiplying A by itself.
    inline static void Square4(word *R, const word *A)
        {Multiply4(R, A, A);}
#ifdef SSE2_INTRINSICS_AVAILABLE
    inline static void Multiply4(word *C, const word *A, const word *B)
        {s_pMul4(C, A, B);}
    inline static void Multiply8(word *C, const word *A, const word *B)
        {s_pMul8(C, A, B);}
    inline static void Multiply8Bottom(word *C, const word *A, const word *B)
        {s_pMul8B(C, A, B);}
#endif
};

// use some tricks to share assembly code between MSVC and GCC
//
// AS1/AS2 wrap one- and two-operand instructions: under MSVC they expand to
// "__asm" statements, otherwise to string literals that are concatenated
// inside the __asm__ statement opened by *Prologue and closed by *Epilogue.
#ifdef _MSC_VER
    #define TAOCRYPT_NAKED __declspec(naked)
    #define AS1(x) __asm x
    #define AS2(x, y) __asm x, y
    // Saves ebp/ebx/esi/edi, then loads the four stack arguments found above
    // the four saved registers and the return address:
    // ecx = [esp+20], edx = [esp+24], ebx = [esp+28], esi = [esp+32].
    #define AddPrologue \
        __asm push ebp \
        __asm push ebx \
        __asm push esi \
        __asm push edi \
        __asm mov ecx, [esp+20] \
        __asm mov edx, [esp+24] \
        __asm mov ebx, [esp+28] \
        __asm mov esi, [esp+32]
    // Restores the registers saved by AddPrologue and returns.
    #define AddEpilogue \
        __asm pop edi \
        __asm pop esi \
        __asm pop ebx \
        __asm pop ebp \
        __asm ret
    // Saves ebp/ebx/esi/edi, loads ecx = [esp+28] and esi = [esp+24], then
    // pushes [esp+20] so that it is available at [esp] inside the body.
    #define MulPrologue \
        __asm push ebp \
        __asm push ebx \
        __asm push esi \
        __asm push edi \
        __asm mov ecx, [esp+28] \
        __asm mov esi, [esp+24] \
        __asm push [esp+20]
    // Drops the extra slot pushed by MulPrologue, restores registers, returns.
    #define MulEpilogue \
        __asm add esp, 4 \
        __asm pop edi \
        __asm pop esi \
        __asm pop ebx \
        __asm pop ebp \
        __asm ret
#else
    #define TAOCRYPT_NAKED
    #define AS1(x) #x ";"
    #define AS2(x, y) #x ", " #y ";"
    // Opens the __asm__ statement (closed by AddEpilogue), saves ebx and ebp by
    // hand, loads operand %2 (B) into ebx and switches to Intel syntax.
    // NOTE(review): %2 is an "m" operand read after "push %%ebx" has moved esp;
    // if the compiler addresses it relative to esp the wrong slot would be
    // read -- confirm for the build flags in use.
    #define AddPrologue \
        __asm__ __volatile__ \
        ( \
            "push %%ebx;" /* save this manually, in case of -fPIC */ \
            "mov %2, %%ebx;" \
            ".intel_syntax noprefix;" \
            "push ebp;"
    // Restores ebp/ebx, switches back to AT&T syntax and closes the statement.
    // Inputs: ecx = C, edx = A, memory operand = B, esi = N.
    // NOTE(review): the clobber list names only edi/memory/cc, while the bodies
    // that use this macro also write eax, ecx, edx and esi, and there is no
    // C++ return statement (the result is left in eax) -- confirm this is
    // acceptable for the compilers in use.
    #define AddEpilogue \
            "pop ebp;" \
            ".att_syntax prefix;" \
            "pop %%ebx;" \
                : \
                : "c" (C), "d" (A), "m" (B), "S" (N) \
                : "%edi", "memory", "cc" \
        );
    // Opens the __asm__ statement (closed by MulEpilogue), saves ebx and ebp,
    // pushes operand %0 (Z) so that it is at [esp], switches to Intel syntax.
    // NOTE(review): %0 may be a memory operand and is read after two pushes
    // have moved esp -- same concern as for AddPrologue; confirm.
    #define MulPrologue \
        __asm__ __volatile__ \
        ( \
            "push %%ebx;" /* save this manually, in case of -fPIC */ \
            "push %%ebp;" \
            "push %0;" \
            ".intel_syntax noprefix;"
    // Drops the pushed Z slot, restores ebp/ebx, switches back to AT&T syntax
    // and closes the statement.  Inputs: Z (register or memory), esi = X,
    // ecx = Y.
    #define MulEpilogue \
            "add esp, 4;" \
            "pop ebp;" \
            "pop ebx;" \
            ".att_syntax prefix;" \
                : \
                : "rm" (Z), "S" (X), "c" (Y) \
                : "%eax", "%edx", "%edi", "memory", "cc" \
        );
#endif

// Adds the N-word arrays A and B into C using adc, handling two words per
// loop iteration, and leaves the final carry in eax (the return register).
// The loop counter is -N shifted right by one, so N is presumably required
// to be even -- confirm with callers.
TAOCRYPT_NAKED word PentiumOptimized::Add(word *C, const word *A, const word *B, unsigned int N)
{
    AddPrologue

    // now: ebx = B, ecx = C, edx = A, esi = N
    AS2( sub ecx, edx)              // hold the distance between C & A so we
                                    // can add this to A to get C
    AS2( xor eax, eax)              // clear eax

    AS2( sub eax, esi)              // eax is a negative index from end of B
    AS2( lea ebx, [ebx+4*esi])      // ebx is end of B

    AS2( sar eax, 1)                // unit of eax is now dwords; this also
                                    // clears the carry flag
    AS1( jz loopendAdd)             // if no dwords then nothing to do

    AS1(loopstartAdd:)
    AS2( mov esi,[edx])             // load lower word of A
    AS2( mov ebp,[edx+4])           // load higher word of A

    AS2( mov edi,[ebx+8*eax])       // load lower word of B
    AS2( lea edx,[edx+8])           // advance A and C

    AS2( adc esi,edi)               // add lower words
    AS2( mov edi,[ebx+8*eax+4])     // load higher word of B

    AS2( adc ebp,edi)               // add higher words
    AS1( inc eax)                   // advance B

    AS2( mov [edx+ecx-8],esi)       // store lower word result
    AS2( mov [edx+ecx-4],ebp)       // store higher word result

    AS1( jnz loopstartAdd)          // loop until eax overflows and becomes zero

    AS1(loopendAdd:)
    AS2( adc eax, 0)                // store carry into eax (return result register)

    AddEpilogue
}

// Subtracts the N-word array B from A into C using sbb, handling two words
// per loop iteration, and leaves the final borrow in eax (the return
// register).  Same structure as PentiumOptimized::Add; N is presumably
// required to be even -- confirm with callers.
TAOCRYPT_NAKED word PentiumOptimized::Subtract(word *C, const word *A, const word *B, unsigned int N)
{
    AddPrologue

    // now: ebx = B, ecx = C, edx = A, esi = N
    AS2( sub ecx, edx)              // hold the distance between C & A so we
                                    // can add this to A to get C
    AS2( xor eax, eax)              // clear eax

    AS2( sub eax, esi)              // eax is a negative index from end of B
    AS2( lea ebx, [ebx+4*esi])      // ebx is end of B

    AS2( sar eax, 1)                // unit of eax is now dwords; this also
                                    // clears the carry flag
    AS1( jz loopendSub)             // if no dwords then nothing to do

    AS1(loopstartSub:)
    AS2( mov esi,[edx])             // load lower word of A
    AS2( mov ebp,[edx+4])           // load higher word of A

    AS2( mov edi,[ebx+8*eax])       // load lower word of B
    AS2( lea edx,[edx+8])           // advance A and C

    AS2( sbb esi,edi)               // subtract lower words
    AS2( mov edi,[ebx+8*eax+4])     // load higher word of B

    AS2( sbb ebp,edi)               // subtract higher words
    AS1( inc eax)                   // advance B

    AS2( mov [edx+ecx-8],esi)       // store lower word result
    AS2( mov [edx+ecx-4],ebp)       // store higher word result

    AS1( jnz loopstartSub)          // loop until eax overflows and becomes zero

    AS1(loopendSub:)
    AS2( adc eax, 0)                // store carry (the borrow left by the last
                                    // sbb) into eax (return result register)

    AddEpilogue
}

// On Pentium 4, the adc and sbb instructions are very expensive, so avoid them.
//
// Adds the N-word arrays A and B into C, two words per iteration.  The carry
// between words is kept as 0/1 in eax and maintained with add/jc/cmovc.
// esi holds -N and is stepped by 2 until it reaches zero, so N is presumably
// required to be even -- confirm with callers.  eax holds the final carry
// when the epilogue runs.
TAOCRYPT_NAKED word P4Optimized::Add(word *C, const word *A, const word *B, unsigned int N)
{
    AddPrologue

    // now: ebx = B, ecx = C, edx = A, esi = N
    AS2( xor eax, eax)
    AS1( neg esi)
    AS1( jz loopendAddP4)           // if no dwords then nothing to do

    // first iteration: no incoming carry, so enter at the second add
    AS2( mov edi, [edx])
    AS2( mov ebp, [ebx])
    AS1( jmp carry1AddP4)

    AS1(loopstartAddP4:)
    AS2( mov edi, [edx+8])
    AS2( add ecx, 8)
    AS2( add edx, 8)
    AS2( mov ebp, [ebx])
    AS2( add edi, eax)              // add incoming carry to the word of A
    AS1( jc carry1AddP4)            // overflowed: eax stays 1 as carry out
    AS2( xor eax, eax)

    AS1(carry1AddP4:)
    AS2( add edi, ebp)
    AS2( mov ebp, 1)
    AS2( mov [ecx], edi)
    AS2( mov edi, [edx+4])
    AS2( cmovc eax, ebp)            // eax = 1 if the add above carried
    AS2( mov ebp, [ebx+4])
    AS2( add ebx, 8)
    AS2( add edi, eax)
    AS1( jc carry2AddP4)
    AS2( xor eax, eax)

    AS1(carry2AddP4:)
    AS2( add edi, ebp)
    AS2( mov ebp, 1)
    AS2( cmovc eax, ebp)
    AS2( mov [ecx+4], edi)
    AS2( add esi, 2)
    AS1( jnz loopstartAddP4)

    AS1(loopendAddP4:)

    AddEpilogue
}

// Subtracts the N-word array B from A into C, two words per iteration, with
// the borrow kept as 0/1 in eax and maintained with sub/jc/cmovc instead of
// sbb.  Mirrors P4Optimized::Add; N is presumably required to be even --
// confirm with callers.  eax holds the final borrow when the epilogue runs.
TAOCRYPT_NAKED word P4Optimized::Subtract(word *C, const word *A, const word *B, unsigned int N)
{
    AddPrologue

    // now: ebx = B, ecx = C, edx = A, esi = N
    AS2( xor eax, eax)
    AS1( neg esi)
    AS1( jz loopendSubP4)           // if no dwords then nothing to do

    // first iteration: no incoming borrow, so enter at the second sub
    AS2( mov edi, [edx])
    AS2( mov ebp, [ebx])
    AS1( jmp carry1SubP4)

    AS1(loopstartSubP4:)
    AS2( mov edi, [edx+8])
    AS2( add edx, 8)
    AS2( add ecx, 8)
    AS2( mov ebp, [ebx])
    AS2( sub edi, eax)              // take incoming borrow from the word of A
    AS1( jc carry1SubP4)            // underflowed: eax stays 1 as borrow out
    AS2( xor eax, eax)

    AS1(carry1SubP4:)
    AS2( sub edi, ebp)
    AS2( mov ebp, 1)
    AS2( mov [ecx], edi)
    AS2( mov edi, [edx+4])
    AS2( cmovc eax, ebp)            // eax = 1 if the sub above borrowed
    AS2( mov ebp, [ebx+4])
    AS2( add ebx, 8)
    AS2( sub edi, eax)
    AS1( jc carry2SubP4)
    AS2( xor eax, eax)

    AS1(carry2SubP4:)
    AS2( sub edi, ebp)
    AS2( mov ebp, 1)
    AS2( cmovc eax, ebp)
    AS2( mov [ecx+4], edi)
    AS2( add esi, 2)
    AS1( jnz loopstartSubP4)

    AS1(loopendSubP4:)

    AddEpilogue
}

// multiply assembly code originally contributed by Leonard Janke
//
// Register use in the macros below: ebp accumulates the low word of the
// current column, edi the next word, and bl the carries out of edi (bh stays
// zero because ebx is cleared).  esi and ecx point at the two operands and
// [esp] holds the destination pointer (see MulPrologue).

// Clears the three accumulator registers.
#define MulStartup \
    AS2(xor ebp, ebp) \
    AS2(xor edi, edi) \
    AS2(xor ebx, ebx)

// Moves the accumulator down one word for the next column: edx (where
// MulStoreDigit parked the old edi) becomes the low word, ebx the middle
// word, and ebx is cleared.
#define MulShiftCarry \
    AS2(mov ebp, edx) \
    AS2(mov edi, ebx) \
    AS2(xor ebx, ebx)

// Adds only the low 32 bits of [ecx+4*j] * [esi+4*i] into ebp.
#define MulAccumulateBottom(i,j) \
    AS2(mov eax, [ecx+4*j]) \
    AS2(imul eax, dword ptr [esi+4*i]) \
    AS2(add ebp, eax)

// Adds the full 64-bit product [ecx+4*j] * [esi+4*i] (edx:eax) into the
// accumulator, propagating carries through edi and bl.
#define MulAccumulate(i,j) \
    AS2(mov eax, [ecx+4*j]) \
    AS1(mul dword ptr [esi+4*i]) \
    AS2(add ebp, eax) \
    AS2(adc edi, edx) \
    AS2(adc bl, bh)

// Parks edi in edx, reloads the destination pointer from [esp] into edi and
// stores the finished low word ebp at word index i of the destination.
#define MulStoreDigit(i) \
    AS2(mov edx, edi) \
    AS2(mov edi, [esp]) \
    AS2(mov [edi+4*i], ebp)

// Final column: multiplies the top words of both operands, adds the pending
// accumulator and stores the last two result words.
#define MulLastDiagonal(digits) \
    AS2(mov eax, [ecx+4*(digits-1)]) \
    AS1(mul dword ptr [esi+4*(digits-1)]) \
    AS2(add ebp, eax) \
    AS2(adc edx, edi) \
    AS2(mov edi, [esp]) \
    AS2(mov [edi+4*(2*digits-2)], ebp) \
    AS2(mov [edi+4*(2*digits-1)], edx)

// Multiplies the 4-word operands X and Y, writing result words 0..7 to Z.
// Each group below accumulates every product X[i]*Y[j] with the same i+j,
// stores that result word, then shifts the accumulator.
TAOCRYPT_NAKED void PentiumOptimized::Multiply4(word* Z, const word* X, const word* Y)
{
    MulPrologue
    // now: [esp] = Z, esi = X, ecx = Y
    MulStartup
    MulAccumulate(0,0)
    MulStoreDigit(0)
    MulShiftCarry

    MulAccumulate(1,0)
    MulAccumulate(0,1)
    MulStoreDigit(1)
    MulShiftCarry

    MulAccumulate(2,0)
    MulAccumulate(1,1)
    MulAccumulate(0,2)
    MulStoreDigit(2)
    MulShiftCarry

    MulAccumulate(3,0)
    MulAccumulate(2,1)
    MulAccumulate(1,2)
    MulAccumulate(0,3)
    MulStoreDigit(3)
    MulShiftCarry

    MulAccumulate(3,1)
    MulAccumulate(2,2)
    MulAccumulate(1,3)
    MulStoreDigit(4)
    MulShiftCarry

    MulAccumulate(3,2)
    MulAccumulate(2,3)
    MulStoreDigit(5)
    MulShiftCarry

    MulLastDiagonal(4)

    MulEpilogue
}

// NOTE(review): the body of Multiply8 continues beyond the end of this chunk;
// only the columns for result words 0..6 are visible here.  The visible part
// follows the same column-by-column pattern as Multiply4.
TAOCRYPT_NAKED void PentiumOptimized::Multiply8(word* Z, const word* X, const word* Y)
{
    MulPrologue
    // now: [esp] = Z, esi = X, ecx = Y
    MulStartup
    MulAccumulate(0,0)
    MulStoreDigit(0)
    MulShiftCarry

    MulAccumulate(1,0)
    MulAccumulate(0,1)
    MulStoreDigit(1)
    MulShiftCarry

    MulAccumulate(2,0)
    MulAccumulate(1,1)
    MulAccumulate(0,2)
    MulStoreDigit(2)
    MulShiftCarry

    MulAccumulate(3,0)
    MulAccumulate(2,1)
    MulAccumulate(1,2)
    MulAccumulate(0,3)
    MulStoreDigit(3)
    MulShiftCarry

    MulAccumulate(4,0)
    MulAccumulate(3,1)
    MulAccumulate(2,2)
    MulAccumulate(1,3)
    MulAccumulate(0,4)
    MulStoreDigit(4)
    MulShiftCarry

    MulAccumulate(5,0)
    MulAccumulate(4,1)
    MulAccumulate(3,2)
    MulAccumulate(2,3)
    MulAccumulate(1,4)
    MulAccumulate(0,5)
    MulStoreDigit(5)
    MulShiftCarry

    MulAccumulate(6,0)
    MulAccumulate(5,1)
    MulAccumulate(4,2)
    MulAccumulate(3,3)
    MulAccumulate(2,4)
    MulAccumulate(1,5)
    MulAccumulate(0,6)
    MulStoreDigit(6)
    MulShiftCarry
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?