⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 integer.cpp

📁 研读AxCrypt对加解密的处理方法
💻 CPP
📖 第 1 页 / 共 5 页
字号:
	DWord d = DWord::Multiply(D[di], D[di+2]);
	D[1] = D[3] = 0;
	unsigned int si = ai + !bi;
	word s = D[si];

	DWord A0B0 = DWord::Multiply(A[0], B[0]);
	DWord t = A0B0 + C[0];
	C[0] = t.GetLowHalf();

	DWord A1B1 = DWord::Multiply(A[1], B[1]);
	t = (DWord) t.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf() + C[1];
	C[1] = t.GetLowHalf();

	t = (DWord) t.GetHighHalf() + A1B1.GetLowHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s + C[2];
	C[2] = t.GetLowHalf();

	t = (DWord) t.GetHighHalf() + A1B1.GetHighHalf() + C[3];
	C[3] = t.GetLowHalf();
	return t.GetHighHalf();
}

// MulAcc(x, y): accumulate the product A[x]*B[y] into the three-word running
// sum held in c (lowest word), d and e (highest word).
// DWord::MultiplyAndAdd is expected to return A[x]*B[y] + c as a double word;
// its low half becomes the new c and its high half is carried into d, with
// any carry out of d counted in e.
// Requires `DWord p`, `word c, d, e` and the arrays A and B in the caller's scope.
// The body is wrapped in do { } while (0) so the macro expands to exactly one
// statement and stays correct under an unbraced if/else.
#define MulAcc(x, y)								\
	do {											\
		p = DWord::MultiplyAndAdd(A[x], B[y], c);	\
		c = p.GetLowHalf();							\
		p = (DWord) d + p.GetHighHalf();			\
		d = p.GetLowHalf();							\
		e += p.GetHighHalf();						\
	} while (0)

// SaveMulAcc(s, x, y): finish the current result column and start the next.
// Stores the completed low accumulator word c into R[s], then shifts the
// accumulator down one word (d -> c, e -> d, e restarts from the carry) while
// adding the first product A[x]*B[y] of the new column.
// Requires `DWord p`, `word c, d, e` and the arrays R, A and B in the caller's scope.
// The body is wrapped in do { } while (0) so the macro expands to exactly one
// statement and stays correct under an unbraced if/else.
#define SaveMulAcc(s, x, y) 						\
	do {											\
		R[s] = c;									\
		p = DWord::MultiplyAndAdd(A[x], B[y], d);	\
		c = p.GetLowHalf();							\
		p = (DWord) e + p.GetHighHalf();			\
		d = p.GetLowHalf();							\
		e = p.GetHighHalf();						\
	} while (0)

// SquAcc(x, y): accumulate 2*A[x]*A[y] into the three-word running sum
// held in c (lowest word), d and e (highest word).
// The product q is computed once and then added twice, because the cross
// term A[x]*A[y] (x != y) appears twice in a square.
// Requires `DWord p, q`, `word c, d, e` and the array A in the caller's scope.
// The body is wrapped in do { } while (0) so the macro expands to exactly one
// statement and stays correct under an unbraced if/else.
#define SquAcc(x, y)								\
	do {											\
		q = DWord::Multiply(A[x], A[y]);			\
		/* first copy of the cross term */			\
		p = q + c;									\
		c = p.GetLowHalf();							\
		p = (DWord) d + p.GetHighHalf();			\
		d = p.GetLowHalf();							\
		e += p.GetHighHalf();						\
		/* second copy of the cross term */			\
		p = q + c;									\
		c = p.GetLowHalf();							\
		p = (DWord) d + p.GetHighHalf();			\
		d = p.GetLowHalf();							\
		e += p.GetHighHalf();						\
	} while (0)

// SaveSquAcc(s, x, y): finish the current result column of a square and start
// the next one with the doubled cross term 2*A[x]*A[y].
// Stores the completed low accumulator word c into R[s], shifts the
// accumulator down one word while adding the first copy of q = A[x]*A[y],
// then adds the second copy of q on top.
// Requires `DWord p, q`, `word c, d, e` and the arrays R and A in the caller's scope.
// The body is wrapped in do { } while (0) so the macro expands to exactly one
// statement and stays correct under an unbraced if/else.
#define SaveSquAcc(s, x, y) 						\
	do {											\
		R[s] = c;									\
		q = DWord::Multiply(A[x], A[y]);			\
		/* shift accumulator and add first copy */	\
		p = q + d;									\
		c = p.GetLowHalf();							\
		p = (DWord) e + p.GetHighHalf();			\
		d = p.GetLowHalf();							\
		e = p.GetHighHalf();						\
		/* add second copy */						\
		p = q + c;									\
		c = p.GetLowHalf();							\
		p = (DWord) d + p.GetHighHalf();			\
		d = p.GetLowHalf();							\
		e += p.GetHighHalf();						\
	} while (0)

// Full product of two 4-word operands: reads A[0..3] and B[0..3] and writes
// the eight result words R[0..7], R[0] being the least significant word.
// The product is formed one result column at a time: all A[i]*B[j] with the
// same i+j are summed into the accumulator words c, d, e (via MulAcc), and
// SaveMulAcc stores the finished word into R[] while starting the next column.
// The locals p, c, d, e are referenced by name from inside the macros.
void Portable::Multiply4(word *R, const word *A, const word *B)
{
	DWord p;
	word c, d, e;

	// column 0: the low half is final, the high half seeds the accumulator
	p = DWord::Multiply(A[0], B[0]);
	R[0] = p.GetLowHalf();
	c = p.GetHighHalf();
	d = e = 0;

	// column 1 (i+j == 1)
	MulAcc(0, 1);
	MulAcc(1, 0);

	// store R[1]; column 2
	SaveMulAcc(1, 2, 0);
	MulAcc(1, 1);
	MulAcc(0, 2);

	// store R[2]; column 3
	SaveMulAcc(2, 0, 3);
	MulAcc(1, 2);
	MulAcc(2, 1);
	MulAcc(3, 0);

	// store R[3]; column 4
	SaveMulAcc(3, 3, 1);
	MulAcc(2, 2);
	MulAcc(1, 3);

	// store R[4]; column 5
	SaveMulAcc(4, 2, 3);
	MulAcc(3, 2);

	// store R[5]; column 6 has the single term A[3]*B[3], whose high half
	// plus the pending carry word e forms the top word R[7]
	R[5] = c;
	p = DWord::MultiplyAndAdd(A[3], B[3], d);
	R[6] = p.GetLowHalf();
	R[7] = e + p.GetHighHalf();
}

// Square of a 2-word operand: reads A[0..1] and writes the four result words
// R[0..3], R[0] being the least significant word.
// The cross term A[0]*A[1] is added twice by SquAcc; the diagonal terms
// A[0]*A[0] and A[1]*A[1] are added once each.
// The locals p, q, c, d, e are referenced by name from inside the macros.
void Portable::Square2(word *R, const word *A)
{
	DWord p, q;
	word c, d, e;

	// column 0: the low half is final, the high half seeds the accumulator
	p = DWord::Multiply(A[0], A[0]);
	R[0] = p.GetLowHalf();
	c = p.GetHighHalf();
	d = e = 0;

	// column 1: 2*A[0]*A[1]
	SquAcc(0, 1);

	// store R[1]; column 2 is A[1]*A[1], its high half plus e forms R[3]
	R[1] = c;
	p = DWord::MultiplyAndAdd(A[1], A[1], d);
	R[2] = p.GetLowHalf();
	R[3] = e + p.GetHighHalf();
}

// Square of a 4-word operand: reads A[0..3] and writes the eight result words
// R[0..7], R[0] being the least significant word.
// Under MSVC this simply forwards to Multiply4(R, A, A). Otherwise the square
// is built column by column, adding each cross term A[x]*A[y] (x != y) twice
// through SquAcc/SaveSquAcc and each diagonal term A[i]*A[i] once via MulAcc.
void Portable::Square4(word *R, const word *A)
{
#ifdef _MSC_VER
	// VC60 workaround: MSVC 6.0 has an optimization bug that makes
	// (dword)A*B very expensive when either A or B has previously been
	// cast to a dword. Revisit this function when that bug is fixed.
	// NOTE(review): the guard covers every MSVC version, not only 6.0 -
	// confirm whether that is still wanted.
	Multiply4(R, A, A);
#else
	// MulAcc reads B[y]; alias B to A so it can be reused for diagonal terms
	const word *B = A;
	DWord p, q;
	word c, d, e;

	// column 0: the low half is final, the high half seeds the accumulator
	p = DWord::Multiply(A[0], A[0]);
	R[0] = p.GetLowHalf();
	c = p.GetHighHalf();
	d = e = 0;

	// column 1: 2*A[0]*A[1]
	SquAcc(0, 1);

	// store R[1]; column 2: 2*A[2]*A[0] + A[1]*A[1]
	SaveSquAcc(1, 2, 0);
	MulAcc(1, 1);

	// store R[2]; column 3: 2*A[0]*A[3] + 2*A[1]*A[2]
	SaveSquAcc(2, 0, 3);
	SquAcc(1, 2);

	// store R[3]; column 4: 2*A[3]*A[1] + A[2]*A[2]
	SaveSquAcc(3, 3, 1);
	MulAcc(2, 2);

	// store R[4]; column 5: 2*A[2]*A[3]
	SaveSquAcc(4, 2, 3);

	// store R[5]; column 6 is A[3]*A[3], its high half plus e forms R[7]
	R[5] = c;
	p = DWord::MultiplyAndAdd(A[3], A[3], d);
	R[6] = p.GetLowHalf();
	R[7] = e + p.GetHighHalf();
#endif
}

// Full product of two 8-word operands: reads A[0..7] and B[0..7] and writes
// the sixteen result words R[0..15], R[0] being the least significant word.
// Same column-by-column scheme as Multiply4: every A[i]*B[j] with equal i+j
// is summed into the accumulator words c, d, e by MulAcc, and SaveMulAcc
// stores the finished word into R[] while starting the next column.
// The locals p, c, d, e are referenced by name from inside the macros.
void Portable::Multiply8(word *R, const word *A, const word *B)
{
	DWord p;
	word c, d, e;

	// column 0: the low half is final, the high half seeds the accumulator
	p = DWord::Multiply(A[0], B[0]);
	R[0] = p.GetLowHalf();
	c = p.GetHighHalf();
	d = e = 0;

	// column 1 (i+j == 1)
	MulAcc(0, 1);
	MulAcc(1, 0);

	// store R[1]; column 2
	SaveMulAcc(1, 2, 0);
	MulAcc(1, 1);
	MulAcc(0, 2);

	// store R[2]; column 3
	SaveMulAcc(2, 0, 3);
	MulAcc(1, 2);
	MulAcc(2, 1);
	MulAcc(3, 0);

	// store R[3]; column 4
	SaveMulAcc(3, 0, 4);
	MulAcc(1, 3);
	MulAcc(2, 2);
	MulAcc(3, 1);
	MulAcc(4, 0);

	// store R[4]; column 5
	SaveMulAcc(4, 0, 5);
	MulAcc(1, 4);
	MulAcc(2, 3);
	MulAcc(3, 2);
	MulAcc(4, 1);
	MulAcc(5, 0);

	// store R[5]; column 6
	SaveMulAcc(5, 0, 6);
	MulAcc(1, 5);
	MulAcc(2, 4);
	MulAcc(3, 3);
	MulAcc(4, 2);
	MulAcc(5, 1);
	MulAcc(6, 0);

	// store R[6]; column 7 (the longest column, eight terms)
	SaveMulAcc(6, 0, 7);
	MulAcc(1, 6);
	MulAcc(2, 5);
	MulAcc(3, 4);
	MulAcc(4, 3);
	MulAcc(5, 2);
	MulAcc(6, 1);
	MulAcc(7, 0);

	// store R[7]; column 8
	SaveMulAcc(7, 1, 7);
	MulAcc(2, 6);
	MulAcc(3, 5);
	MulAcc(4, 4);
	MulAcc(5, 3);
	MulAcc(6, 2);
	MulAcc(7, 1);

	// store R[8]; column 9
	SaveMulAcc(8, 2, 7);
	MulAcc(3, 6);
	MulAcc(4, 5);
	MulAcc(5, 4);
	MulAcc(6, 3);
	MulAcc(7, 2);

	// store R[9]; column 10
	SaveMulAcc(9, 3, 7);
	MulAcc(4, 6);
	MulAcc(5, 5);
	MulAcc(6, 4);
	MulAcc(7, 3);

	// store R[10]; column 11
	SaveMulAcc(10, 4, 7);
	MulAcc(5, 6);
	MulAcc(6, 5);
	MulAcc(7, 4);

	// store R[11]; column 12
	SaveMulAcc(11, 5, 7);
	MulAcc(6, 6);
	MulAcc(7, 5);

	// store R[12]; column 13
	SaveMulAcc(12, 6, 7);
	MulAcc(7, 6);

	// store R[13]; column 14 has the single term A[7]*B[7], whose high half
	// plus the pending carry word e forms the top word R[15]
	R[13] = c;
	p = DWord::MultiplyAndAdd(A[7], B[7], d);
	R[14] = p.GetLowHalf();
	R[15] = e + p.GetHighHalf();
}

// Low half of a 4x4-word product: reads A[0..3] and B[0..3] and writes only
// the four least significant result words R[0..3].
// Columns 0..2 are accumulated exactly as in Multiply4. For column 3 only
// the low word of each term is needed, so it is formed with plain word
// multiplications and additions (which presumably wrap, assuming word is an
// unsigned integer type) added to the pending accumulator word d.
// The locals p, c, d, e are referenced by name from inside the macros.
void Portable::Multiply4Bottom(word *R, const word *A, const word *B)
{
	DWord p;
	word c, d, e;

	// column 0: the low half is final, the high half seeds the accumulator
	p = DWord::Multiply(A[0], B[0]);
	R[0] = p.GetLowHalf();
	c = p.GetHighHalf();
	d = e = 0;

	// column 1 (i+j == 1)
	MulAcc(0, 1);
	MulAcc(1, 0);

	// store R[1]; column 2
	SaveMulAcc(1, 2, 0);
	MulAcc(1, 1);
	MulAcc(0, 2);

	// store R[2]; column 3 keeps only low words, carries beyond R[3] are dropped
	R[2] = c;
	R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
}

// Low half of an 8x8-word product: reads A[0..7] and B[0..7] and writes only
// the eight least significant result words R[0..7].
// Columns 0..6 are accumulated exactly as in Multiply8. For column 7 only
// the low word of each term is needed, so it is formed with plain word
// multiplications and additions (which presumably wrap, assuming word is an
// unsigned integer type) added to the pending accumulator word d.
// The locals p, c, d, e are referenced by name from inside the macros.
void Portable::Multiply8Bottom(word *R, const word *A, const word *B)
{
	DWord p;
	word c, d, e;

	// column 0: the low half is final, the high half seeds the accumulator
	p = DWord::Multiply(A[0], B[0]);
	R[0] = p.GetLowHalf();
	c = p.GetHighHalf();
	d = e = 0;

	// column 1 (i+j == 1)
	MulAcc(0, 1);
	MulAcc(1, 0);

	// store R[1]; column 2
	SaveMulAcc(1, 2, 0);
	MulAcc(1, 1);
	MulAcc(0, 2);

	// store R[2]; column 3
	SaveMulAcc(2, 0, 3);
	MulAcc(1, 2);
	MulAcc(2, 1);
	MulAcc(3, 0);

	// store R[3]; column 4
	SaveMulAcc(3, 0, 4);
	MulAcc(1, 3);
	MulAcc(2, 2);
	MulAcc(3, 1);
	MulAcc(4, 0);

	// store R[4]; column 5
	SaveMulAcc(4, 0, 5);
	MulAcc(1, 4);
	MulAcc(2, 3);
	MulAcc(3, 2);
	MulAcc(4, 1);
	MulAcc(5, 0);

	// store R[5]; column 6
	SaveMulAcc(5, 0, 6);
	MulAcc(1, 5);
	MulAcc(2, 4);
	MulAcc(3, 3);
	MulAcc(4, 2);
	MulAcc(5, 1);
	MulAcc(6, 0);

	// store R[6]; column 7 keeps only low words, carries beyond R[7] are dropped
	R[6] = c;
	R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
				A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
}

#undef MulAcc
#undef SaveMulAcc
#undef SquAcc
#undef SaveSquAcc

#ifdef CRYPTOPP_X86ASM_AVAILABLE

// ************** x86 feature detection ***************

// Runtime switch for the SSE2 code paths: HasSSE2() returns false while this
// is false. Starts out true and is cleared by DisableSSE2().
static bool s_sse2Enabled = true;

// Executes the CPUID instruction with EAX = input and stores the resulting
// EAX, EBX, ECX and EDX values in output[0], output[1], output[2] and
// output[3] respectively. output must have room for four word32 values.
// The GCC variant saves and restores EBX around CPUID and hands the EBX
// result back through EDI; the other variant writes the registers to memory
// through EDI directly.
static void CpuId(word32 input, word32 *output)
{
#ifdef __GNUC__
	__asm__
	(
		// save ebx in case -fPIC is being used
		"push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
		: "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
		: "a" (input)
	);
#else
	__asm
	{
		mov eax, input
		cpuid
		mov edi, output
		mov [edi], eax
		mov [edi+4], ebx
		mov [edi+8], ecx
		mov [edi+12], edx
	}
#endif
}

#ifdef SSE2_INTRINSICS_AVAILABLE
#ifndef _MSC_VER
// Jump target armed by setjmp() in HasSSE2() before it probes an SSE instruction.
static jmp_buf s_env;
// SIGILL handler installed by HasSSE2() during the probe: jumps back to the
// setjmp() point with value 1 so the probe can report failure.
static void SigIllHandler(int)
{
	longjmp(s_env, 1);
}
#endif

// Reports whether the SSE2 code paths may be used.
// Returns false when SSE2 has been disabled via s_sse2Enabled, when CPUID
// leaf 1 does not report the feature (bit 26 of the EDX value), or when a
// trial instruction faults. The trial catches the case where the CPU
// advertises the feature but executing the instruction still traps.
static bool HasSSE2()
{
	if (!s_sse2Enabled)
		return false;

	// cpuid[3] holds EDX of CPUID leaf 1; bit 26 is the SSE2 feature flag
	word32 cpuid[4];
	CpuId(1, cpuid);
	if ((cpuid[3] & (1 << 26)) == 0)
		return false;

#ifdef _MSC_VER
	// MSVC: use structured exception handling; any exception raised by the
	// trial instruction is caught (filter value 1) and reported as "no SSE2"
    __try
	{
        __asm xorpd xmm0, xmm0        // executing SSE2 instruction
	}
    __except (1)
	{
		return false;
    }
	return true;
#else
	typedef void (*SigHandler)(int);

	// temporarily route SIGILL to SigIllHandler, which longjmps back here
	SigHandler oldHandler = signal(SIGILL, SigIllHandler);
	if (oldHandler == SIG_ERR)
		return false;

	// setjmp returns non-zero only when re-entered through longjmp, i.e.
	// when the trial instruction raised SIGILL
	// NOTE(review): the MSVC branch probes with xorpd while this branch uses
	// xorps - confirm the difference is intentional.
	// NOTE(review): leaving a signal handler via longjmp may leave SIGILL
	// blocked on some platforms - confirm for the supported targets.
	bool result = true;
	if (setjmp(s_env))
		result = false;
	else
		__asm __volatile ("xorps %xmm0, %xmm0");

	// restore whatever SIGILL handler was installed before the probe
	signal(SIGILL, oldHandler);
	return result;
#endif
}
#endif

static bool IsP4()
{
	word32 cpuid[4];

	CpuId(0, cpuid);
	std::swap(cpuid[2], cpuid[3]);
	if (memcmp(cpuid+1, "GenuineIntel", 12) != 0)
		return false;

	CpuId(1, cpuid);
	return ((cpuid[0] >> 8) & 0xf) == 0xf;
}

// ************** Pentium/P4 optimizations ***************

// Kernel set selected by SetPentiumFunctionPointers() when IsP4() is false
// (Add/Subtract) or HasSSE2() is false (Multiply4/8/8Bottom).
// Derives from Portable, so kernels not redeclared here fall back to the
// Portable versions. Only declarations appear in this part of the file.
class PentiumOptimized : public Portable
{
public:
	// Add/Subtract take an explicit length N and return a word
	// (presumably the final carry/borrow - confirm against the definitions).
	static word CRYPTOPP_CDECL Add(word *C, const word *A, const word *B, unsigned int N);
	static word CRYPTOPP_CDECL Subtract(word *C, const word *A, const word *B, unsigned int N);
	// Fixed-size multiplication kernels; these hide the Portable versions.
	static void CRYPTOPP_CDECL Multiply4(word *C, const word *A, const word *B);
	static void CRYPTOPP_CDECL Multiply8(word *C, const word *A, const word *B);
	static void CRYPTOPP_CDECL Multiply8Bottom(word *C, const word *A, const word *B);
};

// Kernel set selected by SetPentiumFunctionPointers(): Add/Subtract are used
// when IsP4() is true, and the multiplication kernels (declared only when
// SSE2_INTRINSICS_AVAILABLE is defined) are used when HasSSE2() is true.
// Only declarations appear in this part of the file.
class P4Optimized
{
public:
	// Same signatures as the PentiumOptimized counterparts so both fit PAddSub.
	static word CRYPTOPP_CDECL Add(word *C, const word *A, const word *B, unsigned int N);
	static word CRYPTOPP_CDECL Subtract(word *C, const word *A, const word *B, unsigned int N);
#ifdef SSE2_INTRINSICS_AVAILABLE
	// Same signatures as the PentiumOptimized counterparts so both fit PMul.
	static void CRYPTOPP_CDECL Multiply4(word *C, const word *A, const word *B);
	static void CRYPTOPP_CDECL Multiply8(word *C, const word *A, const word *B);
	static void CRYPTOPP_CDECL Multiply8Bottom(word *C, const word *A, const word *B);
#endif
};

// Pointer type matching the Add/Subtract kernels of PentiumOptimized and P4Optimized.
typedef word (CRYPTOPP_CDECL * PAddSub)(word *C, const word *A, const word *B, unsigned int N);
// Pointer type matching the Multiply4/Multiply8/Multiply8Bottom kernels.
typedef void (CRYPTOPP_CDECL * PMul)(word *C, const word *A, const word *B);

// Currently selected kernels; assigned by SetPentiumFunctionPointers() and
// called through the LowLevel wrappers.
static PAddSub s_pAdd, s_pSub;
#ifdef SSE2_INTRINSICS_AVAILABLE
static PMul s_pMul4, s_pMul8, s_pMul8B;
#endif

// Chooses the kernel implementations for this CPU and stores them in the
// s_p* function pointers. Add/Subtract follow IsP4(); the multiplication
// kernels (only when SSE2_INTRINSICS_AVAILABLE is defined) follow HasSSE2().
static void SetPentiumFunctionPointers()
{
	const bool onP4 = IsP4();
	s_pAdd = onP4 ? &P4Optimized::Add : &PentiumOptimized::Add;
	s_pSub = onP4 ? &P4Optimized::Subtract : &PentiumOptimized::Subtract;

#ifdef SSE2_INTRINSICS_AVAILABLE
	const bool useSSE2 = HasSSE2();
	s_pMul4 = useSSE2 ? &P4Optimized::Multiply4 : &PentiumOptimized::Multiply4;
	s_pMul8 = useSSE2 ? &P4Optimized::Multiply8 : &PentiumOptimized::Multiply8;
	s_pMul8B = useSSE2 ? &P4Optimized::Multiply8Bottom : &PentiumOptimized::Multiply8Bottom;
#endif
}

// Dummy variable whose initializer calls SetPentiumFunctionPointers(), so the
// kernel pointers are set when this file's static objects are initialized;
// the comma expression then yields 0 as the stored value.
// NOTE(review): code in other translation units that runs during static
// initialization might execute before this initializer - confirm.
static const char s_RunAtStartupSetPentiumFunctionPointers = (SetPentiumFunctionPointers(), 0);

// Turns the SSE2 code paths off at run time: clears s_sse2Enabled (so
// HasSSE2() returns false from now on) and re-runs the kernel selection so
// the function pointers are updated accordingly.
void DisableSSE2()
{
	s_sse2Enabled = false;
	SetPentiumFunctionPointers();
}

// Dispatch layer over the kernels chosen by SetPentiumFunctionPointers().
// Add and Subtract always go through s_pAdd / s_pSub. Square4 is implemented
// as Multiply4(R, A, A). When SSE2_INTRINSICS_AVAILABLE is defined the
// multiplication kernels are routed through s_pMul4 / s_pMul8 / s_pMul8B;
// otherwise the versions inherited from PentiumOptimized are used.
// Member functions defined in the class body are implicitly inline.
class LowLevel : public PentiumOptimized
{
public:
	// Forwards to the selected addition kernel and returns its result.
	static word Add(word *C, const word *A, const word *B, unsigned int N)
	{
		return (*s_pAdd)(C, A, B, N);
	}

	// Forwards to the selected subtraction kernel and returns its result.
	static word Subtract(word *C, const word *A, const word *B, unsigned int N)
	{
		return (*s_pSub)(C, A, B, N);
	}

	// Squares A by multiplying it with itself.
	static void Square4(word *R, const word *A)
	{
		Multiply4(R, A, A);
	}

#ifdef SSE2_INTRINSICS_AVAILABLE
	// Forwards to the selected 4-word multiplication kernel.
	static void Multiply4(word *C, const word *A, const word *B)
	{
		(*s_pMul4)(C, A, B);
	}

	// Forwards to the selected 8-word multiplication kernel.
	static void Multiply8(word *C, const word *A, const word *B)
	{
		(*s_pMul8)(C, A, B);
	}

	// Forwards to the selected 8-word bottom-half multiplication kernel.
	static void Multiply8Bottom(word *C, const word *A, const word *B)
	{
		(*s_pMul8B)(C, A, B);
	}
#endif
};

// use some tricks to share assembly code between MSVC and GCC
#ifdef _MSC_VER
	#define CRYPTOPP_NAKED __declspec(naked)
	#define AS1(x) __asm x
	#define AS2(x, y) __asm x, y
	#define AddPrologue \
		__asm	push ebp \
		__asm	push ebx \
		__asm	push esi \
		__asm	push edi \
		__asm	mov		ecx, [esp+20] \
		__asm	mov		edx, [esp+24] \
		__asm	mov		ebx, [esp+28] \
		__asm	mov		esi, [esp+32]
	#define AddEpilogue \
		__asm	pop edi \
		__asm	pop esi \
		__asm	pop ebx \
		__asm	pop ebp \
		__asm	ret
	#define MulPrologue \
		__asm	push ebp \
		__asm	push ebx \
		__asm	push esi \
		__asm	push edi \
		__asm	mov ecx, [esp+28] \
		__asm	mov esi, [esp+24] \
		__asm	push [esp+20]
	#define MulEpilogue \
		__asm	add esp, 4 \
		__asm	pop edi \
		__asm	pop esi \
		__asm	pop ebx \
		__asm	pop ebp \
		__asm	ret
#else
	#define CRYPTOPP_NAKED
	#define AS1(x) #x ";"
	#define AS2(x, y) #x ", " #y ";"
	#define AddPrologue \
		__asm__ __volatile__ \
		( \
			"push %%ebx;"	/* save this manually, in case of -fPIC */ \
			"mov %2, %%ebx;" \
			".intel_syntax noprefix;" \
			"push ebp;"
	#define AddEpilogue \
			"pop ebp;" \
			".att_syntax prefix;" \
			"pop %%ebx;" \
					: \
					: "c" (C), "d" (A), "m" (B), "S" (N) \
					: "%edi", "memory", "cc" \
		);
	#define MulPrologue \
		__asm__ __volatile__ \
		( \
			"push %%ebx;"	/* save this manually, in case of -fPIC */ \
			"push %%ebp;" \
			"push %0;" \
			".intel_syntax noprefix;"
	#define MulEpilogue \
			"add esp, 4;" \
			"pop ebp;" \
			"pop ebx;" \
			".att_syntax prefix;" \
			: \
			: "rm" (Z), "S" (X), "c" (Y) \
			: "%eax", "%edx", "%edi", "memory", "cc" \
		);
#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -