⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 integer.cpp

📁 加密算法RSA
💻 CPP
📖 第 1 页 / 共 5 页
字号:
	AS2(	movdqa		xmm2, [ebx])	\
	AS2(	pmuludq		xmm0, xmm1)				\
	AS2(	pmuludq		xmm1, [edx-(i)*16])	\
	AS2(	movdqa		xmm3, xmm2)			\
	AS2(	pand		xmm2, xmm0)			\
	AS2(	psrld		xmm0, 16)			\
	AS2(	paddd		xmm4, xmm2)			\
	AS2(	paddd		xmm5, xmm0)			\
	AS2(	pand		xmm3, xmm1)			\
	AS2(	psrld		xmm1, 16)			\
	AS2(	paddd		xmm6, xmm3)			\
	AS2(	paddd		xmm7, xmm1)		\

// Squ_Acc<k>(i) dispatch table. Squ_Column0/Squ_Column1 select an entry by token
// pasting (Squ_Acc##i(i)), so k always equals i at the point of use.
//   k == 1    : expands to nothing (no accumulation call is emitted).
//   k == 2..8 : expands to ASC(call, LSqu<i>), i.e. a call to the label LSqu<i>.
// Every entry is spelled out directly instead of forwarding through Squ_Acc2.
#define Squ_Acc1(i)
#define Squ_Acc2(i)		ASC(call, LSqu##i)
#define Squ_Acc3(i)		ASC(call, LSqu##i)
#define Squ_Acc4(i)		ASC(call, LSqu##i)
#define Squ_Acc5(i)		ASC(call, LSqu##i)
#define Squ_Acc6(i)		ASC(call, LSqu##i)
#define Squ_Acc7(i)		ASC(call, LSqu##i)
#define Squ_Acc8(i)		ASC(call, LSqu##i)

// SSE2_End(E, n): common tail shared by Squ_End, Mul_End and Top_End.
// Sequence, in the order written below:
//   1. SSE2_SaveShift(2*(n)-3)   (macro defined outside this excerpt).
//   2. One last multiply step. It has the same instruction pattern as SSE2_MulAdd45,
//      except that the first operand is read from [esi+16] instead of [esi]:
//        xmm7 = [esi+16], xmm0 = [edi]
//        xmm0 = pmuludq(xmm0, xmm7), xmm7 = pmuludq(xmm7, [edx])
//        with M = the 16 bytes at [ebx]:
//          xmm4 += xmm0 & M      xmm5 += xmm0 >> 16 (per dword)
//          xmm6  = xmm7 & M      xmm7  = xmm7 >> 16 (per dword)
//   3. SSE2_SaveShift(2*(n)-2) and SSE2_FinalSave(2*(n)-1) (defined outside this excerpt).
//   4. pop esp, followed by the epilogue macro passed as E.
// NOTE(review): pop esp presumably reloads the pre-alignment stack pointer that the
// matching *_Begin macro pushed (Mul_Begin and Top_Begin do this) - confirm for
// Squ_Begin, which is not visible here, and that the SaveShift macros leave esp alone.
#define SSE2_End(E, n)					\
	SSE2_SaveShift(2*(n)-3)			\
	AS2(	movdqa		xmm7, [esi+16])	\
	AS2(	movdqa		xmm0, [edi])	\
	AS2(	pmuludq		xmm0, xmm7)				\
	AS2(	movdqa		xmm2, [ebx])		\
	AS2(	pmuludq		xmm7, [edx])	\
	AS2(	movdqa		xmm6, xmm2)				\
	AS2(	pand		xmm2, xmm0)				\
	AS2(	psrld		xmm0, 16)				\
	AS2(	paddd		xmm4, xmm2)				\
	AS2(	paddd		xmm5, xmm0)				\
	AS2(	pand		xmm6, xmm7)				\
	AS2(	psrld		xmm7, 16)	\
	SSE2_SaveShift(2*(n)-2)			\
	SSE2_FinalSave(2*(n)-1)			\
	AS1(	pop		esp)\
	E

// Per-family wrappers around SSE2_End: they differ only in the epilogue macro that is
// expanded after pop esp (SquEpilogue, MulEpilogue, TopEpilogue - all defined outside
// this excerpt).
#define Squ_End(n)		SSE2_End(SquEpilogue, n)
#define Mul_End(n)		SSE2_End(MulEpilogue, n)
#define Top_End(n)		SSE2_End(TopEpilogue, n)

// Squ_Column1(k, i): squaring column in which esi advances by 16.
//   1. Squ_SSE2_SaveShift(k), add esi,16, SSE2_FirstMultiply(1)
//      (both macros are defined outside this excerpt).
//   2. Squ_Acc<i>(i): nothing for i == 1, otherwise a call to LSqu<i>.
//   3. xmm4 and xmm5 are doubled (paddd with themselves).
//   4. xmm3 = [esi], xmm1 = qword at [esi+8];
//      xmm1 = pmuludq(xmm1, xmm3), xmm3 = pmuludq(xmm3, xmm3).
//      With M = the 16 bytes at [ebx]:
//        xmm6 += xmm1 & M, xmm7 += xmm1 >> 16, and only then xmm6/xmm7 are doubled,
//        so the xmm1 product is doubled together with the accumulated terms;
//        xmm4 += xmm3 & M, xmm5 += xmm3 >> 16 happen after xmm4/xmm5 were doubled,
//        so the xmm3*xmm3 product is added once.
//   5. The qwords at [esp+4] and [esp+12] are added to xmm4 and xmm5 (paddd).
// Clobbers xmm0-xmm3.
// NOTE(review): [esp+4]/[esp+12] are presumably values stashed by Squ_SSE2_SaveShift
// in the 16 bytes just above the saved stack pointer - confirm against that macro.
#define Squ_Column1(k, i)	\
	Squ_SSE2_SaveShift(k)					\
	AS2(	add			esi, 16)	\
	SSE2_FirstMultiply(1)\
	Squ_Acc##i(i)	\
	AS2(	paddd		xmm4, xmm4)		\
	AS2(	paddd		xmm5, xmm5)		\
	AS2(	movdqa		xmm3, [esi])				\
	AS2(	movq		xmm1, QWORD PTR [esi+8])	\
	AS2(	pmuludq		xmm1, xmm3)		\
	AS2(	pmuludq		xmm3, xmm3)		\
	AS2(	movdqa		xmm0, [ebx])\
	AS2(	movdqa		xmm2, xmm0)		\
	AS2(	pand		xmm0, xmm1)		\
	AS2(	psrld		xmm1, 16)		\
	AS2(	paddd		xmm6, xmm0)		\
	AS2(	paddd		xmm7, xmm1)		\
	AS2(	pand		xmm2, xmm3)		\
	AS2(	psrld		xmm3, 16)		\
	AS2(	paddd		xmm6, xmm6)		\
	AS2(	paddd		xmm7, xmm7)		\
	AS2(	paddd		xmm4, xmm2)		\
	AS2(	paddd		xmm5, xmm3)		\
	AS2(	movq		xmm0, QWORD PTR [esp+4])\
	AS2(	movq		xmm1, QWORD PTR [esp+12])\
	AS2(	paddd		xmm4, xmm0)\
	AS2(	paddd		xmm5, xmm1)\

// Squ_Column0(k, i): squaring column in which edi and edx advance by 16.
//   1. Squ_SSE2_SaveShift(k), add edi,16, add edx,16, SSE2_FirstMultiply(1)
//      (both macros are defined outside this excerpt).
//   2. Squ_Acc<i>(i): nothing for i == 1, otherwise a call to LSqu<i>.
//   3. All four accumulators xmm4-xmm7 are doubled (paddd with themselves).
//   4. The qwords at [esp+4] and [esp+12] are added to xmm4 and xmm5 (paddd).
// Unlike Squ_Column1 there is no separate xmm3*xmm3 term in this column.
// Clobbers xmm0 and xmm1.
// NOTE(review): [esp+4]/[esp+12] are presumably values stashed by Squ_SSE2_SaveShift -
// confirm against that macro.
#define Squ_Column0(k, i)	\
	Squ_SSE2_SaveShift(k)					\
	AS2(	add			edi, 16)	\
	AS2(	add			edx, 16)	\
	SSE2_FirstMultiply(1)\
	Squ_Acc##i(i)	\
	AS2(	paddd		xmm6, xmm6)		\
	AS2(	paddd		xmm7, xmm7)		\
	AS2(	paddd		xmm4, xmm4)		\
	AS2(	paddd		xmm5, xmm5)		\
	AS2(	movq		xmm0, QWORD PTR [esp+4])\
	AS2(	movq		xmm1, QWORD PTR [esp+12])\
	AS2(	paddd		xmm4, xmm0)\
	AS2(	paddd		xmm5, xmm1)\

// SSE2_MulAdd45: first multiply step of a Mul/Top column.
//   xmm7 = [esi], xmm0 = [edi]
//   xmm0 = pmuludq(xmm0, xmm7), xmm7 = pmuludq(xmm7, [edx])
//   With M = the 16 bytes at [ebx]:
//     xmm4 += xmm0 & M       xmm5 += xmm0 >> 16 (per dword)
//     xmm6  = xmm7 & M       xmm7  = xmm7 >> 16 (per dword)
//   i.e. xmm4/xmm5 are accumulated into, while xmm6/xmm7 are overwritten.
// Clobbers xmm0 and xmm2.
// NOTE(review): M is presumably a low-16-bits-per-dword mask - confirm where ebx is set.
#define SSE2_MulAdd45						\
	AS2(	movdqa		xmm7, [esi])	\
	AS2(	movdqa		xmm0, [edi])	\
	AS2(	pmuludq		xmm0, xmm7)				\
	AS2(	movdqa		xmm2, [ebx])		\
	AS2(	pmuludq		xmm7, [edx])	\
	AS2(	movdqa		xmm6, xmm2)				\
	AS2(	pand		xmm2, xmm0)				\
	AS2(	psrld		xmm0, 16)				\
	AS2(	paddd		xmm4, xmm2)				\
	AS2(	paddd		xmm5, xmm0)				\
	AS2(	pand		xmm6, xmm7)				\
	AS2(	psrld		xmm7, 16)

// Mul_Begin(n): build a 16-byte aligned scratch frame and copy both operands into it.
//   * MulPrologue (defined outside this excerpt) is expanded first. eax and edi are read
//     below without being set here, so they must come from it.
//     NOTE(review): presumably eax = A and edi = B - confirm against MulPrologue.
//   * The incoming esp is kept in esi, esp is rounded down to a multiple of 16 and
//     lowered by 48*(n)+16 bytes, then the saved esp is pushed. After the push esp is
//     12 mod 16, so every [esp+20+...] address used with movdqa is 16-byte aligned.
//   * Loop at label 1 (edx = 0, 16, ... until edx == 8*(n)): 16 bytes at [eax+edx] are
//     shuffled two ways (3,1,2,0 and 2,0,3,1) and 16 bytes at [edi+edx] one way (3,1,2,0).
//     Each shuffled vector, plus a copy shifted right by 32 bits per qword, is stored at
//     offset 2*edx inside one of three regions that start at esp+20, esp+20+16*(n) and
//     esp+20+32*(n). The jne exit test only fires when 8*(n) is a multiple of 16, i.e.
//     for even n; every use in this excerpt passes 2, 4, 8 or 16.
//   * edi, edx and esi are finally pointed at the start of the three regions and
//     SSE2_FirstMultiply(0) (defined outside this excerpt) is expanded.
// The parameter n is parenthesised at every use (as 8*(n) already was) so that an
// expression argument such as Mul_Begin(a+b) expands with the intended precedence.
#define Mul_Begin(n)							\
	MulPrologue									\
	AS2(	mov		esi, esp)\
	AS2(	and		esp, 0xfffffff0)\
	AS2(	sub		esp, 48*(n)+16)\
	AS1(	push	esi)\
	AS2(	xor		edx, edx)					\
	ASL(1)										\
	ASS(	pshufd	xmm0, [eax+edx], 3,1,2,0)	\
	ASS(	pshufd	xmm1, [eax+edx], 2,0,3,1)	\
	ASS(	pshufd	xmm2, [edi+edx], 3,1,2,0)	\
	AS2(	movdqa	[esp+20+2*edx], xmm0)		\
	AS2(	psrlq	xmm0, 32)					\
	AS2(	movdqa	[esp+20+2*edx+16], xmm0)	\
	AS2(	movdqa	[esp+20+16*(n)+2*edx], xmm1)		\
	AS2(	psrlq	xmm1, 32)					\
	AS2(	movdqa	[esp+20+16*(n)+2*edx+16], xmm1)	\
	AS2(	movdqa	[esp+20+32*(n)+2*edx], xmm2)		\
	AS2(	psrlq	xmm2, 32)					\
	AS2(	movdqa	[esp+20+32*(n)+2*edx+16], xmm2)	\
	AS2(	add		edx, 16)					\
	AS2(	cmp		edx, 8*(n))					\
	ASJ(	jne,	1, b)						\
	AS2(	lea		edi, [esp+20])\
	AS2(	lea		edx, [esp+20+16*(n)])\
	AS2(	lea		esi, [esp+20+32*(n)])\
	SSE2_FirstMultiply(0)							\

// Mul_Acc(i): body of the LMul<i> subroutine entry point.
// Emits the label LMul<i> followed by one multiply-accumulate step. The address offset
// i/2*(1-(i-2*(i/2))*2)*16 evaluates (integer division) to +16*(i/2) for even i and to
// -16*(i/2) for odd i; it is added to esi and subtracted from edi and edx ("off" below).
//   xmm1 = [esi+off], xmm0 = [edi-off]
//   xmm0 = pmuludq(xmm0, xmm1), xmm1 = pmuludq(xmm1, [edx-off])
//   With M = the 16 bytes at [ebx]:
//     xmm4 += xmm0 & M    xmm5 += xmm0 >> 16    xmm6 += xmm1 & M    xmm7 += xmm1 >> 16
//     (the shifts are per dword)
// The macro contains no ret: the routines below expand Mul_Acc(k) ... Mul_Acc(2) back to
// back followed by a single ret, so a call to LMul<i> falls through steps i, i-1, ..., 2.
// i must be a single literal token because it is pasted into the label name.
// Clobbers xmm0-xmm3.
#define Mul_Acc(i)								\
	ASL(LMul##i)										\
	AS2(	movdqa		xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16])	\
	AS2(	movdqa		xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16])	\
	AS2(	movdqa		xmm2, [ebx])	\
	AS2(	pmuludq		xmm0, xmm1)				\
	AS2(	pmuludq		xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16])	\
	AS2(	movdqa		xmm3, xmm2)			\
	AS2(	pand		xmm2, xmm0)			\
	AS2(	psrld		xmm0, 16)			\
	AS2(	paddd		xmm4, xmm2)			\
	AS2(	paddd		xmm5, xmm0)			\
	AS2(	pand		xmm3, xmm1)			\
	AS2(	psrld		xmm1, 16)			\
	AS2(	paddd		xmm6, xmm3)			\
	AS2(	paddd		xmm7, xmm1)		\

// Mul_Acc<k>(i) dispatch table. Mul_Column0/1 and Top_Column0/1 select an entry by token
// pasting (Mul_Acc##i(i)), so k always equals i at the point of use.
//   k == 1     : expands to nothing (the column has no extra accumulation steps).
//   k == 2..16 : expands to ASC(call, LMul<i>), i.e. a call to the label LMul<i>.
// Every entry is spelled out directly instead of forwarding through Mul_Acc2.
#define Mul_Acc1(i)
#define Mul_Acc2(i)		ASC(call, LMul##i)
#define Mul_Acc3(i)		ASC(call, LMul##i)
#define Mul_Acc4(i)		ASC(call, LMul##i)
#define Mul_Acc5(i)		ASC(call, LMul##i)
#define Mul_Acc6(i)		ASC(call, LMul##i)
#define Mul_Acc7(i)		ASC(call, LMul##i)
#define Mul_Acc8(i)		ASC(call, LMul##i)
#define Mul_Acc9(i)		ASC(call, LMul##i)
#define Mul_Acc10(i)	ASC(call, LMul##i)
#define Mul_Acc11(i)	ASC(call, LMul##i)
#define Mul_Acc12(i)	ASC(call, LMul##i)
#define Mul_Acc13(i)	ASC(call, LMul##i)
#define Mul_Acc14(i)	ASC(call, LMul##i)
#define Mul_Acc15(i)	ASC(call, LMul##i)
#define Mul_Acc16(i)	ASC(call, LMul##i)

// Mul_Column1(k, i): multiplication column in which esi advances by 16.
// SSE2_SaveShift(k) (defined outside this excerpt), add esi,16, SSE2_MulAdd45, then
// Mul_Acc<i>(i): nothing for i == 1, otherwise a call to LMul<i>.
#define Mul_Column1(k, i)	\
	SSE2_SaveShift(k)					\
	AS2(	add			esi, 16)	\
	SSE2_MulAdd45\
	Mul_Acc##i(i)	\

// Mul_Column0(k, i): multiplication column in which edi and edx advance by 16.
// SSE2_SaveShift(k) (defined outside this excerpt), add edi,16, add edx,16,
// SSE2_MulAdd45, then Mul_Acc<i>(i): nothing for i == 1, otherwise a call to LMul<i>.
#define Mul_Column0(k, i)	\
	SSE2_SaveShift(k)					\
	AS2(	add			edi, 16)	\
	AS2(	add			edx, 16)	\
	SSE2_MulAdd45\
	Mul_Acc##i(i)	\

// Bot_Acc(i): inline multiply-accumulate step used by the MultiplyBottom routines.
// Same operand addressing as Mul_Acc(i) (off = +16*(i/2) for even i, -16*(i/2) for odd i),
// but with no label and no mask/shift split of the products:
//   xmm4 += pmuludq([edi-off], [esi+off])   (64-bit lane add, paddq)
//   xmm6 += pmuludq([esi+off], [edx-off])   (32-bit lane add, paddd)
// Clobbers xmm0 and xmm1.
#define Bot_Acc(i)							\
	AS2(	movdqa		xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16])	\
	AS2(	movdqa		xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16])	\
	AS2(	pmuludq		xmm0, xmm1)				\
	AS2(	pmuludq		xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16])		\
	AS2(	paddq		xmm4, xmm0)				\
	AS2(	paddd		xmm6, xmm1)

// Bot_SaveAcc(k): in this excerpt it is expanded immediately before the Bot_Acc steps of
// a MultiplyBottom routine.
//   SSE2_SaveShift(k) (defined outside this excerpt), then edi and edx advance by 16.
//   xmm6 = [esi]; xmm4 += pmuludq([edi], xmm6)              (paddq)
//   xmm5 is shifted left by 16 bits per qword and added to xmm4   (paddq)
//   xmm6 = pmuludq(xmm6, [edx])
// Clobbers xmm0; xmm5 is left holding its shifted value.
#define Bot_SaveAcc(k)					\
	SSE2_SaveShift(k)							\
	AS2(	add			edi, 16)	\
	AS2(	add			edx, 16)	\
	AS2(	movdqa		xmm6, [esi])	\
	AS2(	movdqa		xmm0, [edi])	\
	AS2(	pmuludq		xmm0, xmm6)				\
	AS2(	paddq		xmm4, xmm0)				\
	AS2(	psllq		xmm5, 16)				\
	AS2(	paddq		xmm4, xmm5)				\
	AS2(	pmuludq		xmm6, [edx])

// Bot_End(n): finish a MultiplyBottom routine.
//   movhlps copies the high qword of xmm6 into the low qword of xmm7; paddd adds xmm7 to
//   xmm6, the sum is shifted left by 32 bits per qword and added to xmm4 (paddd).
//   The low qword of xmm4 is stored at [ecx+8*((n)-1)].
//   pop esp then reloads the stack pointer from the top of the stack, and MulEpilogue
//   (defined outside this excerpt) is expanded.
// NOTE(review): ecx is presumably the output pointer C established by MulPrologue, and
// the popped value presumably the pre-alignment esp pushed by Mul_Begin - confirm.
#define Bot_End(n)							\
	AS2(	movhlps		xmm7, xmm6)			\
	AS2(	paddd		xmm6, xmm7)			\
	AS2(	psllq		xmm6, 32)			\
	AS2(	paddd		xmm4, xmm6)			\
	AS2(	movq		QWORD PTR [ecx+8*((n)-1)], xmm4)	\
	AS1(	pop		esp)\
	MulEpilogue

// Top_Begin(n): build a 16-byte aligned scratch frame for a "Top" routine.
//   * TopPrologue (defined outside this excerpt) is expanded first. eax, edi and esi are
//     read below without being set here, so they must come from it.
//     NOTE(review): presumably eax = A, edi = B and esi = an extra argument - confirm.
//   * The incoming esp is kept in edx (esi is still needed), esp is rounded down to a
//     multiple of 16 and lowered by 48*(n)+16 bytes, then the saved esp is pushed. After
//     the push esp is 12 mod 16, so every [esp+20+...] movdqa address is 16-byte aligned.
//   * Loop at label 1 (edx = 0, 16, ... until edx == 8*(n)): identical to the copy loop
//     in Mul_Begin - shuffled copies of [eax+edx] (two shuffles) and [edi+edx] (one
//     shuffle), each with a companion shifted right by 32 bits per qword, are stored in
//     three regions starting at esp+20, esp+20+16*(n) and esp+20+32*(n). The jne exit
//     test only fires when 8*(n) is a multiple of 16, i.e. for even n.
//   * esi is copied into eax, then edi, edx and esi are pointed 16*((n)/2-1) bytes into
//     the three regions, and xmm4/xmm5 are zeroed.
// The parameter n is parenthesised at every use (as 8*(n) already was) so that an
// expression argument expands with the intended precedence.
#define Top_Begin(n)							\
	TopPrologue									\
	AS2(	mov		edx, esp)\
	AS2(	and		esp, 0xfffffff0)\
	AS2(	sub		esp, 48*(n)+16)\
	AS1(	push	edx)\
	AS2(	xor		edx, edx)					\
	ASL(1)										\
	ASS(	pshufd	xmm0, [eax+edx], 3,1,2,0)	\
	ASS(	pshufd	xmm1, [eax+edx], 2,0,3,1)	\
	ASS(	pshufd	xmm2, [edi+edx], 3,1,2,0)	\
	AS2(	movdqa	[esp+20+2*edx], xmm0)		\
	AS2(	psrlq	xmm0, 32)					\
	AS2(	movdqa	[esp+20+2*edx+16], xmm0)	\
	AS2(	movdqa	[esp+20+16*(n)+2*edx], xmm1)		\
	AS2(	psrlq	xmm1, 32)					\
	AS2(	movdqa	[esp+20+16*(n)+2*edx+16], xmm1)	\
	AS2(	movdqa	[esp+20+32*(n)+2*edx], xmm2)		\
	AS2(	psrlq	xmm2, 32)					\
	AS2(	movdqa	[esp+20+32*(n)+2*edx+16], xmm2)	\
	AS2(	add		edx, 16)					\
	AS2(	cmp		edx, 8*(n))					\
	ASJ(	jne,	1, b)						\
	AS2(	mov		eax, esi)					\
	AS2(	lea		edi, [esp+20+00*(n)+16*((n)/2-1)])\
	AS2(	lea		edx, [esp+20+16*(n)+16*((n)/2-1)])\
	AS2(	lea		esi, [esp+20+32*(n)+16*((n)/2-1)])\
	AS2(	pxor	xmm4, xmm4)\
	AS2(	pxor	xmm5, xmm5)

// Top_Acc(i): accumulate only the uppermost 16 bits of a product.
//   xmm0 = qword at [esi+off+8]; xmm0 = pmuludq(xmm0, [edx-off]);
//   each 64-bit lane is shifted right by 48 bits and the result is added to xmm5 (paddd).
//   off = i/2*(1-(i-2*(i/2))*2)*16, i.e. +16*(i/2) for even i and -16*(i/2) for odd i.
// Clobbers xmm0. No routine inside this excerpt expands this macro.
#define Top_Acc(i)							\
	AS2(	movq		xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8])	\
	AS2(	pmuludq		xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16])	\
	AS2(	psrlq		xmm0, 48)				\
	AS2(	paddd		xmm5, xmm0)\

// Top_Column0(i): xmm5 is shifted left by 32 bits per qword, edi and edx advance by 16,
// then SSE2_MulAdd45 is expanded followed by Mul_Acc<i>(i) (nothing for i == 1,
// otherwise a call to LMul<i>). Unlike Mul_Column0 there is no SSE2_SaveShift here.
#define Top_Column0(i)	\
	AS2(	psllq		xmm5, 32)				\
	AS2(	add			edi, 16)	\
	AS2(	add			edx, 16)	\
	SSE2_MulAdd45\
	Mul_Acc##i(i)	\

// Top_Column1(i): SSE2_SaveShift(0) (defined outside this excerpt), add esi,16,
// SSE2_MulAdd45 and Mul_Acc<i>(i), followed by a one-bit correction of xmm4:
//   eax >>= 16 (eax itself is modified); xmm0 = eax
//   xmm1 = dword at [ecx+4], shifted right by 16
//   pcmpgtd sets each dword of xmm1 to all ones where xmm1 > xmm0 (signed compare);
//   the shift right by 31 turns that into 0 or 1, which is added to xmm4.
// Because both movd loads zero the upper dwords, only the low dword of xmm4 can change:
// it gains 1 when the upper 16 bits of [ecx+4] exceed the upper 16 bits of eax.
// NOTE(review): this presumably estimates a carry into the top half from a value passed
// in by the caller (eax was loaded from esi in Top_Begin) - confirm the intent.
#define Top_Column1(i)	\
	SSE2_SaveShift(0)					\
	AS2(	add			esi, 16)	\
	SSE2_MulAdd45\
	Mul_Acc##i(i)	\
	AS2(	shr			eax, 16)	\
	AS2(	movd		xmm0, eax)\
	AS2(	movd		xmm1, [ecx+4])\
	AS2(	psrld		xmm1, 16)\
	AS2(	pcmpgtd		xmm1, xmm0)\
	AS2(	psrld		xmm1, 31)\
	AS2(	paddd		xmm4, xmm1)\

// SSE2_Square4: Squ_Begin(2), a single Squ_Column0(0, 1) column, then Squ_End(2).
// The only column uses i == 1, for which Squ_Acc1 expands to nothing, so no LSqu<i>
// subroutine block is needed in this function.
// C and A are never named in the body. NOTE(review): they are presumably picked up by the
// prologue inside Squ_Begin (not visible in this excerpt); by the naming, C receives the
// square of the 4-word value at A - confirm.
void SSE2_Square4(word *C, const word *A)
{
	Squ_Begin(2)
	Squ_Column0(0, 1)
	Squ_End(2)
}

// SSE2_Square8: Squ_Begin(4), five columns k = 0..4 alternating Squ_Column0 (even k) and
// Squ_Column1 (odd k) with i = 1, 1, 2, 1, 1, then Squ_End(4).
// C and A are never named in the body. NOTE(review): presumably consumed by the prologue
// inside Squ_Begin (not visible in this excerpt) - confirm.
void SSE2_Square8(word *C, const word *A)
{
	Squ_Begin(4)
	// Non-GCC builds only: jump over a local subroutine block made of Squ_Acc(2) and a
	// ret, ending at label 0. Column k = 2 reaches it through "call LSqu2".
	// NOTE(review): GCC builds presumably rely on the LSqu labels emitted
	// unconditionally by SSE2_Square32 being visible file-wide - confirm.
#ifndef __GNUC__
	ASJ(	jmp,	0, f)
	Squ_Acc(2)
	AS1(	ret) ASL(0)
#endif
	Squ_Column0(0, 1)
	Squ_Column1(1, 1)
	Squ_Column0(2, 2)
	Squ_Column1(3, 1)
	Squ_Column0(4, 1)
	Squ_End(4)
}

// SSE2_Square16: Squ_Begin(8), thirteen columns k = 0..12 alternating Squ_Column0 (even k)
// and Squ_Column1 (odd k), then Squ_End(8). The accumulation depth i rises
// 1,1,2,2,3,3 to a peak of 4 at k = 6 and falls back 3,3,2,2,1,1.
// C and A are never named in the body. NOTE(review): presumably consumed by the prologue
// inside Squ_Begin (not visible in this excerpt) - confirm.
void SSE2_Square16(word *C, const word *A)
{
	Squ_Begin(8)
	// Non-GCC builds only: jump over a local subroutine block. Squ_Acc(4), Squ_Acc(3)
	// and Squ_Acc(2) are laid out back to back before a single ret, so a call to
	// LSqu<i> runs steps i down to 2.
	// NOTE(review): GCC builds presumably rely on the LSqu labels emitted
	// unconditionally by SSE2_Square32 - confirm.
#ifndef __GNUC__
	ASJ(	jmp,	0, f)
	Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
	AS1(	ret) ASL(0)
#endif
	Squ_Column0(0, 1)
	Squ_Column1(1, 1)
	Squ_Column0(2, 2)
	Squ_Column1(3, 2)
	Squ_Column0(4, 3)
	Squ_Column1(5, 3)
	Squ_Column0(6, 4)
	Squ_Column1(7, 3)
	Squ_Column0(8, 3)
	Squ_Column1(9, 2)
	Squ_Column0(10, 2)
	Squ_Column1(11, 1)
	Squ_Column0(12, 1)
	Squ_End(8)
}

// SSE2_Square32: Squ_Begin(16), twenty-nine columns k = 0..28 alternating Squ_Column0
// (even k) and Squ_Column1 (odd k), then Squ_End(16). The accumulation depth i rises in
// pairs from 1 to a peak of 8 at k = 14 and falls back to 1.
// Unlike SSE2_Square8/16, the subroutine block (Squ_Acc(8) ... Squ_Acc(2) followed by a
// single ret, skipped by the jmp to label 0) is emitted for every compiler, not only
// under #ifndef __GNUC__.
// C and A are never named in the body. NOTE(review): presumably consumed by the prologue
// inside Squ_Begin (not visible in this excerpt) - confirm.
void SSE2_Square32(word *C, const word *A)
{
	Squ_Begin(16)
	ASJ(	jmp,	0, f)
	Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
	AS1(	ret) ASL(0)
	Squ_Column0(0, 1)
	Squ_Column1(1, 1)
	Squ_Column0(2, 2)
	Squ_Column1(3, 2)
	Squ_Column0(4, 3)
	Squ_Column1(5, 3)
	Squ_Column0(6, 4)
	Squ_Column1(7, 4)
	Squ_Column0(8, 5)
	Squ_Column1(9, 5)
	Squ_Column0(10, 6)
	Squ_Column1(11, 6)
	Squ_Column0(12, 7)
	Squ_Column1(13, 7)
	Squ_Column0(14, 8)
	Squ_Column1(15, 7)
	Squ_Column0(16, 7)
	Squ_Column1(17, 6)
	Squ_Column0(18, 6)
	Squ_Column1(19, 5)
	Squ_Column0(20, 5)
	Squ_Column1(21, 4)
	Squ_Column0(22, 4)
	Squ_Column1(23, 3)
	Squ_Column0(24, 3)
	Squ_Column1(25, 2)
	Squ_Column0(26, 2)
	Squ_Column1(27, 1)
	Squ_Column0(28, 1)
	Squ_End(16)
}

// SSE2_Multiply4: Mul_Begin(2), a single Mul_Column0(0, 2) column, then Mul_End(2).
// C, A and B are never named in the body. NOTE(review): they are presumably loaded into
// registers by MulPrologue (expanded inside Mul_Begin, not visible in this excerpt); by
// the naming, C receives the product of the 4-word values at A and B - confirm.
void SSE2_Multiply4(word *C, const word *A, const word *B)
{
	Mul_Begin(2)
	// Non-GCC builds only: jump over a local subroutine (Mul_Acc(2) defines label LMul2,
	// followed by ret) that the column below reaches through "call LMul2".
	// NOTE(review): GCC builds presumably rely on the LMul labels emitted
	// unconditionally by SSE2_Multiply32 being visible file-wide - confirm.
#ifndef __GNUC__
	ASJ(	jmp,	0, f)
	Mul_Acc(2)
	AS1(	ret) ASL(0)
#endif
	Mul_Column0(0, 2)
	Mul_End(2)
}

// SSE2_Multiply8: Mul_Begin(4), five columns k = 0..4 alternating Mul_Column0 (even k)
// and Mul_Column1 (odd k) with i = 2, 3, 4, 3, 2, then Mul_End(4).
// C, A and B are never named in the body. NOTE(review): presumably loaded into registers
// by MulPrologue (expanded inside Mul_Begin, not visible in this excerpt) - confirm.
void SSE2_Multiply8(word *C, const word *A, const word *B)
{
	Mul_Begin(4)
	// Non-GCC builds only: jump over a local subroutine block. Mul_Acc(4), Mul_Acc(3)
	// and Mul_Acc(2) are laid out back to back before a single ret, so a call to
	// LMul<i> runs steps i down to 2.
	// NOTE(review): GCC builds presumably rely on the LMul labels emitted
	// unconditionally by SSE2_Multiply32 - confirm.
#ifndef __GNUC__
	ASJ(	jmp,	0, f)
	Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
	AS1(	ret) ASL(0)
#endif
	Mul_Column0(0, 2)
	Mul_Column1(1, 3)
	Mul_Column0(2, 4)
	Mul_Column1(3, 3)
	Mul_Column0(4, 2)
	Mul_End(4)
}

// SSE2_Multiply16: Mul_Begin(8), thirteen columns k = 0..12 alternating Mul_Column0
// (even k) and Mul_Column1 (odd k), then Mul_End(8). The accumulation depth i rises by
// one per column from 2 to a peak of 8 at k = 6 and falls back to 2.
// C, A and B are never named in the body. NOTE(review): presumably loaded into registers
// by MulPrologue (expanded inside Mul_Begin, not visible in this excerpt) - confirm.
void SSE2_Multiply16(word *C, const word *A, const word *B)
{
	Mul_Begin(8)
	// Non-GCC builds only: jump over a local subroutine block. Mul_Acc(8) ... Mul_Acc(2)
	// are laid out back to back before a single ret, so a call to LMul<i> runs steps
	// i down to 2.
	// NOTE(review): GCC builds presumably rely on the LMul labels emitted
	// unconditionally by SSE2_Multiply32 - confirm.
#ifndef __GNUC__
	ASJ(	jmp,	0, f)
	Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
	AS1(	ret) ASL(0)
#endif
	Mul_Column0(0, 2)
	Mul_Column1(1, 3)
	Mul_Column0(2, 4)
	Mul_Column1(3, 5)
	Mul_Column0(4, 6)
	Mul_Column1(5, 7)
	Mul_Column0(6, 8)
	Mul_Column1(7, 7)
	Mul_Column0(8, 6)
	Mul_Column1(9, 5)
	Mul_Column0(10, 4)
	Mul_Column1(11, 3)
	Mul_Column0(12, 2)
	Mul_End(8)
}

// SSE2_Multiply32: Mul_Begin(16), twenty-nine columns k = 0..28 alternating Mul_Column0
// (even k) and Mul_Column1 (odd k), then Mul_End(16). The accumulation depth i rises by
// one per column from 2 to a peak of 16 at k = 14 and falls back to 2.
// Unlike the smaller Multiply routines, the subroutine block (Mul_Acc(16) ... Mul_Acc(2)
// followed by a single ret, skipped by the jmp to label 0) is emitted for every compiler,
// not only under #ifndef __GNUC__.
// C, A and B are never named in the body. NOTE(review): presumably loaded into registers
// by MulPrologue (expanded inside Mul_Begin, not visible in this excerpt) - confirm.
void SSE2_Multiply32(word *C, const word *A, const word *B)
{
	Mul_Begin(16)
	ASJ(	jmp,	0, f)
	Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
	AS1(	ret) ASL(0)
	Mul_Column0(0, 2)
	Mul_Column1(1, 3)
	Mul_Column0(2, 4)
	Mul_Column1(3, 5)
	Mul_Column0(4, 6)
	Mul_Column1(5, 7)
	Mul_Column0(6, 8)
	Mul_Column1(7, 9)
	Mul_Column0(8, 10)
	Mul_Column1(9, 11)
	Mul_Column0(10, 12)
	Mul_Column1(11, 13)
	Mul_Column0(12, 14)
	Mul_Column1(13, 15)
	Mul_Column0(14, 16)
	Mul_Column1(15, 15)
	Mul_Column0(16, 14)
	Mul_Column1(17, 13)
	Mul_Column0(18, 12)
	Mul_Column1(19, 11)
	Mul_Column0(20, 10)
	Mul_Column1(21, 9)
	Mul_Column0(22, 8)
	Mul_Column1(23, 7)
	Mul_Column0(24, 6)
	Mul_Column1(25, 5)
	Mul_Column0(26, 4)
	Mul_Column1(27, 3)
	Mul_Column0(28, 2)
	Mul_End(16)
}

// SSE2_MultiplyBottom4: Mul_Begin(2), then Bot_SaveAcc(0) and one inline Bot_Acc(2) step,
// finished by Bot_End(2), which stores a single qword at [ecx+8] and restores esp.
// No LMul<i> call is made here (Bot_Acc is expanded inline), so no subroutine block is
// needed.
// C, A and B are never named in the body. NOTE(review): presumably loaded into registers
// by MulPrologue (expanded inside Mul_Begin, not visible in this excerpt); by the naming,
// only the low half of the product is produced - confirm.
void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
{
	Mul_Begin(2)
	Bot_SaveAcc(0) Bot_Acc(2)
	Bot_End(2)
}

void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
{
	Mul_Begin(4)
#ifndef __GNUC__
	ASJ(	jmp,	0, f)
	Mul_Acc(3) Mul_Acc(2)
	AS1(	ret) ASL(0)
#endif
	Mul_Column0(0, 2)
	Mul_Column1(1, 3)
	Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -