⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sse.cpp

📁 这是本人编写的软件接收机
💻 CPP
📖 第 1 页 / 共 4 页
字号:
	int cnt1, cnt2;

	cnt1 = cnt / 4;
	cnt2 = cnt - (cnt1*4);

	short M[8] = {1,-1,1,1,1,-1,1,1};


	__asm 
	{
		
		//Set up for loop
		mov edi, A;			// Address of A	source1
		mov esi, B;			// Address of B	source2
		mov ebx, C;			// Address of C output1
		mov ecx, cnt1;		// Counter
		movupd xmm7, M;		// Move the multiply thingie
		movss  xmm6, shift;	// Move the round thingie
		jecxz ZERO1;

		L1:

			movlpd xmm0, [edi];		//Copy from A
			movlpd xmm1, [edi+8];	//Copy from A

			movlpd xmm3, [esi];		//Copy from B
			movlpd xmm4, [esi+8];	//Copy from B

			punpckldq xmm0, xmm0;	//Copy low 32 bits to high 32 bits
			punpckldq xmm1, xmm1;	//Copy low 32 bits to high 32 bits

			punpckldq xmm3, xmm3;	//Copy low 32 bits to high 32 bits
			punpckldq xmm4, xmm4;	//Copy low 32 bits to high 32 bits

			pshuflw xmm3, xmm3, 0x14; //Shuffle Low 64 bits to get [Re Im Im Re]
			pshuflw xmm4, xmm4, 0x14; //Shuffle Low 64 bits to get [Re Im Im Re]

			pshufhw xmm3, xmm3, 0x14; //Shuffle High 64 bits to get [Re Im Im Re]
			pshufhw xmm4, xmm4, 0x14; //Shuffle High 64 bits to get [Re Im Im Re]

			pmullw xmm3, xmm7;		//Multiply to get [Re Im -Im Re]
			pmullw xmm4, xmm7;		//Multiply to get [Re Im -Im Re]

			pmaddwd xmm0, xmm3;		//Complex multiply and add
			pmaddwd xmm1, xmm4;		//Complex multiply and add

			psrad xmm0, xmm6;		//Shift by X bits
			psrad xmm1, xmm6;		//Shift by X bits

			packssdw xmm0, xmm0;	//Get into low 64 bits
			packssdw xmm1, xmm1;	//Get into low 64 bits

			movsd [ebx],   xmm0;	//Move into C
			movsd [ebx+8], xmm1;	//Move into C

			add edi, 16;			//Move in array
			add esi, 16;			//Move in array
			add ebx, 16;			//Move in array
		
		loop L1;							// Loop if not done

ZERO1:

		mov ecx, cnt2;
		jecxz ZERO2;

L2:

			movlpd		xmm0, [edi];		//Copy from A
			movlpd		xmm1, [esi];		//Copy from B

			punpckldq	xmm0, xmm0;			//Copy low 32 bits to high 32 bits
			punpckldq	xmm1, xmm1;			//Copy low 32 bits to high 32 bits

			pshuflw		xmm1, xmm1, 0x14;	//Shuffle Low 64 bits to get [Re Im Im Re]
			pmullw		xmm1, xmm7;			//Multiply to get [Re Im -Im Re]
			pmaddwd		xmm0, xmm1;			//Complex multiply and add
			psrad		xmm0, xmm6;			//Shift by X bits
			packssdw	xmm0, xmm0;			//Get into low 32 bits
			movd		[ebx], xmm0;		//Move into A

			add edi, 4;
			add esi, 4;
			add ebx, 4;

			loop L2;


ZERO2:
		EMMS;

	}


}


void sse_crot(void *A, void *B, int cnt, int shift)
{

	int cnt1;
	int cnt2;

	cnt1 = cnt/8;
	cnt2 = cnt - (cnt1*8);

	short M[8] = {1,-1,1,1,1,-1,1,1};			//To get mmx register into Re Im -Im Re format

	__asm 
	{
		
		//Set up for loop
		mov edi, A;			// Address of A	source1
		mov esi, B;			// Address of B	source2
		mov ecx, cnt1;		// Counter
		movupd xmm7, M;		// Move the multiply thingie
		movss  xmm6, shift;	// Move the round thingie

		//Set up the rotation register
		movss	xmm1, [esi];
		pshuflw	xmm1, xmm1, 0x14;	//Shuffle to get [Re Im Im Re]
		pmullw	xmm1, xmm7;			//Multiply to get [Re -Im Im Re]
		pshufd	xmm7, xmm1, 0x44;	//Now in mm7

		jecxz ZERO1;

		L1:

			movlpd xmm0, [edi];		//Copy from A
			movlpd xmm1, [edi+8];	//Copy from A
			movlpd xmm2, [edi+16];	//Copy from A
			movlpd xmm3, [edi+24];	//Copy from A

			punpckldq	xmm0, xmm0;	//Copy low 32 bits to high 32 bits
			punpckldq	xmm1, xmm1;	//Copy low 32 bits to high 32 bits
			punpckldq	xmm2, xmm2;	//Copy low 32 bits to high 32 bits
			punpckldq	xmm3, xmm3;	//Copy low 32 bits to high 32 bits

			pmaddwd		xmm0, xmm7;	//Complex multiply and add
			pmaddwd		xmm1, xmm7;	//Complex multiply and add
			pmaddwd		xmm2, xmm7;	//Complex multiply and add
			pmaddwd		xmm3, xmm7;	//Complex multiply and add

			psrad		xmm0, xmm6;	//Shift by X bits
			psrad		xmm1, xmm6;	//Shift by X bits
			psrad		xmm2, xmm6;	//Shift by X bits
			psrad		xmm3, xmm6;	//Shift by X bits

			packssdw	xmm0, xmm0;	//Get into low 32 bits
			packssdw	xmm1, xmm1;	//Get into low 32 bits
			packssdw	xmm2, xmm2;	//Get into low 32 bits
			packssdw	xmm3, xmm3;	//Get into low 32 bits

			movsd [edi],    xmm0;	//Move into A
			movsd [edi+8],  xmm1;	//Move into A
			movsd [edi+16], xmm2;	//Move into A
			movsd [edi+24], xmm3;	//Move into A

			add			edi, 32;	//Move in array
		
		loop L1;							// Loop if not done

ZERO1:

		mov ecx, cnt2;
		jecxz ZERO2;

		L2:

			movss		xmm0, [edi];	//Copy from A

			punpckldq	xmm0, xmm0;		//Copy low 32 bits to high 32 bits
			pmaddwd		xmm0, xmm7;		//Complex multiply and add
			psrad		xmm0, xmm6;		//Shift by X bits
			packssdw	xmm0, xmm0;		//Get into low 32 bits

			movss		[edi], xmm0;	//Move into A

			add			edi, 4;			//Move in array
		
		loop L2;						// Loop if not done

ZERO2:

		EMMS;

	}


}



// sse_crot (out-of-place form): multiply every complex sample in A by the
// single complex value stored at B and write the products to C.
//
//   A     - source buffer of cnt samples; each sample is two signed 16-bit
//           words, word 0 = real part, word 1 = imaginary part. Only read.
//   B     - one sample in the same layout; only its first 4 bytes are read.
//   C     - destination buffer, cnt samples in the same layout.
//   cnt   - number of complex samples. Values <= 0 return immediately.
//   shift - arithmetic right shift applied to each 32-bit product sum before
//           it is saturated back to 16 bits.
//
// Per sample:  c.re = sat16((a.re*b.re - a.im*b.im) >> shift)
//              c.im = sat16((a.re*b.im + a.im*b.re) >> shift)
//
// Samples are processed 8 at a time (32 bytes); the remainder is handled one
// sample at a time. Implemented as 32-bit inline assembly using SSE2.
void sse_crot(void *A, void *B, void *C, int cnt, int shift)
{

	// A negative count would reach ECX as a huge unsigned value and the
	// count-down loops below would run far past the end of the buffers.
	if(cnt <= 0)
		return;

	int cnt1;
	int cnt2;

	cnt1 = cnt/8;				// number of full 8-sample blocks
	cnt2 = cnt - (cnt1*8);		// samples left over for the tail loop

	short M[8] = {1,-1,1,1,1,-1,1,1};			// Turns [Re Im Im Re] into [Re -Im Im Re]

	__asm 
	{
		
		// Pointers, block counter and constants
		mov edi, A;			// edi = A, source samples
		mov esi, B;			// esi = B, the rotation value
		mov ebx, C;			// ebx = C, destination samples
		mov ecx, cnt1;		// ecx = number of 8-sample blocks
		movupd xmm7, M;		// xmm7 = sign pattern M
		movss  xmm6, shift;	// xmm6 = shift count, upper bits zeroed by the load

		// Build the rotation multiplier from B
		movss	xmm1, [esi];		// low 32 bits = [Re Im], rest zero
		pshuflw	xmm1, xmm1, 0x14;	// low 64 bits = [Re Im Im Re]
		pmullw	xmm1, xmm7;			// low 64 bits = [Re -Im Im Re]
		pshufd	xmm7, xmm1, 0x44;	// xmm7 = that pattern in both 64-bit halves

		jecxz ZERO1;		// fewer than 8 samples: tail loop only

		L1:

			movlpd xmm0, [edi];		// samples 0-1 of this block
			movlpd xmm1, [edi+8];	// samples 2-3
			movlpd xmm2, [edi+16];	// samples 4-5
			movlpd xmm3, [edi+24];	// samples 6-7

			punpckldq	xmm0, xmm0;	// duplicate each sample: [s0 s0 s1 s1]
			punpckldq	xmm1, xmm1;
			punpckldq	xmm2, xmm2;
			punpckldq	xmm3, xmm3;

			pmaddwd		xmm0, xmm7;	// each dword pair = (re, im) of sample * B
			pmaddwd		xmm1, xmm7;
			pmaddwd		xmm2, xmm7;
			pmaddwd		xmm3, xmm7;

			psrad		xmm0, xmm6;	// arithmetic shift right by shift
			psrad		xmm1, xmm6;
			psrad		xmm2, xmm6;
			psrad		xmm3, xmm6;

			packssdw	xmm0, xmm0;	// saturate to 16 bits, result in low 64 bits
			packssdw	xmm1, xmm1;
			packssdw	xmm2, xmm2;
			packssdw	xmm3, xmm3;

			movsd [ebx],    xmm0;	// store 2 samples into C
			movsd [ebx+8],  xmm1;
			movsd [ebx+16], xmm2;
			movsd [ebx+24], xmm3;

			add			edi, 32;	// advance source by 8 samples
			add			ebx, 32;	// advance destination by 8 samples

			dec			ecx;		// flags are not consumed after the branch
			jnz			L1;

ZERO1:

		mov ecx, cnt2;		// ecx = leftover samples
		jecxz ZERO2;

		L2:

			movss		xmm0, [edi];	// one sample [re im], rest zero

			punpckldq	xmm0, xmm0;		// [s s 0 0]
			pmaddwd		xmm0, xmm7;		// low two dwords = (re, im) of sample * B
			psrad		xmm0, xmm6;		// arithmetic shift right by shift
			packssdw	xmm0, xmm0;		// saturate to 16 bits, result in low 32 bits

			movss		[ebx], xmm0;	// store the sample into C

			add			edi, 4;			// advance source by 1 sample
			add			ebx, 4;			// advance destination by 1 sample

			dec			ecx;
			jnz			L2;

ZERO2:

		EMMS;				// retained from the original; only XMM registers are used above

	}


}



void sse_qnt(void *A, int cnt)
{

	__int64 hash1 = 0x8000800080008000; //to save the sign bit
	__int64 hash2 = 0x0001000100010001; //add the "1" magnitude
	unsigned short	shash1 = 0x8000;
	unsigned short	shash2 = 0xfffe;
	int cnt1;
	int cnt2;

	cnt1 = cnt / 40;						
	cnt2 = (cnt - (40*cnt1));				

	if(((int)A%16))
	{
		__asm 
		{
			
			// Set up for loop
			mov edi, A;			// Address of A
			mov ecx, cnt1;		// Counter
			movq xmm6, [hash1];	//move in the sign bit hash
			movq xmm7, [hash2];	//move in the magnitude hash
			pshufd	xmm6, xmm6, 0x44;	
			pshufd	xmm7, xmm7, 0x44;	

			jecxz ZERO;

			L1:

				movupd xmm0, [edi];
				movupd xmm1, [edi+16];
				movupd xmm2, [edi+32];
				movupd xmm3, [edi+48];
				movupd xmm4, [edi+64];

				pand xmm0, xmm6;		//Clear everything except the sign bit
				pand xmm1, xmm6;		//Clear everything except the sign bit
				pand xmm2, xmm6;		//Clear everything except the sign bit
				pand xmm3, xmm6;		//Clear everything except the sign bit
				pand xmm4, xmm6;		//Clear everything except the sign bit

				pcmpeqw xmm0, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm1, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm2, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm3, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm4, xmm6;		//Is the sign bit 1 or 0

				por		xmm0, xmm7;		//Add the magnitude
				por		xmm1, xmm7;		//Add the magnitude
				por		xmm2, xmm7;		//Add the magnitude
				por		xmm3, xmm7;		//Add the magnitude
				por		xmm4, xmm7;		//Add the magnitude

				movupd	[edi],	  xmm0;	//Move back to the array
				movupd	[edi+16], xmm1;	//Move back to the array
				movupd	[edi+32], xmm2;	//Move back to the array
				movupd	[edi+48], xmm3;	//Move back to the array
				movupd	[edi+64], xmm4;	//Move back to the array

				add edi, 80;

			loop L1;					// Loop if not done

	ZERO:
				mov ecx, cnt2;			// Finish off operation with second loop
				jecxz ZERO1;

				mov ax, shash1;
				mov bx, shash2;

			L2:

				mov dx, [edi];
				and dx, ax;
				cmp dx, ax;
				cmove dx, bx;
				add dx, 0x0001;
				mov [edi], dx;

				add edi, 2;

			loop L2;

				
	ZERO1:	

			EMMS;							// done with MMX

		
		}
	}
	else
	{
		__asm 
		{
			
			// Set up for loop
			mov edi, A;			// Address of A
			mov ecx, cnt1;		// Counter
			movq xmm6, [hash1];	//move in the sign bit hash
			movq xmm7, [hash2];	//move in the magnitude hash
			pshufd	xmm6, xmm6, 0x44;	
			pshufd	xmm7, xmm7, 0x44;	

			jecxz AZERO;

			AL1:

				movapd xmm0, [edi];
				movapd xmm1, [edi+16];
				movapd xmm2, [edi+32];
				movapd xmm3, [edi+48];
				movapd xmm4, [edi+64];

				pand xmm0, xmm6;		//Clear everything except the sign bit
				pand xmm1, xmm6;		//Clear everything except the sign bit
				pand xmm2, xmm6;		//Clear everything except the sign bit
				pand xmm3, xmm6;		//Clear everything except the sign bit
				pand xmm4, xmm6;		//Clear everything except the sign bit

				pcmpeqw xmm0, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm1, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm2, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm3, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm4, xmm6;		//Is the sign bit 1 or 0

				por		xmm0, xmm7;		//Add the magnitude
				por		xmm1, xmm7;		//Add the magnitude
				por		xmm2, xmm7;		//Add the magnitude
				por		xmm3, xmm7;		//Add the magnitude
				por		xmm4, xmm7;		//Add the magnitude

				movapd	[edi],	  xmm0;	//Move back to the array
				movapd	[edi+16], xmm1;	//Move back to the array
				movapd	[edi+32], xmm2;	//Move back to the array
				movapd	[edi+48], xmm3;	//Move back to the array
				movapd	[edi+64], xmm4;	//Move back to the array

				add edi, 80;

			loop AL1;					// Loop if not done

	AZERO:
				mov ecx, cnt2;			// Finish off operation with second loop
				jecxz AZERO1;

				mov ax, shash1;
				mov bx, shash2;

			AL2:

				mov dx, [edi];
				and dx, ax;
				cmp dx, ax;
				cmove dx, bx;
				add dx, 0x0001;
				mov [edi], dx;

				add edi, 2;

			loop AL2;

				
	AZERO1:	

			EMMS;							// done with MMX

		
		}
	}




}


void sse_qnt(void *A, void *C, int cnt)
{

	__int64 hash1 = 0x8000800080008000; //to save the sign bit
	__int64 hash2 = 0x0001000100010001; //add the "1" magnitude
	unsigned short	shash1 = 0x8000;
	unsigned short	shash2 = 0xfffe;
	int cnt1;
	int cnt2;

	cnt1 = cnt / 40;						
	cnt2 = (cnt - (40*cnt1));				

	if(((int)A%16) || ((int)C%16))
	{
		__asm 
		{
			
			// Set up for loop
			mov edi, A;			// Address of A
			mov esi, C;			// Address of C
			mov ecx, cnt1;		// Counter
			movq xmm6, [hash1];	//move in the sign bit hash
			movq xmm7, [hash2];	//move in the magnitude hash
			pshufd	xmm6, xmm6, 0x44;	
			pshufd	xmm7, xmm7, 0x44;	
			jecxz ZERO;


			L1:

				movupd xmm0, [edi];
				movupd xmm1, [edi+16];
				movupd xmm2, [edi+32];
				movupd xmm3, [edi+48];
				movupd xmm4, [edi+64];

				pand xmm0, xmm6;		//Clear everything except the sign bit
				pand xmm1, xmm6;		//Clear everything except the sign bit
				pand xmm2, xmm6;		//Clear everything except the sign bit
				pand xmm3, xmm6;		//Clear everything except the sign bit
				pand xmm4, xmm6;		//Clear everything except the sign bit

				pcmpeqw xmm0, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm1, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm2, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm3, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm4, xmm6;		//Is the sign bit 1 or 0

				por		xmm0, xmm7;		//Add the magnitude
				por		xmm1, xmm7;		//Add the magnitude
				por		xmm2, xmm7;		//Add the magnitude
				por		xmm3, xmm7;		//Add the magnitude
				por		xmm4, xmm7;		//Add the magnitude

				movupd	[esi],	  xmm0;	//Move back to the array
				movupd	[esi+16], xmm1;	//Move back to the array
				movupd	[esi+32], xmm2;	//Move back to the array
				movupd	[esi+48], xmm3;	//Move back to the array
				movupd	[esi+64], xmm4;	//Move back to the array

				add edi, 80;
				add esi, 80;

			loop L1;					// Loop if not done

	ZERO:
				mov ecx, cnt2;			// Finish off operation with second loop
				jecxz ZERO1;

				mov ax, shash1;
				mov bx, shash2;

			L2:

				mov dx, [edi];
				and dx, ax;
				cmp dx, ax;
				cmove dx, bx;
				add dx, 0x0001;
				mov [esi], dx;

				add edi, 2;
				add esi, 2;

			loop L2;

				
	ZERO1:	

			EMMS;							// done with MMX

		
		}
	}
	else
	{
		__asm 
		{
			
			// Set up for loop
			mov edi, A;			// Address of A
			mov esi, C;			// Address of C
			mov ecx, cnt1;		// Counter
			movq xmm6, [hash1];	//move in the sign bit hash
			movq xmm7, [hash2];	//move in the magnitude hash
			pshufd	xmm6, xmm6, 0x44;	
			pshufd	xmm7, xmm7, 0x44;	
			jecxz AZERO;


			AL1:

				movapd xmm0, [edi];
				movapd xmm1, [edi+16];
				movapd xmm2, [edi+32];
				movapd xmm3, [edi+48];
				movapd xmm4, [edi+64];

				pand xmm0, xmm6;		//Clear everything except the sign bit
				pand xmm1, xmm6;		//Clear everything except the sign bit
				pand xmm2, xmm6;		//Clear everything except the sign bit
				pand xmm3, xmm6;		//Clear everything except the sign bit
				pand xmm4, xmm6;		//Clear everything except the sign bit

				pcmpeqw xmm0, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm1, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm2, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm3, xmm6;		//Is the sign bit 1 or 0
				pcmpeqw xmm4, xmm6;		//Is the sign bit 1 or 0

				por		xmm0, xmm7;		//Add the magnitude
				por		xmm1, xmm7;		//Add the magnitude
				por		xmm2, xmm7;		//Add the magnitude
				por		xmm3, xmm7;		//Add the magnitude
				por		xmm4, xmm7;		//Add the magnitude

				movapd	[esi],	  xmm0;	//Move back to the array
				movapd	[esi+16], xmm1;	//Move back to the array
				movapd	[esi+32], xmm2;	//Move back to the array
				movapd	[esi+48], xmm3;	//Move back to the array
				movapd	[esi+64], xmm4;	//Move back to the array

				add edi, 80;
				add esi, 80;

			loop AL1;					// Loop if not done

	AZERO:
				mov ecx, cnt2;			// Finish off operation with second loop
				jecxz AZERO1;

				mov ax, shash1;
				mov bx, shash2;

			AL2:

				mov dx, [edi];
				and dx, ax;
				cmp dx, ax;
				cmove dx, bx;
				add dx, 0x0001;
				mov [esi], dx;

				add edi, 2;
				add esi, 2;

			loop AL2;

				
	AZERO1:	

			EMMS;							// done with MMX

		
		}
	}




}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -