asm-block-18.c

来自「Mac OS X 10.4.9 for x86 Source Code gcc」· C语言 代码 · 共 453 行

C
453
字号
/* APPLE LOCAL file CW asm blocks *//* { dg-do assemble { target i?86*-*-darwin* } } *//* { dg-options { -fasm-blocks -msse3 -O2 } } *//* Radar 4248228 */int packedw0x80;typedef int DWORD;typedef unsigned char unsigned8;typedef int int32;#define M_m0		0#define M_m8		8#define M_m16		16#define M_m24		24extern void e1(const unsigned8 *, unsigned8 *, int32, int32, int32, int32);typedef struct{  DWORD	m0[2];  DWORD	m8[2];  DWORD	m16[2];  DWORD	m24[2];} M_2;voide2(const unsigned8 *srcPtr, unsigned8 *dstPtr, int32 rows, int32 cols,   int32 sRowBytes, int32 dRowBytes){  int32	sRowB, dRowB, MMXColCnt, r0sum, r0sq, extras;  M_2	qArray, *pqArray;  if (rows <= 0  ||  cols <= 0)    return;  if (rows <= 1  ||  cols <= 7)    {      e1(srcPtr, dstPtr, rows, cols, sRowBytes, dRowBytes);      return;    }  asm {	mov		ebx, cols	sub		rows, 1	mov		ecx, ebx	and		ecx, 3	sar		ebx, 2	mov		extras, ecx	mov		MMXColCnt, ebx	mov		eax, sRowBytes	mov		ebx, cols   	mov		ecx, dRowBytes	and		ebx, 0fffffffCh	mov		esi, eax	mov		sRowB, eax	mov		dRowB, ecx	mov		eax, srcPtr	lea		edx, qArray	sub		eax, esi	add		edx, 7	and		edx, 0fffffff8h	mov		srcPtr, eax	mov		pqArray, edx	mov		edi, dstPtrRow:	movd		mm1, [-1][eax][esi]	pxor		mm3, mm3	movd		mm0, [-1][eax]	pslld		mm1, 24	movd		mm2, [-1][eax][esi*2]	punpcklbw	mm1, mm3	movq		mm4, mm1	pslld		mm0, 24	pslld		mm2, 24	pmullw		mm1, mm1	punpcklbw	mm0, mm3	paddw		mm4, mm0	punpcklbw	mm2, mm3	pmullw		mm0, mm0	paddw		mm4, mm2	pmullw		mm2, mm2	punpckhwd	mm1, mm3	movd		mm6, [eax][esi]	psrlq		mm4, 48	movd		mm7, [eax]	punpckhwd	mm0, mm3	movd		r0sum, mm4	paddd		mm0, mm1	movd		mm5, [eax][esi*2]	punpckhwd	mm2, mm3	punpcklbw	mm6, mm3	paddd		mm0, mm2	psrlq		mm0, 32	movq		mm1, mm6	punpcklbw	mm7, mm3	pmullw		mm1, mm1	movd		r0sq, mm0	punpcklbw	mm5, mm3	paddw		mm6, mm5	pmullw		mm5, mm5	paddw		mm6, mm7	pmullw		mm7, mm7	movq		mm4, mm1	punpcklwd	mm1, mm3	movq		mm0, mm5	punpckhwd	mm4, mm3	movq		mm2, mm7	punpcklwd	mm0, mm3	paddd		mm1, mm0	punpcklwd	mm2, mm3		punpckhwd	mm5, mm3	paddd		mm1, mm2	punpckhwd	mm7, mm3	paddd		mm4, mm5	paddd		mm4, mm7	movq		mm0, mm1	movd		mm2, r0sq	movq		mm7, mm4	movq		mm5, mm4	psrlq		mm1, 32	paddd		mm2, mm0	psllq		mm5, 32	paddd		mm2, mm1	paddd		mm7, mm1	paddd		mm2, mm5	psllq		mm0, 32	paddd		mm7, mm5	psrlq		mm4, 32	paddd		mm2, mm0	paddd		mm7, mm4	movq		mm0, mm2	pslld		mm2, 3	movd		r0sq, mm4	paddd		mm2, mm0		movd		mm1, r0sum	movq		mm5, mm6	paddw		mm5, mm1	movq		mm1, mm6	psrlq		mm1, 16	movq		mm4, mm6	psllq		mm4, 16	paddw		mm5, mm1	psrlq		mm6, 48	paddw		mm5, mm4	movq		mm0, mm5	punpcklwd	mm5, mm3	mov		ecx, MMXColCnt	pmaddwd		mm5, mm5	mov		ebx, pqArray	movq		mm4, mm7	add		eax, 4	movd		r0sum, mm6	psubd		mm2, mm5Col:	movd		mm6, [eax][esi]	movd		mm7, [eax]	punpcklbw	mm6, mm3	movd		mm5, [eax][esi*2]	movq		mm1, mm6	punpcklbw	mm7, mm3	pmullw		mm1, mm1	punpcklbw	mm5, mm3	paddw		mm6, mm5	pmullw		mm5, mm5	paddw		mm6, mm7	pmullw		mm7, mm7	movq		[ebx][M_m0], mm6	psllq		mm6, 48		movq		[ebx][M_m8], mm4	paddw		mm6, mm0	movq		[ebx][M_m16], mm2	punpckhwd	mm6, mm3		pmaddwd		mm6, mm6	movq		mm4, mm1	punpcklwd	mm1, mm3	movq		mm0, mm5	punpckhwd	mm4, mm3	movq		mm2, mm7	punpcklwd	mm0, mm3	paddd		mm1, mm0	punpcklwd	mm2, mm3		punpckhwd	mm5, mm3	paddd		mm1, mm2	punpckhwd	mm7, mm3	paddd		mm4, mm5	paddd		mm4, mm7	movq		mm0, mm1	movq		mm2, [ebx][M_m8]	psllq		mm0, 32	movq		mm5, [ebx][M_m16]	paddd		mm0, mm2	movq		mm2, mm0	pslld		mm0, 3	movq		mm7, mm5	paddd		mm0, mm2	psubd		mm0, mm6	movq		mm6, mm5	movq		mm2, mm0	pslld		mm6, 10	movq		mm3, mm0	pslld		mm7, 4	pslld		mm2, 10	paddd		mm6, mm7	pslld		mm3, 4	movq		mm7, mm6	paddd		mm2, mm3	paddd		mm6, mm6	movq		mm3, mm2	paddd		mm6, mm7	paddd		mm2, mm2	movq		mm7, mm5					pslld		mm7, 1	paddd		mm5, mm7	pslld		mm7, 1	paddd		mm5, mm7	pslld		mm7, 5	paddd		mm5, mm7	pslld		mm7, 1	paddd		mm5, mm7	psrld		mm5, 9	paddd		mm2, mm3	movq		mm7, packedw0x80	paddd		mm5, mm6	psrld		mm5, 16	movq		mm3, mm0					pslld		mm3, 1	paddd		mm0, mm3	pslld		mm3, 1	paddd		mm0, mm3	pslld		mm3, 5	paddd		mm0, mm3	pslld		mm3, 1	paddd		mm0, mm3	psrld		mm0, 9	movq		mm3, mm5	push		ecx	paddd		mm0, mm2	mov		ecx, 8	psrld		mm0, 16	punpckhdq	mm3, mm0	pxor		mm2, mm2	punpckldq	mm5, mm0	pxor		mm0, mm0	psllq		mm3, 16	por		mm5, mm3sqroot:	por		mm2, mm7	movq		mm6, mm5	movq		mm3, mm2	pmullw		mm2, mm2	psubusw		mm6, mm2	psubusw		mm2, mm5	pcmpeqw		mm2, mm6	pcmpeqw		 mm6, mm0	pxor		mm2, mm6	pand		mm2, mm7	psrlw		mm7, 1	pxor		mm2, mm3	dec		ecx	jnz		sqroot	pop		ecx	packuswb	mm2, mm2	movq		mm6, [ebx][M_m0]	pxor		mm3, mm3		movd		[edi], mm2	movq		mm0, mm1	movd		mm2, r0sq	movq		mm7, mm4	paddd		mm2, mm0	psrlq		mm1, 32	movq		mm5, mm4	paddd		mm2, mm1	psllq		mm5, 32	paddd		mm7, mm1	paddd		mm2, mm5	paddd		mm7, mm5	psllq		mm0, 32	paddd		mm2, mm0	psrlq		mm4, 32	paddd		mm7, mm4	movq		mm0, mm2	pslld		mm2, 3	movd		mm1, r0sum	paddd		mm2, mm0		movd		r0sq, mm4	movq		mm5, mm6	paddw		mm5, mm1	movq		mm1, mm6	psrlq		mm1, 16	movq		mm4, mm6	psllq		mm4, 16	paddw		mm5, mm1	psrlq		mm6, 48	paddw		mm5, mm4	movq		mm0, mm5	punpcklwd	mm5, mm3	movd		r0sum, mm6	pmaddwd		mm5, mm5	add		eax, 4	add		edi, 4	psubd		mm2, mm5	movq		mm4, mm7	dec		ecx   	jnz		Col	mov		ecx, extras	cmp		ecx, 0	je		EndRow	movd		mm6, [eax][esi]	movd		mm7, [eax]	punpcklbw	mm6, mm3	movd		mm5, [eax][esi*2]	movq		mm1, mm6	punpcklbw	mm7, mm3	pmullw		mm1, mm1	punpcklbw	mm5, mm3	paddw		mm6, mm5	pmullw		mm5, mm5	paddw		mm6, mm7	pmullw		mm7, mm7	movq		[ebx][M_m0], mm6	psllq		mm6, 48		movq		[ebx][M_m8], mm4	paddw		mm6, mm0	movq		[ebx][M_m16], mm2	punpckhwd	mm6, mm3		pmaddwd		mm6, mm6	movq		mm4, mm1	punpcklwd	mm1, mm3	movq		mm0, mm5	punpckhwd	mm4, mm3	movq		mm2, mm7	punpcklwd	mm0, mm3	paddd		mm1, mm0	punpcklwd	mm2, mm3		punpckhwd	mm5, mm3	paddd		mm1, mm2	punpckhwd	mm7, mm3	paddd		mm4, mm5	paddd		mm4, mm7	movq		mm0, mm1	movq		mm2, [ebx][M_m8]	psllq		mm0, 32	movq		mm5, [ebx][M_m16]	paddd		mm0, mm2	movq		mm2, mm0	pslld		mm0, 3	movq		mm7, mm5	paddd		mm0, mm2	psubd		mm0, mm6	movq		mm6, mm5	movq		mm2, mm0	pslld		mm6, 10	movq		mm3, mm0	pslld		mm7, 4	pslld		mm2, 10	paddd		mm6, mm7	pslld		mm3, 4	movq		mm7, mm6	paddd		mm2, mm3	paddd		mm6, mm6	movq		mm3, mm2	paddd		mm6, mm7	paddd		mm2, mm2	movq		mm7, mm5					pslld		mm7, 1	paddd		mm5, mm7	pslld		mm7, 1	paddd		mm5, mm7	pslld		mm7, 5	paddd		mm5, mm7	pslld		mm7, 1	paddd		mm5, mm7	psrld		mm5, 9	paddd		mm2, mm3	movq		mm7, packedw0x80	paddd		mm5, mm6	psrld		mm5, 16	movq		mm3, mm0					pslld		mm3, 1	paddd		mm0, mm3	pslld		mm3, 1	paddd		mm0, mm3	pslld		mm3, 5	paddd		mm0, mm3	pslld		mm3, 1	paddd		mm0, mm3	psrld		mm0, 9	movq		mm3, mm5	push		ecx	paddd		mm0, mm2	mov		ecx, 8	psrld		mm0, 16	punpckhdq	mm3, mm0	pxor		mm2, mm2	punpckldq	mm5, mm0	pxor		mm0, mm0	psllq		mm3, 16	por		mm5, mm3sqrootExtras:	por		mm2, mm7	movq		mm6, mm5	movq		mm3, mm2	pmullw		mm2, mm2	psubusw		mm6, mm2	psubusw		mm2, mm5	pcmpeqw		mm2, mm6	pcmpeqw		mm6, mm0	pxor		mm2, mm6	pand		mm2, mm7	psrlw		mm7, 1	pxor		mm2, mm3	dec		ecx	jnz		sqrootExtras	pop		ecx	packuswb	mm2, mm2	movq		mm6, [ebx][M_m0]	pxor		mm3, mm3		movd		ebx, mm2	mov		ecx, extrasStoreExtras:	mov		[edi], bl	inc		edi	shr		ebx, 8	dec		ecx	jg		StoreExtrasEndRow:	mov		eax, srcPtr	mov		edi, dstPtr   	mov		edx, dRowB	add		eax, esi	mov		ebx, rows	mov		srcPtr, eax	add		edi, edx	dec		ebx	mov		dstPtr, edi	mov		rows, ebx	jnz		Row	mov		rows, 1	add		eax, esi	mov		srcPtr, eax	mov		dstPtr, edi	emms  }  e1(srcPtr, dstPtr, rows, cols, sRowBytes, dRowBytes);}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?