📄 jidctfst.c

📁 jpeg编解码器
💻 C
📖 第 1 页 / 共 5 页
字号:
    outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)			    & RANGE_MASK];    outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)			    & RANGE_MASK];    outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)			    & RANGE_MASK];    outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)			    & RANGE_MASK];    outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)			    & RANGE_MASK];    wsptr += DCTSIZE;		/* advance pointer to next row */  }}#if defined(HAVE_MMX_INTEL_MNEMONICS) || defined(HAVE_MMX_ATT_MNEMONICS) #if IFAST_MULT_TYPE != short#error IFAST_MULT_TYPE has to be 16 bits wide (look in jdct.h), otherwise the ASM MMX code will produce strange output !#endif#define __int64 long long /* This won't work for Intel compilers - tell Gernot to help fixing ! */ #define int16 short /* And this won't either */const     __int64 _fix_141      = 0x5a825a825a825a82LL;const	  __int64 _fix_184n261	= 0xcf04cf04cf04cf04LL;const	  __int64 _fix_184	= 0x7641764176417641LL;const	  __int64 _fix_n184	= 0x896f896f896f896fLL;const	  __int64 _fix_108n184	= 0xcf04cf04cf04cf04LL;const	  __int64 _const_0x0080	= 0x0080008000800080LL;__inline GLOBAL(void)jpeg_idct_ifast_mmx (j_decompress_ptr cinfo, jpeg_component_info * compptr,		 JCOEFPTR inptr,		 JSAMPARRAY outptr, JDIMENSION output_col){  int16 workspace[DCTSIZE2 + 4];	/* buffers data between passes */  int16 *wsptr=workspace;  int16 *quantptr=compptr->dct_table;#if defined(HAVE_MMX_INTEL_MNEMONICS)  __asm{     	mov		edi, quantptr	mov		ebx, inptr	mov		esi, wsptr	add		esi, 0x07		;align wsptr to qword	and		esi, 0xfffffff8	;align wsptr to qword	mov		eax, esi    /* Odd part */	movq		mm1, [ebx + 8*10]		;load inptr[DCTSIZE*5]	pmullw		mm1, [edi + 8*10]		;tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);	movq		mm0, [ebx + 8*6]		;load inptr[DCTSIZE*3]	pmullw		mm0, [edi + 8*6]		;tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);	movq		mm3, [ebx + 8*2]		;load inptr[DCTSIZE*1]	movq	mm2, mm1					;copy tmp6	/* phase 6 */	pmullw		mm3, [edi + 8*2]		;tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);	movq		mm4, [ebx + 8*14]		;load inptr[DCTSIZE*1]	paddw	mm1, mm0					;z13 = tmp6 + tmp5;	pmullw		mm4, [edi + 8*14]	    ;tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);	psubw	mm2, mm0					;z10 = tmp6 - tmp5   	psllw		mm2, 2				;shift z10	movq		mm0, mm2			;copy z10	pmulhw		mm2, fix_184n261	;MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */	movq		mm5, mm3				;copy tmp4	pmulhw		mm0, fix_n184		;MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */	paddw		mm3, mm4				;z11 = tmp4 + tmp7;	movq		mm6, mm3				;copy z11			/* phase 5 */	psubw		mm5, mm4				;z12 = tmp4 - tmp7;	psubw		mm6, mm1				;z11-z13	psllw		mm5, 2				;shift z12	movq		mm4, [ebx + 8*12]		;load inptr[DCTSIZE*6], even part 	movq		mm7, mm5			;copy z12	pmulhw		mm5, fix_108n184	;MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part	paddw		mm3, mm1				;tmp7 = z11 + z13;	    /* Even part */	pmulhw		mm7, fix_184		;MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */	psllw		mm6, 2	movq		mm1, [ebx + 8*4]		;load inptr[DCTSIZE*2]	pmullw		mm1, [edi + 8*4]		;tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);	paddw		mm0, mm5			;tmp10	pmullw		mm4, [edi + 8*12]		;tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);	paddw		mm2, mm7			;tmp12	pmulhw		mm6, fix_141			;tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */	psubw		mm2, mm3		;tmp6 = tmp12 - tmp7	movq		mm5, mm1				;copy tmp1	paddw		mm1, mm4				;tmp13= tmp1 + tmp3;	/* phases 5-3 */	psubw		mm5, mm4				;tmp1-tmp3	psubw		mm6, mm2		;tmp5 = tmp11 - tmp6;	movq		[esi+8*0], mm1			;save tmp13 in workspace	psllw		mm5, 2					;shift tmp1-tmp3    	movq		mm7, [ebx + 8*0]		;load inptr[DCTSIZE*0]	pmulhw		mm5, fix_141			;MULTIPLY(tmp1 - tmp3, FIX_1_414213562)	paddw		mm0, mm6		;tmp4 = tmp10 + tmp5;	pmullw		mm7, [edi + 8*0]		;tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);	movq		mm4, [ebx + 8*8]		;load inptr[DCTSIZE*4]		pmullw		mm4, [edi + 8*8]		;tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);	psubw		mm5, mm1				;tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */	movq		[esi+8*4], mm0		;save tmp4 in workspace	movq		mm1, mm7			;copy tmp0	/* phase 3 */	movq		[esi+8*2], mm5		;save tmp12 in workspace	psubw		mm1, mm4			;tmp11 = tmp0 - tmp2; 	paddw		mm7, mm4			;tmp10 = tmp0 + tmp2;    movq		mm5, mm1		;copy tmp11		paddw		mm1, [esi+8*2]	;tmp1 = tmp11 + tmp12;	movq		mm4, mm7		;copy tmp10		/* phase 2 */	paddw		mm7, [esi+8*0]	;tmp0 = tmp10 + tmp13;		psubw		mm4, [esi+8*0]	;tmp3 = tmp10 - tmp13;	movq		mm0, mm7		;copy tmp0	psubw		mm5, [esi+8*2]	;tmp2 = tmp11 - tmp12;	paddw		mm7, mm3		;wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);		psubw		mm0, mm3			;wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);	movq		[esi + 8*0], mm7	;wsptr[DCTSIZE*0]	movq		mm3, mm1			;copy tmp1	movq		[esi + 8*14], mm0	;wsptr[DCTSIZE*7]	paddw		mm1, mm2			;wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);	psubw		mm3, mm2			;wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);	movq		[esi + 8*2], mm1	;wsptr[DCTSIZE*1]	movq		mm1, mm4			;copy tmp3	movq		[esi + 8*12], mm3	;wsptr[DCTSIZE*6]	paddw		mm4, [esi+8*4]		;wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);	psubw		mm1, [esi+8*4]		;wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);	movq		[esi + 8*8], mm4	movq		mm7, mm5			;copy tmp2	paddw		mm5, mm6			;wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5)	movq		[esi+8*6], mm1		;	psubw		mm7, mm6			;wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);	movq		[esi + 8*4], mm5	movq		[esi + 8*10], mm7/*****************************************************************/	add		edi, 8	add		ebx, 8	add		esi, 8/*****************************************************************/	movq		mm1, [ebx + 8*10]		;load inptr[DCTSIZE*5]	pmullw		mm1, [edi + 8*10]		;tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);	movq		mm0, [ebx + 8*6]		;load inptr[DCTSIZE*3]	pmullw		mm0, [edi + 8*6]		;tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);	movq		mm3, [ebx + 8*2]		;load inptr[DCTSIZE*1]	movq	mm2, mm1					;copy tmp6	/* phase 6 */	pmullw		mm3, [edi + 8*2]		;tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);	movq		mm4, [ebx + 8*14]		;load inptr[DCTSIZE*1]	paddw	mm1, mm0					;z13 = tmp6 + tmp5;	pmullw		mm4, [edi + 8*14]	    ;tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);	psubw	mm2, mm0					;z10 = tmp6 - tmp5   	psllw		mm2, 2				;shift z10	movq		mm0, mm2			;copy z10	pmulhw		mm2, fix_184n261	;MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */	movq		mm5, mm3				;copy tmp4	pmulhw		mm0, fix_n184		;MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */	paddw		mm3, mm4				;z11 = tmp4 + tmp7;	movq		mm6, mm3				;copy z11			/* phase 5 */	psubw		mm5, mm4				;z12 = tmp4 - tmp7;	psubw		mm6, mm1				;z11-z13	psllw		mm5, 2				;shift z12	movq		mm4, [ebx + 8*12]		;load inptr[DCTSIZE*6], even part 	movq		mm7, mm5			;copy z12	pmulhw		mm5, fix_108n184	;MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part	paddw		mm3, mm1				;tmp7 = z11 + z13;	    /* Even part */	pmulhw		mm7, fix_184		;MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */	psllw		mm6, 2	movq		mm1, [ebx + 8*4]		;load inptr[DCTSIZE*2]	pmullw		mm1, [edi + 8*4]		;tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);	paddw		mm0, mm5			;tmp10	pmullw		mm4, [edi + 8*12]		;tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);	paddw		mm2, mm7			;tmp12	pmulhw		mm6, fix_141			;tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */	psubw		mm2, mm3		;tmp6 = tmp12 - tmp7	movq		mm5, mm1				;copy tmp1	paddw		mm1, mm4				;tmp13= tmp1 + tmp3;	/* phases 5-3 */	psubw		mm5, mm4				;tmp1-tmp3	psubw		mm6, mm2		;tmp5 = tmp11 - tmp6;	movq		[esi+8*0], mm1			;save tmp13 in workspace	psllw		mm5, 2					;shift tmp1-tmp3    	movq		mm7, [ebx + 8*0]		;load inptr[DCTSIZE*0]	paddw		mm0, mm6		;tmp4 = tmp10 + tmp5;	pmulhw		mm5, fix_141			;MULTIPLY(tmp1 - tmp3, FIX_1_414213562)	pmullw		mm7, [edi + 8*0]		;tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);	movq		mm4, [ebx + 8*8]		;load inptr[DCTSIZE*4]		pmullw		mm4, [edi + 8*8]		;tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);	psubw		mm5, mm1				;tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */	movq		[esi+8*4], mm0		;save tmp4 in workspace	movq		mm1, mm7			;copy tmp0	/* phase 3 */	movq		[esi+8*2], mm5		;save tmp12 in workspace	psubw		mm1, mm4			;tmp11 = tmp0 - tmp2; 	paddw		mm7, mm4			;tmp10 = tmp0 + tmp2;    movq		mm5, mm1		;copy tmp11		paddw		mm1, [esi+8*2]	;tmp1 = tmp11 + tmp12;	movq		mm4, mm7		;copy tmp10		/* phase 2 */	paddw		mm7, [esi+8*0]	;tmp0 = tmp10 + tmp13;		psubw		mm4, [esi+8*0]	;tmp3 = tmp10 - tmp13;	movq		mm0, mm7		;copy tmp0	psubw		mm5, [esi+8*2]	;tmp2 = tmp11 - tmp12;	paddw		mm7, mm3		;wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);		psubw		mm0, mm3			;wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);	movq		[esi + 8*0], mm7	;wsptr[DCTSIZE*0]	movq		mm3, mm1			;copy tmp1	movq		[esi + 8*14], mm0	;wsptr[DCTSIZE*7]	paddw		mm1, mm2			;wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);	psubw		mm3, mm2			;wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);	movq		[esi + 8*2], mm1	;wsptr[DCTSIZE*1]	movq		mm1, mm4			;copy tmp3	movq		[esi + 8*12], mm3	;wsptr[DCTSIZE*6]	paddw		mm4, [esi+8*4]		;wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);	psubw		mm1, [esi+8*4]		;wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);	movq		[esi + 8*8], mm4	movq		mm7, mm5			;copy tmp2	paddw		mm5, mm6			;wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5)	movq		[esi+8*6], mm1		;	psubw		mm7, mm6			;wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);	movq		[esi + 8*4], mm5	movq		[esi + 8*10], mm7/*****************************************************************/  /* Pass 2: process rows from work array, store into output array. */  /* Note that we must descale the results by a factor of 8 == 2**3, */  /* and also undo the PASS1_BITS scaling. *//*****************************************************************/    /* Even part */	mov			esi, eax	mov			eax, outptr//    tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);//    tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);//    tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);//    tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]);	movq		mm0, [esi+8*0]		;wsptr[0,0],[0,1],[0,2],[0,3]	movq		mm1, [esi+8*1]		;wsptr[0,4],[0,5],[0,6],[0,7]	movq		mm2, mm0		movq		mm3, [esi+8*2]		;wsptr[1,0],[1,1],[1,2],[1,3]	paddw		mm0, mm1			;wsptr[0,tmp10],[xxx],[0,tmp13],[xxx]	movq		mm4, [esi+8*3]		;wsptr[1,4],[1,5],[1,6],[1,7]	psubw		mm2, mm1			;wsptr[0,tmp11],[xxx],[0,tmp14],[xxx]	movq		mm6, mm0	movq		mm5, mm3		paddw		mm3, mm4			;wsptr[1,tmp10],[xxx],[1,tmp13],[xxx]	movq		mm1, mm2	psubw		mm5, mm4			;wsptr[1,tmp11],[xxx],[1,tmp14],[xxx]	punpcklwd	mm0, mm3			;wsptr[0,tmp10],[1,tmp10],[xxx],[xxx]	movq		mm7, [esi+8*7]		;wsptr[3,4],[3,5],[3,6],[3,7]	punpckhwd	mm6, mm3			;wsptr[0,tmp13],[1,tmp13],[xxx],[xxx]	movq		mm3, [esi+8*4]		;wsptr[2,0],[2,1],[2,2],[2,3]	punpckldq	mm0, mm6	;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]	punpcklwd	mm1, mm5			;wsptr[0,tmp11],[1,tmp11],[xxx],[xxx]	movq		mm4, mm3	movq		mm6, [esi+8*6]		;wsptr[3,0],[3,1],[3,2],[3,3]	punpckhwd	mm2, mm5			;wsptr[0,tmp14],[1,tmp14],[xxx],[xxx]	movq		mm5, [esi+8*5]		;wsptr[2,4],[2,5],[2,6],[2,7]	punpckldq	mm1, mm2	;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]		paddw		mm3, mm5			;wsptr[2,tmp10],[xxx],[2,tmp13],[xxx]
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -