📄 jidctfst.c
字号:
psubw mm2, mm7 ;wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] punpckhwd mm5, mm6 ;wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] movq mm7, mm4 punpckldq mm3, mm5 ;wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] punpcklwd mm4, mm2 ;wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] punpckhwd mm7, mm2 ;wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] punpckldq mm4, mm7 ;wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] movq mm6, mm1// mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]// mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] movq mm2, mm0 punpckhdq mm6, mm4 ;wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] punpckldq mm1, mm4 ;wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] psllw mm6, 2 pmulhw mm6, fix_141 punpckldq mm0, mm3 ;wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] punpckhdq mm2, mm3 ;wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] movq mm7, mm0// tmp0 = tmp10 + tmp13;// tmp3 = tmp10 - tmp13; paddw mm0, mm2 ;[0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] psubw mm7, mm2 ;[0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3]// tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; psubw mm6, mm2 ;wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12]// tmp1 = tmp11 + tmp12;// tmp2 = tmp11 - tmp12; movq mm5, mm1 /* Odd part */// z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];// z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];// z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];// z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; movq mm3, [esi+8*0] ;wsptr[0,0],[0,1],[0,2],[0,3] paddw mm1, mm6 ;[0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] movq mm4, [esi+8*1] ;wsptr[0,4],[0,5],[0,6],[0,7] psubw mm5, mm6 ;[0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] movq mm6, mm3 punpckldq mm3, mm4 ;wsptr[0,0],[0,1],[0,4],[0,5] punpckhdq mm4, mm6 ;wsptr[0,6],[0,7],[0,2],[0,3] movq mm2, mm3//Save tmp0 and tmp1 in wsptr movq [esi+8*0], mm0 ;save tmp0 paddw mm2, mm4 ;wsptr[xxx],[0,z11],[xxx],[0,z13] //Continue with z10 --- z13 movq mm6, [esi+8*2] ;wsptr[1,0],[1,1],[1,2],[1,3] psubw mm3, mm4 ;wsptr[xxx],[0,z12],[xxx],[0,z10] movq mm0, [esi+8*3] ;wsptr[1,4],[1,5],[1,6],[1,7] movq mm4, mm6 movq [esi+8*1], mm1 ;save tmp1 punpckldq mm6, mm0 ;wsptr[1,0],[1,1],[1,4],[1,5] punpckhdq mm0, mm4 ;wsptr[1,6],[1,7],[1,2],[1,3] movq mm1, mm6 //Save tmp2 and tmp3 in wsptr paddw mm6, mm0 ;wsptr[xxx],[1,z11],[xxx],[1,z13] movq mm4, mm2 //Continue with z10 --- z13 movq [esi+8*2], mm5 ;save tmp2 punpcklwd mm2, mm6 ;wsptr[xxx],[xxx],[0,z11],[1,z11] psubw mm1, mm0 ;wsptr[xxx],[1,z12],[xxx],[1,z10] punpckhwd mm4, mm6 ;wsptr[xxx],[xxx],[0,z13],[1,z13] movq mm0, mm3 punpcklwd mm3, mm1 ;wsptr[xxx],[xxx],[0,z12],[1,z12] movq [esi+8*3], mm7 ;save tmp3 punpckhwd mm0, mm1 ;wsptr[xxx],[xxx],[0,z10],[1,z10] movq mm6, [esi+8*4] ;wsptr[2,0],[2,1],[2,2],[2,3] punpckhdq mm0, mm2 ;wsptr[0,z10],[1,z10],[0,z11],[1,z11] movq mm7, [esi+8*5] ;wsptr[2,4],[2,5],[2,6],[2,7] punpckhdq mm3, mm4 ;wsptr[0,z12],[1,z12],[0,z13],[1,z13] movq mm1, [esi+8*6] ;wsptr[3,0],[3,1],[3,2],[3,3] movq mm4, mm6 punpckldq mm6, mm7 ;wsptr[2,0],[2,1],[2,4],[2,5] movq mm5, mm1 punpckhdq mm7, mm4 ;wsptr[2,6],[2,7],[2,2],[2,3] movq mm2, mm6 movq mm4, [esi+8*7] ;wsptr[3,4],[3,5],[3,6],[3,7] paddw mm6, mm7 ;wsptr[xxx],[2,z11],[xxx],[2,z13] psubw mm2, mm7 ;wsptr[xxx],[2,z12],[xxx],[2,z10] punpckldq mm1, mm4 ;wsptr[3,0],[3,1],[3,4],[3,5] punpckhdq mm4, mm5 ;wsptr[3,6],[3,7],[3,2],[3,3] movq mm7, mm1 paddw mm1, mm4 ;wsptr[xxx],[3,z11],[xxx],[3,z13] psubw mm7, mm4 ;wsptr[xxx],[3,z12],[xxx],[3,z10] movq mm5, mm6 punpcklwd mm6, mm1 ;wsptr[xxx],[xxx],[2,z11],[3,z11] punpckhwd mm5, mm1 ;wsptr[xxx],[xxx],[2,z13],[3,z13] movq mm4, mm2 punpcklwd mm2, mm7 ;wsptr[xxx],[xxx],[2,z12],[3,z12] punpckhwd mm4, mm7 ;wsptr[xxx],[xxx],[2,z10],[3,z10] punpckhdq mm4, mm6 ;wsptr[2,z10],[3,z10],[2,z11],[3,z11] punpckhdq mm2, mm5 ;wsptr[2,z12],[3,z12],[2,z13],[3,z13] movq mm5, mm0 punpckldq mm0, mm4 ;wsptr[0,z10],[1,z10],[2,z10],[3,z10] punpckhdq mm5, mm4 ;wsptr[0,z11],[1,z11],[2,z11],[3,z11] movq mm4, mm3 punpckhdq mm4, mm2 ;wsptr[0,z13],[1,z13],[2,z13],[3,z13] movq mm1, mm5 punpckldq mm3, mm2 ;wsptr[0,z12],[1,z12],[2,z12],[3,z12]// tmp7 = z11 + z13; /* phase 5 */// tmp8 = z11 - z13; /* phase 5 */ psubw mm1, mm4 ;tmp8 paddw mm5, mm4 ;tmp7// tmp21 = MULTIPLY(tmp8, FIX_1_414213562); /* 2*c4 */ psllw mm1, 2 psllw mm0, 2 pmulhw mm1, fix_141 ;tmp21// tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) /* 2*(c2-c6) */// + MULTIPLY(z10, - FIX_1_847759065); /* 2*c2 */ psllw mm3, 2 movq mm7, mm0 pmulhw mm7, fix_n184 movq mm6, mm3 movq mm2, [esi+8*0] ;tmp0,final1 pmulhw mm6, fix_108n184// tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) /* -2*(c2+c6) */// + MULTIPLY(z12, FIX_1_847759065); /* 2*c2 */ movq mm4, mm2 ;final1 pmulhw mm0, fix_184n261 paddw mm2, mm5 ;tmp0+tmp7,final1 pmulhw mm3, fix_184 psubw mm4, mm5 ;tmp0-tmp7,final1// tmp6 = tmp22 - tmp7; /* phase 2 */ psraw mm2, 5 ;outptr[0,0],[1,0],[2,0],[3,0],final1 paddsw mm2, const_0x0080 ;final1 paddw mm7, mm6 ;tmp20 psraw mm4, 5 ;outptr[0,7],[1,7],[2,7],[3,7],final1 paddsw mm4, const_0x0080 ;final1 paddw mm3, mm0 ;tmp22// tmp5 = tmp21 - tmp6; psubw mm3, mm5 ;tmp6// tmp4 = tmp20 + tmp5; movq mm0, [esi+8*1] ;tmp1,final2 psubw mm1, mm3 ;tmp5 movq mm6, mm0 ;final2 paddw mm0, mm3 ;tmp1+tmp6,final2 /* Final output stage: scale down by a factor of 8 and range-limit */// outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)// & RANGE_MASK];// outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)// & RANGE_MASK]; final1// outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)// & RANGE_MASK];// outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)// & RANGE_MASK]; final2 psubw mm6, mm3 ;tmp1-tmp6,final2 psraw mm0, 5 ;outptr[0,1],[1,1],[2,1],[3,1] paddsw mm0, const_0x0080 psraw mm6, 5 ;outptr[0,6],[1,6],[2,6],[3,6] paddsw mm6, const_0x0080 ;need to check this value packuswb mm0, mm4 ;out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] movq mm5, [esi+8*2] ;tmp2,final3 packuswb mm2, mm6 ;out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6]// outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)// & RANGE_MASK];// outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)// & RANGE_MASK]; final3 paddw mm7, mm1 ;tmp4 movq mm3, mm5 paddw mm5, mm1 ;tmp2+tmp5 psubw mm3, mm1 ;tmp2-tmp5 psraw mm5, 5 ;outptr[0,2],[1,2],[2,2],[3,2] paddsw mm5, const_0x0080 movq mm4, [esi+8*3] ;tmp3,final4 psraw mm3, 5 ;outptr[0,5],[1,5],[2,5],[3,5] paddsw mm3, const_0x0080// outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)// & RANGE_MASK];// outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)// & RANGE_MASK]; final4 movq mm6, mm4 paddw mm4, mm7 ;tmp3+tmp4 psubw mm6, mm7 ;tmp3-tmp4 psraw mm4, 5 ;outptr[0,4],[1,4],[2,4],[3,4] mov ecx, [eax] paddsw mm4, const_0x0080 psraw mm6, 5 ;outptr[0,3],[1,3],[2,3],[3,3] paddsw mm6, const_0x0080 packuswb mm5, mm4 ;out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] packuswb mm6, mm3 ;out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] movq mm4, mm2 movq mm7, mm5 punpcklbw mm2, mm0 ;out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] punpckhbw mm4, mm0 ;out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] movq mm1, mm2 punpcklbw mm5, mm6 ;out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] add eax, 4 punpckhbw mm7, mm6 ;out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] punpcklwd mm2, mm5 ;out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] add ecx, output_col movq mm6, mm7 punpckhwd mm1, mm5 ;out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] movq mm0, mm2 punpcklwd mm6, mm4 ;out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] mov ebx, [eax] punpckldq mm2, mm6 ;out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] add eax, 4 movq mm3, mm1 add ebx, output_col punpckhwd mm7, mm4 ;out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] movq [ecx], mm2 punpckhdq mm0, mm6 ;out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] mov ecx, [eax] add eax, 4 add ecx, output_col movq [ebx], mm0 punpckldq mm1, mm7 ;out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] mov ebx, [eax] add ebx, output_col punpckhdq mm3, mm7 ;out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] movq [ecx], mm1 movq [ebx], mm3 emms }#endif#if defined(HAVE_MMX_ATT_MNEMONICS) __asm__ ( "pushl %%ebx\n\t" "movl %0, %%edi \n\t" "movl %1, %%ebx \n\t" "movl %2, %%esi \n\t" "addl $0x07,%%esi \n\t" //align wsptr to qword "andl $0xfffffff8,%%esi \n\t" //align wsptr to qword "movl %%esi,%%eax \n\t" /* Odd part */ "movq 8*10(%%ebx),%%mm1 \n\t" //load inptr[DCTSIZE*5] "pmullw 8*10(%%edi),%%mm1 \n\t" //tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); "movq 8*6(%%ebx),%%mm0 \n\t" //load inptr[DCTSIZE*3] "pmullw 8*6(%%edi),%%mm0 \n\t" //tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); "movq 8*2(%%ebx),%%mm3 \n\t" //load inptr[DCTSIZE*1] "movq %%mm1,%%mm2 \n\t" //copy tmp6 /* phase 6 */ "pmullw 8*2(%%edi),%%mm3 \n\t" //tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); "movq 8*14(%%ebx),%%mm4 \n\t" //load inptr[DCTSIZE*1] "paddw %%mm0,%%mm1 \n\t" //z13 = tmp6 + tmp5; "pmullw 8*14(%%edi),%%mm4 \n\t" //tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); "psubw %%mm0,%%mm2 \n\t" //z10 = tmp6 - tmp5 "psllw $2,%%mm2 \n\t" //shift z10 "movq %%mm2,%%mm0 \n\t" //copy z10 "pmulhw _fix_184n261,%%mm2 \n\t" //MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */ "movq %%mm3,%%mm5 \n\t" //copy tmp4 "pmulhw _fix_n184,%%mm0 \n\t" //MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */ "paddw %%mm4,%%mm3 \n\t" //z11 = tmp4 + tmp7; "movq %%mm3,%%mm6 \n\t" //copy z11 /* phase 5 */ "psubw %%mm4,%%mm5 \n\t" //z12 = tmp4 - tmp7; "psubw %%mm1,%%mm6 \n\t" //z11-z13 "psllw $2,%%mm5 \n\t" //shift z12 "movq 8*12(%%ebx),%%mm4 \n\t" //load inptr[DCTSIZE*6], even part "movq %%mm5,%%mm7 \n\t" //copy z12 "pmulhw _fix_108n184,%%mm5 \n\t" //MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part "paddw %%mm1,%%mm3 \n\t" //tmp7 = z11 + z13; /* Even part */ "pmulhw _fix_184,%%mm7 \n\t" //MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */ "psllw $2,%%mm6 \n\t" "movq 8*4(%%ebx),%%mm1 \n\t" //load inptr[DCTSIZE*2] "pmullw 8*4(%%edi),%%mm1 \n\t" //tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); "paddw %%mm5,%%mm0 \n\t" //tmp10
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -