📄 jidctfst.c
字号:
"pmullw 8*12(%%edi),%%mm4 \n\t" //tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); "paddw %%mm7,%%mm2 \n\t" //tmp12 "pmulhw _fix_141,%%mm6 \n\t" //tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ "psubw %%mm3,%%mm2 \n\t" //tmp6 = tmp12 - tmp7 "movq %%mm1,%%mm5 \n\t" //copy tmp1 "paddw %%mm4,%%mm1 \n\t" //tmp13= tmp1 + tmp3; /* phases 5-3 */ "psubw %%mm4,%%mm5 \n\t" //tmp1-tmp3 "psubw %%mm2,%%mm6 \n\t" //tmp5 = tmp11 - tmp6; "movq %%mm1,8*0(%%esi) \n\t" //save tmp13 in workspace "psllw $2,%%mm5 \n\t" //shift tmp1-tmp3 "movq 8*0(%%ebx),%%mm7 \n\t" //load inptr[DCTSIZE*0] "pmulhw _fix_141,%%mm5 \n\t" //MULTIPLY(tmp1 - tmp3, FIX_1_414213562) "paddw %%mm6,%%mm0 \n\t" //tmp4 = tmp10 + tmp5; "pmullw 8*0(%%edi),%%mm7 \n\t" //tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); "movq 8*8(%%ebx),%%mm4 \n\t" //load inptr[DCTSIZE*4] "pmullw 8*8(%%edi),%%mm4 \n\t" //tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); "psubw %%mm1,%%mm5 \n\t" //tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */ "movq %%mm0,8*4(%%esi) \n\t" //save tmp4 in workspace "movq %%mm7,%%mm1 \n\t" //copy tmp0 /* phase 3 */ "movq %%mm5,8*2(%%esi) \n\t" //save tmp12 in workspace "psubw %%mm4,%%mm1 \n\t" //tmp11 = tmp0 - tmp2; "paddw %%mm4,%%mm7 \n\t" //tmp10 = tmp0 + tmp2; "movq %%mm1,%%mm5 \n\t" //copy tmp11 "paddw 8*2(%%esi),%%mm1 \n\t" //tmp1 = tmp11 + tmp12; "movq %%mm7,%%mm4 \n\t" //copy tmp10 /* phase 2 */ "paddw 8*0(%%esi),%%mm7 \n\t" //tmp0 = tmp10 + tmp13; "psubw 8*0(%%esi),%%mm4 \n\t" //tmp3 = tmp10 - tmp13; "movq %%mm7,%%mm0 \n\t" //copy tmp0 "psubw 8*2(%%esi),%%mm5 \n\t" //tmp2 = tmp11 - tmp12; "paddw %%mm3,%%mm7 \n\t" //wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); "psubw %%mm3,%%mm0 \n\t" //wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); "movq %%mm7,8*0(%%esi) \n\t" //wsptr[DCTSIZE*0] "movq %%mm1,%%mm3 \n\t" //copy tmp1 "movq %%mm0,8*14(%%esi) \n\t" //wsptr[DCTSIZE*7] "paddw %%mm2,%%mm1 \n\t" //wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); "psubw %%mm2,%%mm3 \n\t" //wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); "movq %%mm1,8*2(%%esi) \n\t" //wsptr[DCTSIZE*1] "movq %%mm4,%%mm1 \n\t" //copy tmp3 "movq %%mm3,8*12(%%esi) \n\t" //wsptr[DCTSIZE*6] "paddw 8*4(%%esi),%%mm4 \n\t" //wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); "psubw 8*4(%%esi),%%mm1 \n\t" //wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); "movq %%mm4,8*8(%%esi) \n\t" "movq %%mm5,%%mm7 \n\t" //copy tmp2 "paddw %%mm6,%%mm5 \n\t" //wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) "movq %%mm1,8*6(%%esi) \n\t" // "psubw %%mm6,%%mm7 \n\t" //wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); "movq %%mm5,8*4(%%esi) \n\t" "movq %%mm7,8*10(%%esi) \n\t"/*****************************************************************/ "addl $8,%%edi \n\t" "addl $8,%%ebx \n\t" "addl $8,%%esi \n\t"/*****************************************************************/ "movq 8*10(%%ebx),%%mm1 \n\t" //load inptr[DCTSIZE*5] "pmullw 8*10(%%edi),%%mm1 \n\t" //tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); "movq 8*6(%%ebx),%%mm0 \n\t" //load inptr[DCTSIZE*3] "pmullw 8*6(%%edi),%%mm0 \n\t" //tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); "movq 8*2(%%ebx),%%mm3 \n\t" //load inptr[DCTSIZE*1] "movq %%mm1,%%mm2 \n\t" //copy tmp6 /* phase 6 */ "pmullw 8*2(%%edi),%%mm3 \n\t" //tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); "movq 8*14(%%ebx),%%mm4 \n\t" //load inptr[DCTSIZE*1] "paddw %%mm0,%%mm1 \n\t" //z13 = tmp6 + tmp5; "pmullw 8*14(%%edi),%%mm4 \n\t" //tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); "psubw %%mm0,%%mm2 \n\t" //z10 = tmp6 - tmp5 "psllw $2,%%mm2 \n\t" //shift z10 "movq %%mm2,%%mm0 \n\t" //copy z10 "pmulhw _fix_184n261,%%mm2 \n\t" //MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */ "movq %%mm3,%%mm5 \n\t" //copy tmp4 "pmulhw _fix_n184,%%mm0 \n\t" //MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */ "paddw %%mm4,%%mm3 \n\t" //z11 = tmp4 + tmp7; "movq %%mm3,%%mm6 \n\t" //copy z11 /* phase 5 */ "psubw %%mm4,%%mm5 \n\t" //z12 = tmp4 - tmp7; "psubw %%mm1,%%mm6 \n\t" //z11-z13 "psllw $2,%%mm5 \n\t" //shift z12 "movq 8*12(%%ebx),%%mm4 \n\t" //load inptr[DCTSIZE*6], even part "movq %%mm5,%%mm7 \n\t" //copy z12 "pmulhw _fix_108n184,%%mm5 \n\t" //MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part "paddw %%mm1,%%mm3 \n\t" //tmp7 = z11 + z13; /* Even part */ "pmulhw _fix_184,%%mm7 \n\t" //MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */ "psllw $2,%%mm6 \n\t" "movq 8*4(%%ebx),%%mm1 \n\t" //load inptr[DCTSIZE*2] "pmullw 8*4(%%edi),%%mm1 \n\t" //tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); "paddw %%mm5,%%mm0 \n\t" //tmp10 "pmullw 8*12(%%edi),%%mm4 \n\t" //tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); "paddw %%mm7,%%mm2 \n\t" //tmp12 "pmulhw _fix_141,%%mm6 \n\t" //tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */ "psubw %%mm3,%%mm2 \n\t" //tmp6 = tmp12 - tmp7 "movq %%mm1,%%mm5 \n\t" //copy tmp1 "paddw %%mm4,%%mm1 \n\t" //tmp13= tmp1 + tmp3; /* phases 5-3 */ "psubw %%mm4,%%mm5 \n\t" //tmp1-tmp3 "psubw %%mm2,%%mm6 \n\t" //tmp5 = tmp11 - tmp6; "movq %%mm1,8*0(%%esi) \n\t" //save tmp13 in workspace "psllw $2,%%mm5 \n\t" //shift tmp1-tmp3 "movq 8*0(%%ebx),%%mm7 \n\t" //load inptr[DCTSIZE*0] "paddw %%mm6,%%mm0 \n\t" //tmp4 = tmp10 + tmp5; "pmulhw _fix_141,%%mm5 \n\t" //MULTIPLY(tmp1 - tmp3, FIX_1_414213562) "pmullw 8*0(%%edi),%%mm7 \n\t" //tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); "movq 8*8(%%ebx),%%mm4 \n\t" //load inptr[DCTSIZE*4] "pmullw 8*8(%%edi),%%mm4 \n\t" //tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); "psubw %%mm1,%%mm5 \n\t" //tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */ "movq %%mm0,8*4(%%esi) \n\t" //save tmp4 in workspace "movq %%mm7,%%mm1 \n\t" //copy tmp0 /* phase 3 */ "movq %%mm5,8*2(%%esi) \n\t" //save tmp12 in workspace "psubw %%mm4,%%mm1 \n\t" //tmp11 = tmp0 - tmp2; "paddw %%mm4,%%mm7 \n\t" //tmp10 = tmp0 + tmp2; "movq %%mm1,%%mm5 \n\t" //copy tmp11 "paddw 8*2(%%esi),%%mm1 \n\t" //tmp1 = tmp11 + tmp12; "movq %%mm7,%%mm4 \n\t" //copy tmp10 /* phase 2 */ "paddw 8*0(%%esi),%%mm7 \n\t" //tmp0 = tmp10 + tmp13; "psubw 8*0(%%esi),%%mm4 \n\t" //tmp3 = tmp10 - tmp13; "movq %%mm7,%%mm0 \n\t" //copy tmp0 "psubw 8*2(%%esi),%%mm5 \n\t" //tmp2 = tmp11 - tmp12; "paddw %%mm3,%%mm7 \n\t" //wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); "psubw %%mm3,%%mm0 \n\t" //wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); "movq %%mm7,8*0(%%esi) \n\t" //wsptr[DCTSIZE*0] "movq %%mm1,%%mm3 \n\t" //copy tmp1 "movq %%mm0,8*14(%%esi) \n\t" //wsptr[DCTSIZE*7] "paddw %%mm2,%%mm1 \n\t" //wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); "psubw %%mm2,%%mm3 \n\t" //wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); "movq %%mm1,8*2(%%esi) \n\t" //wsptr[DCTSIZE*1] "movq %%mm4,%%mm1 \n\t" //copy tmp3 "movq %%mm3,8*12(%%esi) \n\t" //wsptr[DCTSIZE*6] "paddw 8*4(%%esi),%%mm4 \n\t" //wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); "psubw 8*4(%%esi),%%mm1 \n\t" //wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); "movq %%mm4,8*8(%%esi) \n\t" "movq %%mm5,%%mm7 \n\t" //copy tmp2 "paddw %%mm6,%%mm5 \n\t" //wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) "movq %%mm1,8*6(%%esi) \n\t" // "psubw %%mm6,%%mm7 \n\t" //wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); "movq %%mm5,8*4(%%esi) \n\t" "movq %%mm7,8*10(%%esi) \n\t"/*****************************************************************/ /* Pass 2: process rows from work array, store into output array. */ /* Note that we must descale the results by a factor of 8 == 2**3, */ /* and also undo the PASS1_BITS scaling. *//*****************************************************************/ /* Even part */ "movl %%eax,%%esi \n\t" "movl %3, %%eax \n\t"// tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);// tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);// tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);// tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]); "movq 8*0(%%esi),%%mm0 \n\t" //wsptr[0,0],[0,1],[0,2],[0,3] "movq 8*1(%%esi),%%mm1 \n\t" //wsptr[0,4],[0,5],[0,6],[0,7] "movq %%mm0,%%mm2 \n\t" "movq 8*2(%%esi),%%mm3 \n\t" //wsptr[1,0],[1,1],[1,2],[1,3] "paddw %%mm1,%%mm0 \n\t" //wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] "movq 8*3(%%esi),%%mm4 \n\t" //wsptr[1,4],[1,5],[1,6],[1,7] "psubw %%mm1,%%mm2 \n\t" //wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] "movq %%mm0,%%mm6 \n\t" "movq %%mm3,%%mm5 \n\t" "paddw %%mm4,%%mm3 \n\t" //wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] "movq %%mm2,%%mm1 \n\t" "psubw %%mm4,%%mm5 \n\t" //wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] "punpcklwd %%mm3,%%mm0 \n\t" //wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] "movq 8*7(%%esi),%%mm7 \n\t" //wsptr[3,4],[3,5],[3,6],[3,7] "punpckhwd %%mm3,%%mm6 \n\t" //wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] "movq 8*4(%%esi),%%mm3 \n\t" //wsptr[2,0],[2,1],[2,2],[2,3] "punpckldq %%mm6,%%mm0 \n\t" //wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] "punpcklwd %%mm5,%%mm1 \n\t" //wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] "movq %%mm3,%%mm4 \n\t" "movq 8*6(%%esi),%%mm6 \n\t" //wsptr[3,0],[3,1],[3,2],[3,3] "punpckhwd %%mm5,%%mm2 \n\t" //wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] "movq 8*5(%%esi),%%mm5 \n\t" //wsptr[2,4],[2,5],[2,6],[2,7] "punpckldq %%mm2,%%mm1 \n\t" //wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] "paddw %%mm5,%%mm3 \n\t" //wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] "movq %%mm6,%%mm2 \n\t" "psubw %%mm5,%%mm4 \n\t" //wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] "paddw %%mm7,%%mm6 \n\t" //wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] "movq %%mm3,%%mm5 \n\t" "punpcklwd %%mm6,%%mm3 \n\t" //wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] "psubw %%mm7,%%mm2 \n\t" //wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] "punpckhwd %%mm6,%%mm5 \n\t" //wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] "movq %%mm4,%%mm7 \n\t" "punpckldq %%mm5,%%mm3 \n\t" //wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] "punpcklwd %%mm2,%%mm4 \n\t" //wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] "punpckhwd %%mm2,%%mm7 \n\t" //wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] "punpckldq %%mm7,%%mm4 \n\t" //wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] "movq %%mm1,%%mm6 \n\t"// mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]// mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] "movq %%mm0,%%mm2 \n\t" "punpckhdq %%mm4,%%mm6 \n\t" //wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] "punpckldq %%mm4,%%mm1 \n\t" //wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] "psllw $2,%%mm6 \n\t" "pmulhw _fix_141,%%mm6 \n\t" "punpckldq %%mm3,%%mm0 \n\t" //wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] "punpckhdq %%mm3,%%mm2 \n\t" //wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] "movq %%mm0,%%mm7 \n\t"// tmp0 = tmp10 + tmp13;// tmp3 = tmp10 - tmp13; "paddw %%mm2,%%mm0 \n\t" //[0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] "psubw %%mm2,%%mm7 \n\t" //[0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3]// tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; "psubw %%mm2,%%mm6 \n\t" //wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12]// tmp1 = tmp11 + tmp12;// tmp2 = tmp11 - tmp12; "movq %%mm1,%%mm5 \n\t" /* Odd part */// z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];// z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];// z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];// z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; "movq 8*0(%%esi),%%mm3 \n\t" //wsptr[0,0],[0,1],[0,2],[0,3] "paddw %%mm6,%%mm1 \n\t" //[0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] "movq 8*1(%%esi),%%mm4 \n\t" //wsptr[0,4],[0,5],[0,6],[0,7] "psubw %%mm6,%%mm5 \n\t" //[0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] "movq %%mm3,%%mm6 \n\t" "punpckldq %%mm4,%%mm3 \n\t" //wsptr[0,0],[0,1],[0,4],[0,5] "punpckhdq %%mm6,%%mm4 \n\t" //wsptr[0,6],[0,7],[0,2],[0,3] "movq %%mm3,%%mm2 \n\t"//Save tmp0 and tmp1 in wsptr "movq %%mm0,8*0(%%esi) \n\t" //save tmp0 "paddw %%mm4,%%mm2 \n\t" //wsptr[xxx],[0,z11],[xxx],[0,z13]//Continue with z10 --- z13 "movq 8*2(%%esi),%%mm6 \n\t" //wsptr[1,0],[1,1],[1,2],[1,3] "psubw %%mm4,%%mm3 \n\t" //wsptr[xxx],[0,z12],[xxx],[0,z10] "movq 8*3(%%esi),%%mm0 \n\t" //wsptr[1,4],[1,5],[1,6],[1,7] "movq %%mm6,%%mm4 \n\t" "movq %%mm1,8*1(%%esi) \n\t" //save tmp1 "punpckldq %%mm0,%%mm6 \n\t" //wsptr[1,0],[1,1],[1,4],[1,5] "punpckhdq %%mm4,%%mm0 \n\t" //wsptr[1,6],[1,7],[1,2],[1,3] "movq %%mm6,%%mm1 \n\t"//Save tmp2 and tmp3 in wsptr "paddw %%mm0,%%mm6 \n\t" //wsptr[xxx],[1,z11],[xxx],[1,z13] "movq %%mm2,%%mm4 \n\t"//Continue with z10 --- z13 "movq %%mm5,8*2(%%esi) \n\t" //save tmp2 "punpcklwd %%mm6,%%mm2 \n\t" //wsptr[xxx],[xxx],[0,z11],[1,z11] "psubw %%mm0,%%mm1 \n\t" //wsptr[xxx],[1,z12],[xxx],[1,z10] "punpckhwd %%mm6,%%mm4 \n\t" //wsptr[xxx],[xxx],[0,z13],[1,z13] "movq %%mm3,%%mm0 \n\t" "punpcklwd %%mm1,%%mm3 \n\t" //wsptr[xxx],[xxx],[0,z12],[1,z12] "movq %%mm7,8*3(%%esi) \n\t" //save tmp3 "punpckhwd %%mm1,%%mm0 \n\t"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -