vf_fspp.c
来自「君正早期ucos系统(只有早期的才没有打包成库),MPLAYER,文件系统,图」· C语言 代码 · 共 2,126 行 · 第 1/5 页
C
2,126 行
tmp7 = (z11 + z13)>>2; //+2 ! tmp11 = MULTIPLY16H((z11 - z13)<<1, FIX_1_414213562); z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065); tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5; tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !! tmp6 = tmp12 - tmp7; tmp5 = tmp11 - tmp6; tmp4 = tmp10 + tmp5; wsptr[DCTSIZE*0]+= (tmp0 + tmp7); wsptr[DCTSIZE*1]+= (tmp1 + tmp6); wsptr[DCTSIZE*2]+= (tmp2 + tmp5); wsptr[DCTSIZE*3]+= (tmp3 - tmp4); wsptr[DCTSIZE*4]+= (tmp3 + tmp4); wsptr[DCTSIZE*5]+= (tmp2 - tmp5); wsptr[DCTSIZE*6]= (tmp1 - tmp6); wsptr[DCTSIZE*7]= (tmp0 - tmp7); // dataptr++; //next column wsptr++; threshold++; } dataptr+=8; //skip each second start pos wsptr +=8; }}#else /* HAVE_MMX */static void column_fidct_mmx(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int cnt){ asm volatile( ASMALIGN(4) "1: \n\t" "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm1 \n\t" // "movq "DCTSIZE_S"*3*2(%%"REG_S"), %%mm7 \n\t" "movq %%mm1, %%mm0 \n\t" "paddw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm1 \n\t" //t0 "movq %%mm7, %%mm3 \n\t" "paddw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm7 \n\t" //t3 "movq %%mm1, %%mm5 \n\t" "movq "DCTSIZE_S"*1*2(%%"REG_S"), %%mm6 \n\t" "psubw %%mm7, %%mm1 \n\t" //t13 "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t" "movq %%mm6, %%mm4 \n\t" "paddw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm6 \n\t" //t1 "paddw %%mm7, %%mm5 \n\t" //t10 "paddw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t2 "movq %%mm6, %%mm7 \n\t" "paddw %%mm2, %%mm6 \n\t" //t11 "psubw %%mm2, %%mm7 \n\t" //t12 "movq %%mm5, %%mm2 \n\t" "paddw %%mm6, %%mm5 \n\t" //d0 // i0 t13 t12 i3 i1 d0 - d4 "psubw %%mm6, %%mm2 \n\t" //d4 "paddw %%mm1, %%mm7 \n\t" "movq 4*16(%%"REG_d"), %%mm6 \n\t" "psllw $2, %%mm7 \n\t" "psubw 0*16(%%"REG_d"), %%mm5 \n\t" "psubw %%mm6, %%mm2 \n\t" "paddusw 0*16(%%"REG_d"), %%mm5 \n\t" "paddusw %%mm6, %%mm2 \n\t" "pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm7 \n\t" // "paddw 0*16(%%"REG_d"), %%mm5 \n\t" "paddw %%mm6, %%mm2 \n\t" "psubusw 0*16(%%"REG_d"), %%mm5 \n\t" "psubusw %%mm6, %%mm2 \n\t"//This func is totally 
compute-bound, operates at huge speed. So, DC shortcut// at this place isn't worthwhile due to BTB miss penalty (checked on Pent. 3).//However, typical numbers: nondc - 29%%, dc - 46%%, zero - 25%%. All <> 0 case is very rare. "paddw "MANGLE(MM_2)", %%mm5 \n\t" "movq %%mm2, %%mm6 \n\t" "paddw %%mm5, %%mm2 \n\t" "psubw %%mm6, %%mm5 \n\t" "movq %%mm1, %%mm6 \n\t" "paddw %%mm7, %%mm1 \n\t" //d2 "psubw 2*16(%%"REG_d"), %%mm1 \n\t" "psubw %%mm7, %%mm6 \n\t" //d6 "movq 6*16(%%"REG_d"), %%mm7 \n\t" "psraw $2, %%mm5 \n\t" "paddusw 2*16(%%"REG_d"), %%mm1 \n\t" "psubw %%mm7, %%mm6 \n\t" // t7 d2 /t11 t4 t6 - d6 /t10 "paddw 2*16(%%"REG_d"), %%mm1 \n\t" "paddusw %%mm7, %%mm6 \n\t" "psubusw 2*16(%%"REG_d"), %%mm1 \n\t" "paddw %%mm7, %%mm6 \n\t" "psubw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm3 \n\t" "psubusw %%mm7, %%mm6 \n\t" //movq [edi+"DCTSIZE_S"*2*2], mm1 //movq [edi+"DCTSIZE_S"*6*2], mm6 "movq %%mm1, %%mm7 \n\t" "psraw $2, %%mm2 \n\t" "psubw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm4 \n\t" "psubw %%mm6, %%mm1 \n\t" "psubw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm0 \n\t" "paddw %%mm7, %%mm6 \n\t" //'t13 "psraw $2, %%mm6 \n\t" //paddw mm6, MM_2 !! --- "movq %%mm2, %%mm7 \n\t" "pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm1 \n\t" "paddw %%mm6, %%mm2 \n\t" //'t0 "movq %%mm2, "MANGLE(temps)"+0*8 \n\t" //! 
"psubw %%mm6, %%mm7 \n\t" //'t3 "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t" "psubw %%mm6, %%mm1 \n\t" //'t12 "psubw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t5 "movq %%mm5, %%mm6 \n\t" "movq %%mm7, "MANGLE(temps)"+3*8 \n\t" "paddw %%mm2, %%mm3 \n\t" //t10 "paddw %%mm4, %%mm2 \n\t" //t11 "paddw %%mm0, %%mm4 \n\t" //t12 "movq %%mm3, %%mm7 \n\t" "psubw %%mm4, %%mm3 \n\t" "psllw $2, %%mm3 \n\t" "psllw $2, %%mm7 \n\t" //opt for P6 "pmulhw "MANGLE(MM_FIX_0_382683433)", %%mm3 \n\t" "psllw $2, %%mm4 \n\t" "pmulhw "MANGLE(MM_FIX_0_541196100)", %%mm7 \n\t" "psllw $2, %%mm2 \n\t" "pmulhw "MANGLE(MM_FIX_1_306562965)", %%mm4 \n\t" "paddw %%mm1, %%mm5 \n\t" //'t1 "pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm2 \n\t" "psubw %%mm1, %%mm6 \n\t" //'t2 // t7 't12 't11 t4 t6 - 't13 't10 --- "paddw %%mm3, %%mm7 \n\t" //z2 "movq %%mm5, "MANGLE(temps)"+1*8 \n\t" "paddw %%mm3, %%mm4 \n\t" //z4 "movq 3*16(%%"REG_d"), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm6, "MANGLE(temps)"+2*8 \n\t" "psubw %%mm2, %%mm1 \n\t" //z13 //=== "paddw %%mm2, %%mm0 \n\t" //z11 "movq %%mm1, %%mm5 \n\t" "movq 5*16(%%"REG_d"), %%mm2 \n\t" "psubw %%mm7, %%mm1 \n\t" //d3 "paddw %%mm7, %%mm5 \n\t" //d5 "psubw %%mm3, %%mm1 \n\t" "movq 1*16(%%"REG_d"), %%mm7 \n\t" "psubw %%mm2, %%mm5 \n\t" "movq %%mm0, %%mm6 \n\t" "paddw %%mm4, %%mm0 \n\t" //d1 "paddusw %%mm3, %%mm1 \n\t" "psubw %%mm4, %%mm6 \n\t" //d7 // d1 d3 - - - d5 d7 - "movq 7*16(%%"REG_d"), %%mm4 \n\t" "psubw %%mm7, %%mm0 \n\t" "psubw %%mm4, %%mm6 \n\t" "paddusw %%mm2, %%mm5 \n\t" "paddusw %%mm4, %%mm6 \n\t" "paddw %%mm3, %%mm1 \n\t" "paddw %%mm2, %%mm5 \n\t" "paddw %%mm4, %%mm6 \n\t" "psubusw %%mm3, %%mm1 \n\t" "psubusw %%mm2, %%mm5 \n\t" "psubusw %%mm4, %%mm6 \n\t" "movq %%mm1, %%mm4 \n\t" "por %%mm5, %%mm4 \n\t" "paddusw %%mm7, %%mm0 \n\t" "por %%mm6, %%mm4 \n\t" "paddw %%mm7, %%mm0 \n\t" "packssdw %%mm4, %%mm4 \n\t" "psubusw %%mm7, %%mm0 \n\t" "movd %%mm4, %%"REG_a" \n\t" "or %%"REG_a", %%"REG_a" \n\t" "jnz 2f \n\t" //movq [edi+"DCTSIZE_S"*3*2], mm1 
//movq [edi+"DCTSIZE_S"*5*2], mm5 //movq [edi+"DCTSIZE_S"*1*2], mm0 //movq [edi+"DCTSIZE_S"*7*2], mm6 // t4 t5 - - - t6 t7 - //--- t4 (mm0) may be <>0; mm1, mm5, mm6 == 0//Typical numbers: nondc - 19%%, dc - 26%%, zero - 55%%. zero case alone isn't worthwhile "movq "MANGLE(temps)"+0*8, %%mm4 \n\t" "movq %%mm0, %%mm1 \n\t" "pmulhw "MANGLE(MM_FIX_0_847759065)", %%mm0 \n\t" //tmp6 "movq %%mm1, %%mm2 \n\t" "movq "DCTSIZE_S"*0*2(%%"REG_D"), %%mm5 \n\t" "movq %%mm2, %%mm3 \n\t" "pmulhw "MANGLE(MM_FIX_0_566454497)", %%mm1 \n\t" //tmp5 "paddw %%mm4, %%mm5 \n\t" "movq "MANGLE(temps)"+1*8, %%mm6 \n\t" //paddw mm3, MM_2 "psraw $2, %%mm3 \n\t" //tmp7 "pmulhw "MANGLE(MM_FIX_0_198912367)", %%mm2 \n\t" //-tmp4 "psubw %%mm3, %%mm4 \n\t" "movq "DCTSIZE_S"*1*2(%%"REG_D"), %%mm7 \n\t" "paddw %%mm3, %%mm5 \n\t" "movq %%mm4, "DCTSIZE_S"*7*2(%%"REG_D") \n\t" "paddw %%mm6, %%mm7 \n\t" "movq "MANGLE(temps)"+2*8, %%mm3 \n\t" "psubw %%mm0, %%mm6 \n\t" "movq "DCTSIZE_S"*2*2(%%"REG_D"), %%mm4 \n\t" "paddw %%mm0, %%mm7 \n\t" "movq %%mm5, "DCTSIZE_S"*0*2(%%"REG_D") \n\t" "paddw %%mm3, %%mm4 \n\t" "movq %%mm6, "DCTSIZE_S"*6*2(%%"REG_D") \n\t" "psubw %%mm1, %%mm3 \n\t" "movq "DCTSIZE_S"*5*2(%%"REG_D"), %%mm5 \n\t" "paddw %%mm1, %%mm4 \n\t" "movq "DCTSIZE_S"*3*2(%%"REG_D"), %%mm6 \n\t" "paddw %%mm3, %%mm5 \n\t" "movq "MANGLE(temps)"+3*8, %%mm0 \n\t" "add $8, %%"REG_S" \n\t" "movq %%mm7, "DCTSIZE_S"*1*2(%%"REG_D") \n\t" "paddw %%mm0, %%mm6 \n\t" "movq %%mm4, "DCTSIZE_S"*2*2(%%"REG_D") \n\t" "psubw %%mm2, %%mm0 \n\t" "movq "DCTSIZE_S"*4*2(%%"REG_D"), %%mm7 \n\t" "paddw %%mm2, %%mm6 \n\t" "movq %%mm5, "DCTSIZE_S"*5*2(%%"REG_D") \n\t" "paddw %%mm0, %%mm7 \n\t" "movq %%mm6, "DCTSIZE_S"*3*2(%%"REG_D") \n\t" "movq %%mm7, "DCTSIZE_S"*4*2(%%"REG_D") \n\t" "add $8, %%"REG_D" \n\t" "jmp 4f \n\t" "2: \n\t" //--- non DC2 //psraw mm1, 2 w/o it -> offset. 
thr1, thr1, thr1 (actually thr1, thr1, thr1-1) //psraw mm5, 2 //psraw mm0, 2 //psraw mm6, 2 "movq %%mm5, %%mm3 \n\t" "psubw %%mm1, %%mm5 \n\t" "psllw $1, %%mm5 \n\t" //'z10 "paddw %%mm1, %%mm3 \n\t" //'z13 "movq %%mm0, %%mm2 \n\t" "psubw %%mm6, %%mm0 \n\t" "movq %%mm5, %%mm1 \n\t" "psllw $1, %%mm0 \n\t" //'z12 "pmulhw "MANGLE(MM_FIX_2_613125930)", %%mm1 \n\t" //- "paddw %%mm0, %%mm5 \n\t" "pmulhw "MANGLE(MM_FIX_1_847759065)", %%mm5 \n\t" //'z5 "paddw %%mm6, %%mm2 \n\t" //'z11 "pmulhw "MANGLE(MM_FIX_1_082392200)", %%mm0 \n\t" "movq %%mm2, %%mm7 \n\t" //--- "movq "MANGLE(temps)"+0*8, %%mm4 \n\t" "psubw %%mm3, %%mm2 \n\t" "psllw $1, %%mm2 \n\t" "paddw %%mm3, %%mm7 \n\t" //'t7 "pmulhw "MANGLE(MM_FIX_1_414213562)", %%mm2 \n\t" //'t11 "movq %%mm4, %%mm6 \n\t" //paddw mm7, MM_2 "psraw $2, %%mm7 \n\t" "paddw "DCTSIZE_S"*0*2(%%"REG_D"), %%mm4 \n\t" "psubw %%mm7, %%mm6 \n\t" "movq "MANGLE(temps)"+1*8, %%mm3 \n\t" "paddw %%mm7, %%mm4 \n\t" "movq %%mm6, "DCTSIZE_S"*7*2(%%"REG_D") \n\t" "paddw %%mm5, %%mm1 \n\t" //'t12 "movq %%mm4, "DCTSIZE_S"*0*2(%%"REG_D") \n\t" "psubw %%mm7, %%mm1 \n\t" //'t6 "movq "MANGLE(temps)"+2*8, %%mm7 \n\t" "psubw %%mm5, %%mm0 \n\t" //'t10 "movq "MANGLE(temps)"+3*8, %%mm6 \n\t" "movq %%mm3, %%mm5 \n\t" "paddw "DCTSIZE_S"*1*2(%%"REG_D"), %%mm3 \n\t" "psubw %%mm1, %%mm5 \n\t" "psubw %%mm1, %%mm2 \n\t" //'t5 "paddw %%mm1, %%mm3 \n\t" "movq %%mm5, "DCTSIZE_S"*6*2(%%"REG_D") \n\t" "movq %%mm7, %%mm4 \n\t" "paddw "DCTSIZE_S"*2*2(%%"REG_D"), %%mm7 \n\t" "psubw %%mm2, %%mm4 \n\t" "paddw "DCTSIZE_S"*5*2(%%"REG_D"), %%mm4 \n\t" "paddw %%mm2, %%mm7 \n\t" "movq %%mm3, "DCTSIZE_S"*1*2(%%"REG_D") \n\t" "paddw %%mm2, %%mm0 \n\t" //'t4 // 't4 't6 't5 - - - - 't7 "movq %%mm7, "DCTSIZE_S"*2*2(%%"REG_D") \n\t" "movq %%mm6, %%mm1 \n\t" "paddw "DCTSIZE_S"*4*2(%%"REG_D"), %%mm6 \n\t" "psubw %%mm0, %%mm1 \n\t" "paddw "DCTSIZE_S"*3*2(%%"REG_D"), %%mm1 \n\t" "paddw %%mm0, %%mm6 \n\t" "movq %%mm4, "DCTSIZE_S"*5*2(%%"REG_D") \n\t" "add $8, %%"REG_S" \n\t" "movq %%mm6, 
"DCTSIZE_S"*4*2(%%"REG_D") \n\t" "movq %%mm1, "DCTSIZE_S"*3*2(%%"REG_D") \n\t" "add $8, %%"REG_D" \n\t" "4: \n\t"//=part 2 (the same)=========================================================== "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm1 \n\t" // "movq "DCTSIZE_S"*3*2(%%"REG_S"), %%mm7 \n\t" "movq %%mm1, %%mm0 \n\t" "paddw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm1 \n\t" //t0 "movq %%mm7, %%mm3 \n\t" "paddw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm7 \n\t" //t3 "movq %%mm1, %%mm5 \n\t" "movq "DCTSIZE_S"*1*2(%%"REG_S"), %%mm6 \n\t" "psubw %%mm7, %%mm1 \n\t" //t13 "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t" "movq %%mm6, %%mm4 \n\t" "paddw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm6 \n\t" //t1 "paddw %%mm7, %%mm5 \n\t" //t10 "paddw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t2 "movq %%mm6, %%mm7 \n\t" "paddw %%mm2, %%mm6 \n\t" //t11 "psubw %%mm2, %%mm7 \n\t" //t12 "movq %%mm5, %%mm2 \n\t" "paddw %%mm6, %%mm5 \n\t" //d0 // i0 t13 t12 i3 i1 d0 - d4 "psubw %%mm6, %%mm2 \n\t" //d4 "paddw %%mm1, %%mm7 \n\t"
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?