vf_fspp.c

来自「君正早期ucos系统(只有早期的才不没有打包成库),MPLAYER,文件系统,图」· C语言 代码 · 共 2,126 行 · 第 1/5 页

C
2,126
字号
	"movq  1*8+4*16(%%"REG_d"), %%mm6  \n\t"	"psllw $2, %%mm7              \n\t"	"psubw 1*8+0*16(%%"REG_d"), %%mm5  \n\t"	"psubw %%mm6, %%mm2            \n\t"	"paddusw 1*8+0*16(%%"REG_d"), %%mm5 \n\t"	"paddusw %%mm6, %%mm2          \n\t"	"pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm7 \n\t"	//	"paddw 1*8+0*16(%%"REG_d"), %%mm5  \n\t"	"paddw %%mm6, %%mm2            \n\t"	"psubusw 1*8+0*16(%%"REG_d"), %%mm5 \n\t"	"psubusw %%mm6, %%mm2          \n\t"//This func is totally compute-bound,  operates at huge speed. So,  DC shortcut// at this place isn't worthwhile due to BTB miss penalty (checked on Pent. 3).//However,  typical numbers: nondc - 29%%,  dc - 46%%,  zero - 25%%. All <> 0 case is very rare.	"paddw "MANGLE(MM_2)", %%mm5            \n\t"	"movq %%mm2, %%mm6             \n\t"	"paddw %%mm5, %%mm2            \n\t"	"psubw %%mm6, %%mm5            \n\t"	"movq %%mm1, %%mm6             \n\t"	"paddw %%mm7, %%mm1            \n\t" //d2	"psubw 1*8+2*16(%%"REG_d"), %%mm1  \n\t"	"psubw %%mm7, %%mm6            \n\t" //d6	"movq 1*8+6*16(%%"REG_d"), %%mm7   \n\t"	"psraw $2, %%mm5              \n\t"	"paddusw 1*8+2*16(%%"REG_d"), %%mm1 \n\t"	"psubw %%mm7, %%mm6            \n\t"	// t7 d2 /t11 t4 t6 - d6 /t10     	"paddw 1*8+2*16(%%"REG_d"), %%mm1  \n\t"	"paddusw %%mm7, %%mm6          \n\t"	"psubusw 1*8+2*16(%%"REG_d"), %%mm1 \n\t"	"paddw %%mm7, %%mm6            \n\t"	"psubw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm3 \n\t"	"psubusw %%mm7, %%mm6          \n\t"	//movq [edi+"DCTSIZE_S"*2*2], mm1	//movq [edi+"DCTSIZE_S"*6*2], mm6     	"movq %%mm1, %%mm7             \n\t"	"psraw $2, %%mm2              \n\t"	"psubw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm4 \n\t"	"psubw %%mm6, %%mm1            \n\t"	"psubw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm0 \n\t"	"paddw %%mm7, %%mm6            \n\t" //'t13	"psraw $2, %%mm6              \n\t" //paddw mm6, MM_2 !!    
---	"movq %%mm2, %%mm7             \n\t"	"pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm1 \n\t"	"paddw %%mm6, %%mm2            \n\t" //'t0	"movq %%mm2, "MANGLE(temps)"+0*8       \n\t" //!	"psubw %%mm6, %%mm7            \n\t" //'t3	"movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"	"psubw %%mm6, %%mm1            \n\t" //'t12        	"psubw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t5	"movq %%mm5, %%mm6             \n\t"	"movq %%mm7, "MANGLE(temps)"+3*8       \n\t"	"paddw %%mm2, %%mm3            \n\t" //t10	"paddw %%mm4, %%mm2            \n\t" //t11	"paddw %%mm0, %%mm4            \n\t" //t12	"movq %%mm3, %%mm7             \n\t"	"psubw %%mm4, %%mm3            \n\t"	"psllw $2, %%mm3              \n\t"	"psllw $2, %%mm7              \n\t" //opt for P6	"pmulhw "MANGLE(MM_FIX_0_382683433)", %%mm3 \n\t"	"psllw $2, %%mm4              \n\t"	"pmulhw "MANGLE(MM_FIX_0_541196100)", %%mm7 \n\t"	"psllw $2, %%mm2              \n\t"	"pmulhw "MANGLE(MM_FIX_1_306562965)", %%mm4 \n\t"	"paddw %%mm1, %%mm5            \n\t" //'t1	"pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm2 \n\t"	"psubw %%mm1, %%mm6            \n\t" //'t2	// t7 't12 't11 t4 t6 - 't13 't10   ---	"paddw %%mm3, %%mm7            \n\t" //z2        	"movq %%mm5, "MANGLE(temps)"+1*8       \n\t"	"paddw %%mm3, %%mm4            \n\t" //z4	"movq 1*8+3*16(%%"REG_d"), %%mm3   \n\t"	"movq %%mm0, %%mm1             \n\t"	"movq %%mm6, "MANGLE(temps)"+2*8       \n\t"	"psubw %%mm2, %%mm1            \n\t" //z13            //===	"paddw %%mm2, %%mm0            \n\t" //z11 	"movq %%mm1, %%mm5             \n\t"	"movq 1*8+5*16(%%"REG_d"), %%mm2   \n\t"	"psubw %%mm7, %%mm1            \n\t" //d3	"paddw %%mm7, %%mm5            \n\t" //d5	"psubw %%mm3, %%mm1            \n\t"	"movq 1*8+1*16(%%"REG_d"), %%mm7   \n\t"	"psubw %%mm2, %%mm5            \n\t"	"movq %%mm0, %%mm6             \n\t"	"paddw %%mm4, %%mm0            \n\t" //d1    	"paddusw %%mm3, %%mm1          \n\t"	"psubw %%mm4, %%mm6            \n\t" //d7  	// d1 d3 - - - d5 d7 -    	"movq 
1*8+7*16(%%"REG_d"), %%mm4   \n\t"	"psubw %%mm7, %%mm0            \n\t"	"psubw %%mm4, %%mm6            \n\t"	"paddusw %%mm2, %%mm5          \n\t"	"paddusw %%mm4, %%mm6          \n\t"	"paddw %%mm3, %%mm1            \n\t"	"paddw %%mm2, %%mm5            \n\t"	"paddw %%mm4, %%mm6            \n\t"	"psubusw %%mm3, %%mm1          \n\t"	"psubusw %%mm2, %%mm5          \n\t"	"psubusw %%mm4, %%mm6          \n\t"	"movq %%mm1, %%mm4             \n\t"	"por %%mm5, %%mm4              \n\t"	"paddusw %%mm7, %%mm0          \n\t"	"por %%mm6, %%mm4              \n\t"	"paddw %%mm7, %%mm0            \n\t"	"packssdw %%mm4, %%mm4         \n\t"	"psubusw %%mm7, %%mm0          \n\t"	"movd %%mm4, %%"REG_a"             \n\t"	"or %%"REG_a", %%"REG_a"              \n\t"	"jnz 3f                 \n\t"	//movq [edi+"DCTSIZE_S"*3*2], mm1	//movq [edi+"DCTSIZE_S"*5*2], mm5	//movq [edi+"DCTSIZE_S"*1*2], mm0	//movq [edi+"DCTSIZE_S"*7*2], mm6	// t4 t5 - - - t6 t7 -	//--- t4 (mm0) may be <>0; mm1, mm5, mm6 == 0//Typical numbers: nondc - 19%%,  dc - 26%%,  zero - 55%%. 
zero case alone isn't worthwhile	"movq "MANGLE(temps)"+0*8, %%mm4       \n\t"	"movq %%mm0, %%mm1             \n\t"	"pmulhw "MANGLE(MM_FIX_0_847759065)", %%mm0 \n\t" //tmp6	"movq %%mm1, %%mm2             \n\t"	"movq "DCTSIZE_S"*0*2(%%"REG_D"), %%mm5 \n\t"	"movq %%mm2, %%mm3             \n\t"	"pmulhw "MANGLE(MM_FIX_0_566454497)", %%mm1 \n\t" //tmp5	"paddw %%mm4, %%mm5            \n\t"	"movq "MANGLE(temps)"+1*8, %%mm6       \n\t"	//paddw mm3, MM_2	"psraw $2, %%mm3              \n\t" //tmp7     	"pmulhw "MANGLE(MM_FIX_0_198912367)", %%mm2 \n\t" //-tmp4	"psubw %%mm3, %%mm4            \n\t"	"movq "DCTSIZE_S"*1*2(%%"REG_D"), %%mm7 \n\t"	"paddw %%mm3, %%mm5            \n\t"	"movq %%mm4, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"	"paddw %%mm6, %%mm7            \n\t"	"movq "MANGLE(temps)"+2*8, %%mm3       \n\t"	"psubw %%mm0, %%mm6            \n\t"	"movq "DCTSIZE_S"*2*2(%%"REG_D"), %%mm4 \n\t"	"paddw %%mm0, %%mm7            \n\t"	"movq %%mm5, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"	"paddw %%mm3, %%mm4            \n\t"	"movq %%mm6, "DCTSIZE_S"*6*2(%%"REG_D") \n\t"	"psubw %%mm1, %%mm3            \n\t"	"movq "DCTSIZE_S"*5*2(%%"REG_D"), %%mm5 \n\t"	"paddw %%mm1, %%mm4            \n\t"	"movq "DCTSIZE_S"*3*2(%%"REG_D"), %%mm6 \n\t"	"paddw %%mm3, %%mm5            \n\t"	"movq "MANGLE(temps)"+3*8, %%mm0       \n\t"	"add $24, %%"REG_S"              \n\t"	"movq %%mm7, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"	"paddw %%mm0, %%mm6            \n\t"	"movq %%mm4, "DCTSIZE_S"*2*2(%%"REG_D") \n\t"	"psubw %%mm2, %%mm0            \n\t"	"movq "DCTSIZE_S"*4*2(%%"REG_D"), %%mm7 \n\t"	"paddw %%mm2, %%mm6            \n\t"	"movq %%mm5, "DCTSIZE_S"*5*2(%%"REG_D") \n\t"	"paddw %%mm0, %%mm7            \n\t"	"movq %%mm6, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"	"movq %%mm7, "DCTSIZE_S"*4*2(%%"REG_D") \n\t"	"add $24, %%"REG_D"              \n\t"	"sub $2, %%"REG_c"               \n\t"	"jnz 1b                \n\t"	"jmp 5f                   \n\t"	"3:                    \n\t"	//--- non DC2	//psraw mm1, 2 w/o it -> offset. 
thr1, thr1, thr1  (actually thr1, thr1, thr1-1)	//psraw mm5, 2              	//psraw mm0, 2	//psraw mm6, 2	"movq %%mm5, %%mm3             \n\t"	"psubw %%mm1, %%mm5            \n\t"	"psllw $1, %%mm5              \n\t" //'z10	"paddw %%mm1, %%mm3            \n\t" //'z13	"movq %%mm0, %%mm2             \n\t"	"psubw %%mm6, %%mm0            \n\t"	"movq %%mm5, %%mm1             \n\t"	"psllw $1, %%mm0              \n\t" //'z12	"pmulhw "MANGLE(MM_FIX_2_613125930)", %%mm1 \n\t" //-	"paddw %%mm0, %%mm5            \n\t"	"pmulhw "MANGLE(MM_FIX_1_847759065)", %%mm5 \n\t" //'z5	"paddw %%mm6, %%mm2            \n\t" //'z11	"pmulhw "MANGLE(MM_FIX_1_082392200)", %%mm0 \n\t"	"movq %%mm2, %%mm7             \n\t"	//---	"movq "MANGLE(temps)"+0*8, %%mm4       \n\t"	"psubw %%mm3, %%mm2            \n\t"	"psllw $1, %%mm2              \n\t"	"paddw %%mm3, %%mm7            \n\t" //'t7	"pmulhw "MANGLE(MM_FIX_1_414213562)", %%mm2 \n\t" //'t11	"movq %%mm4, %%mm6             \n\t"	//paddw mm7, MM_2	"psraw $2, %%mm7              \n\t"	"paddw "DCTSIZE_S"*0*2(%%"REG_D"), %%mm4 \n\t"	"psubw %%mm7, %%mm6            \n\t"	"movq "MANGLE(temps)"+1*8, %%mm3       \n\t"	"paddw %%mm7, %%mm4            \n\t"	"movq %%mm6, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"	"paddw %%mm5, %%mm1            \n\t" //'t12	"movq %%mm4, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"	"psubw %%mm7, %%mm1            \n\t" //'t6	"movq "MANGLE(temps)"+2*8, %%mm7       \n\t"	"psubw %%mm5, %%mm0            \n\t" //'t10	"movq "MANGLE(temps)"+3*8, %%mm6       \n\t"	"movq %%mm3, %%mm5             \n\t"	"paddw "DCTSIZE_S"*1*2(%%"REG_D"), %%mm3 \n\t"	"psubw %%mm1, %%mm5            \n\t"	"psubw %%mm1, %%mm2            \n\t" //'t5	"paddw %%mm1, %%mm3            \n\t"	"movq %%mm5, "DCTSIZE_S"*6*2(%%"REG_D") \n\t"	"movq %%mm7, %%mm4             \n\t"	"paddw "DCTSIZE_S"*2*2(%%"REG_D"), %%mm7 \n\t"	"psubw %%mm2, %%mm4            \n\t"	"paddw "DCTSIZE_S"*5*2(%%"REG_D"), %%mm4 \n\t"	"paddw %%mm2, %%mm7            \n\t"	"movq %%mm3, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"	"paddw 
%%mm2, %%mm0            \n\t" //'t4     	// 't4 't6 't5 - - - - 't7	"movq %%mm7, "DCTSIZE_S"*2*2(%%"REG_D") \n\t"	"movq %%mm6, %%mm1             \n\t"	"paddw "DCTSIZE_S"*4*2(%%"REG_D"), %%mm6 \n\t"	"psubw %%mm0, %%mm1            \n\t"	"paddw "DCTSIZE_S"*3*2(%%"REG_D"), %%mm1 \n\t"	"paddw %%mm0, %%mm6            \n\t"	"movq %%mm4, "DCTSIZE_S"*5*2(%%"REG_D") \n\t"	"add $24, %%"REG_S"              \n\t"	"movq %%mm6, "DCTSIZE_S"*4*2(%%"REG_D") \n\t"	"movq %%mm1, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"	"add $24, %%"REG_D"              \n\t"	"sub $2, %%"REG_c"               \n\t"	"jnz 1b                \n\t"	"5:                      \n\t"	: "+S"(data), "+D"(output), "+c"(cnt)// input regs	: "d"(thr_adr)	: "%"REG_a	);}
#endif // HAVE_MMX

#ifndef HAVE_MMX
/*
 * Plain-C inverse DCT pass used when MMX is not available.
 *
 * Each iteration consumes eight consecutive coefficients wsptr[0..7],
 * runs them through an 8-point inverse DCT butterfly and ACCUMULATES (+=)
 * the eight descaled results into one column of the output: elements
 * outptr[0], outptr[output_stride], ..., outptr[7*output_stride].
 * After each iteration the output pointer moves one element to the right
 * and the workspace pointer moves forward by DCTSIZE elements.
 *
 * workspace      input coefficients, read only; cnt*4 groups of DCTSIZE
 *                elements are consumed.
 * output_adr     destination buffer that the results are added to.
 * output_stride  distance, in int16_t elements, between successive output
 *                rows.
 * cnt            number of 4-column groups to process; cnt*4 iterations
 *                are executed in total.
 *
 * NOTE(review): the coefficient order inside each group of eight looks
 * permuted (indices 0..3 feed the even part, 4..7 the odd part) -- this
 * presumably matches what the preceding column pass writes; confirm
 * against that function.
 */
static void row_idct_c(DCTELEM* workspace,
                       int16_t* output_adr, int output_stride, int cnt)
{
    int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    int_simd16_t tmp10, tmp11, tmp12, tmp13;
    int_simd16_t z5, z10, z11, z12, z13;
    int16_t* outptr;
    DCTELEM* wsptr;

    cnt *= 4; // four output columns per unit of cnt
    wsptr = workspace;
    outptr = output_adr;

    for (; cnt > 0; cnt--) {
        // Even part
        // (the SIMD version reads a 4x4 block and transposes it)
        tmp10 = wsptr[2] + wsptr[3];
        tmp11 = wsptr[2] - wsptr[3];

        tmp13 = wsptr[0] + wsptr[1];
        // Scale AFTER the high multiply: this order avoids overflow.
        // Multiplication is used instead of <<2 because the operand may be
        // negative and left-shifting a negative value is undefined in C.
        tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) * 4) - tmp13;

        tmp0 = tmp10 + tmp13; // ->temps
        tmp3 = tmp10 - tmp13; // ->temps
        tmp1 = tmp11 + tmp12;
        tmp2 = tmp11 - tmp12;

        // Odd part
        // Also transpose, with previous:
        // ---- ----      ||||
        // ---- ---- idct ||||
        // ---- ---- ---> ||||
        // ---- ----      ||||
        z13 = wsptr[4] + wsptr[5];
        z10 = wsptr[4] - wsptr[5];
        z11 = wsptr[6] + wsptr[7];
        z12 = wsptr[6] - wsptr[7];

        tmp7 = z11 + z13;
        tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);

        z5 =    MULTIPLY16H(z10 + z12, FIX_1_847759065);
        tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
        tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_

        // "* 8" replaces the former "<< 3": same value whenever the shift
        // was defined, but well-defined for negative intermediates too.
        tmp6 = (tmp12 * 8) - tmp7;
        tmp5 = (tmp11 * 8) - tmp6;
        tmp4 = (tmp10 * 8) + tmp5;

        // Final output stage: descale and accumulate into the column
        outptr[0*output_stride] += DESCALE(tmp0 + tmp7, 3);
        outptr[1*output_stride] += DESCALE(tmp1 + tmp6, 3);
        outptr[2*output_stride] += DESCALE(tmp2 + tmp5, 3);
        outptr[3*output_stride] += DESCALE(tmp3 - tmp4, 3);
        outptr[4*output_stride] += DESCALE(tmp3 + tmp4, 3);
        outptr[5*output_stride] += DESCALE(tmp2 - tmp5, 3);
        // Original author's open question on the next two rows: "no += ?"
        outptr[6*output_stride] += DESCALE(tmp1 - tmp6, 3);
        outptr[7*output_stride] += DESCALE(tmp0 - tmp7, 3);

        outptr++;
        wsptr += DCTSIZE;       // advance pointer to next row
    }
}

#else /* HAVE_MMX */
static void row_idct_mmx (DCTELEM* workspace, 			  int16_t* output_adr,  int output_stride,  int cnt){    asm volatile(	"lea (%%"REG_a",%%"REG_a",2), %%"REG_d"    \n\t"	"1:                     \n\t"	"movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm0 \n\t"	//	"movq "DCTSIZE_S"*1*2(%%"REG_S"), %%mm1 \n\t"	"movq %%mm0, %%mm4             \n\t"	"movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"	"punpcklwd %%mm1, %%mm0        \n\t"	"movq "DCTSIZE_S"*3*2(%%"REG_S"), %%mm3 \n\t"	"punpckhwd %%mm1, %%mm4        \n\t"	//transpose 4x4	"movq %%mm2, %%mm7             \n\t"	"punpcklwd %%mm3, %%mm2        \n\t"

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?