vf_fspp.c

来自「君正早期ucos系统(只有早期的才没有打包成库),MPLAYER,文件系统,图」· C语言 代码 · 共 2,126 行 · 第 1/5 页

C
2,126
字号
	    tmp7 = (z11 + z13)>>2; //+2 !	    tmp11 = MULTIPLY16H((z11 - z13)<<1, FIX_1_414213562);	    z5 =    MULTIPLY16H(z10 + z12, FIX_1_847759065);	    tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;	    tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!	    tmp6 = tmp12 - tmp7;	    tmp5 = tmp11 - tmp6;	    tmp4 = tmp10 + tmp5;	    wsptr[DCTSIZE*0]+=  (tmp0 + tmp7);	    wsptr[DCTSIZE*1]+=  (tmp1 + tmp6);	    wsptr[DCTSIZE*2]+=  (tmp2 + tmp5);	    wsptr[DCTSIZE*3]+=  (tmp3 - tmp4);	    wsptr[DCTSIZE*4]+=  (tmp3 + tmp4);	    wsptr[DCTSIZE*5]+=  (tmp2 - tmp5);	    wsptr[DCTSIZE*6]=  (tmp1 - tmp6);	    wsptr[DCTSIZE*7]=  (tmp0 - tmp7);	    //	    dataptr++; //next column	    wsptr++;	    threshold++;	}	dataptr+=8; //skip each second start pos	wsptr  +=8;           }}#else /* HAVE_MMX */static void column_fidct_mmx(int16_t* thr_adr,  DCTELEM *data,  DCTELEM *output,  int cnt){    asm volatile(	ASMALIGN(4)	"1:                   \n\t"	"movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm1 \n\t"	//	"movq "DCTSIZE_S"*3*2(%%"REG_S"), %%mm7 \n\t"	"movq %%mm1, %%mm0             \n\t"	"paddw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm1 \n\t" //t0    	"movq %%mm7, %%mm3             \n\t"	"paddw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm7 \n\t" //t3	"movq %%mm1, %%mm5             \n\t"	"movq "DCTSIZE_S"*1*2(%%"REG_S"), %%mm6 \n\t"	"psubw %%mm7, %%mm1            \n\t" //t13	"movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"	"movq %%mm6, %%mm4             \n\t"	"paddw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm6 \n\t" //t1	"paddw %%mm7, %%mm5            \n\t" //t10	"paddw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t2	"movq %%mm6, %%mm7             \n\t"	"paddw %%mm2, %%mm6            \n\t" //t11    	"psubw %%mm2, %%mm7            \n\t" //t12	"movq %%mm5, %%mm2             \n\t"	"paddw %%mm6, %%mm5            \n\t" //d0	// i0 t13 t12 i3 i1 d0 - d4	"psubw %%mm6, %%mm2            \n\t" //d4      	"paddw %%mm1, %%mm7            \n\t"	"movq  4*16(%%"REG_d"), %%mm6      \n\t"	"psllw $2, %%mm7              \n\t"	"psubw 0*16(%%"REG_d"), 
%%mm5      \n\t"	"psubw %%mm6, %%mm2            \n\t"	"paddusw 0*16(%%"REG_d"), %%mm5    \n\t"	"paddusw %%mm6, %%mm2          \n\t"	"pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm7 \n\t"	//	"paddw 0*16(%%"REG_d"), %%mm5      \n\t"	"paddw %%mm6, %%mm2            \n\t"	"psubusw 0*16(%%"REG_d"), %%mm5    \n\t"	"psubusw %%mm6, %%mm2          \n\t"//This func is totally compute-bound,  operates at huge speed. So,  DC shortcut// at this place isn't worthwhile due to BTB miss penalty (checked on Pent. 3).//However,  typical numbers: nondc - 29%%,  dc - 46%%,  zero - 25%%. All <> 0 case is very rare.	"paddw "MANGLE(MM_2)", %%mm5            \n\t"	"movq %%mm2, %%mm6             \n\t"	"paddw %%mm5, %%mm2            \n\t"	"psubw %%mm6, %%mm5            \n\t"	"movq %%mm1, %%mm6             \n\t"	"paddw %%mm7, %%mm1            \n\t" //d2	"psubw 2*16(%%"REG_d"), %%mm1      \n\t"	"psubw %%mm7, %%mm6            \n\t" //d6	"movq 6*16(%%"REG_d"), %%mm7       \n\t"	"psraw $2, %%mm5              \n\t"	"paddusw 2*16(%%"REG_d"), %%mm1    \n\t"	"psubw %%mm7, %%mm6            \n\t"	// t7 d2 /t11 t4 t6 - d6 /t10     	"paddw 2*16(%%"REG_d"), %%mm1      \n\t"	"paddusw %%mm7, %%mm6          \n\t"	"psubusw 2*16(%%"REG_d"), %%mm1    \n\t"	"paddw %%mm7, %%mm6            \n\t"	"psubw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm3 \n\t"	"psubusw %%mm7, %%mm6          \n\t"	//movq [edi+"DCTSIZE_S"*2*2], mm1	//movq [edi+"DCTSIZE_S"*6*2], mm6     	"movq %%mm1, %%mm7             \n\t"	"psraw $2, %%mm2              \n\t"	"psubw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm4 \n\t"	"psubw %%mm6, %%mm1            \n\t"	"psubw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm0 \n\t"	"paddw %%mm7, %%mm6            \n\t" //'t13	"psraw $2, %%mm6              \n\t" //paddw mm6, MM_2 !!    ---	"movq %%mm2, %%mm7             \n\t"	"pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm1 \n\t"	"paddw %%mm6, %%mm2            \n\t" //'t0	"movq %%mm2, "MANGLE(temps)"+0*8       \n\t" //!	
"psubw %%mm6, %%mm7            \n\t" //'t3	"movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"	"psubw %%mm6, %%mm1            \n\t" //'t12        	"psubw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t5	"movq %%mm5, %%mm6             \n\t"	"movq %%mm7, "MANGLE(temps)"+3*8       \n\t"	"paddw %%mm2, %%mm3            \n\t" //t10	"paddw %%mm4, %%mm2            \n\t" //t11	"paddw %%mm0, %%mm4            \n\t" //t12	"movq %%mm3, %%mm7             \n\t"	"psubw %%mm4, %%mm3            \n\t"	"psllw $2, %%mm3              \n\t"	"psllw $2, %%mm7              \n\t" //opt for P6	"pmulhw "MANGLE(MM_FIX_0_382683433)", %%mm3 \n\t"	"psllw $2, %%mm4              \n\t"	"pmulhw "MANGLE(MM_FIX_0_541196100)", %%mm7 \n\t"	"psllw $2, %%mm2              \n\t"	"pmulhw "MANGLE(MM_FIX_1_306562965)", %%mm4 \n\t"	"paddw %%mm1, %%mm5            \n\t" //'t1	"pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm2 \n\t"	"psubw %%mm1, %%mm6            \n\t" //'t2	// t7 't12 't11 t4 t6 - 't13 't10   ---	"paddw %%mm3, %%mm7            \n\t" //z2        	"movq %%mm5, "MANGLE(temps)"+1*8       \n\t"	"paddw %%mm3, %%mm4            \n\t" //z4	"movq 3*16(%%"REG_d"), %%mm3       \n\t"	"movq %%mm0, %%mm1             \n\t"	"movq %%mm6, "MANGLE(temps)"+2*8       \n\t"	"psubw %%mm2, %%mm1            \n\t" //z13            //===	"paddw %%mm2, %%mm0            \n\t" //z11 	"movq %%mm1, %%mm5             \n\t"	"movq 5*16(%%"REG_d"), %%mm2       \n\t"	"psubw %%mm7, %%mm1            \n\t" //d3	"paddw %%mm7, %%mm5            \n\t" //d5	"psubw %%mm3, %%mm1            \n\t"	"movq 1*16(%%"REG_d"), %%mm7       \n\t"	"psubw %%mm2, %%mm5            \n\t"	"movq %%mm0, %%mm6             \n\t"	"paddw %%mm4, %%mm0            \n\t" //d1    	"paddusw %%mm3, %%mm1          \n\t"	"psubw %%mm4, %%mm6            \n\t" //d7  	// d1 d3 - - - d5 d7 -    	"movq 7*16(%%"REG_d"), %%mm4       \n\t"	"psubw %%mm7, %%mm0            \n\t"	"psubw %%mm4, %%mm6            \n\t"	"paddusw %%mm2, %%mm5          \n\t"	"paddusw %%mm4, %%mm6          \n\t"	"paddw %%mm3, 
%%mm1            \n\t"	"paddw %%mm2, %%mm5            \n\t"	"paddw %%mm4, %%mm6            \n\t"	"psubusw %%mm3, %%mm1          \n\t"	"psubusw %%mm2, %%mm5          \n\t"	"psubusw %%mm4, %%mm6          \n\t"	"movq %%mm1, %%mm4             \n\t"	"por %%mm5, %%mm4              \n\t"	"paddusw %%mm7, %%mm0          \n\t"	"por %%mm6, %%mm4              \n\t"	"paddw %%mm7, %%mm0            \n\t"	"packssdw %%mm4, %%mm4         \n\t"	"psubusw %%mm7, %%mm0          \n\t"	"movd %%mm4, %%"REG_a"             \n\t"	"or %%"REG_a", %%"REG_a"              \n\t"	"jnz 2f                 \n\t"	//movq [edi+"DCTSIZE_S"*3*2], mm1	//movq [edi+"DCTSIZE_S"*5*2], mm5	//movq [edi+"DCTSIZE_S"*1*2], mm0	//movq [edi+"DCTSIZE_S"*7*2], mm6	// t4 t5 - - - t6 t7 -	//--- t4 (mm0) may be <>0; mm1, mm5, mm6 == 0//Typical numbers: nondc - 19%%,  dc - 26%%,  zero - 55%%. zero case alone isn't worthwhile	"movq "MANGLE(temps)"+0*8, %%mm4       \n\t"	"movq %%mm0, %%mm1             \n\t"	"pmulhw "MANGLE(MM_FIX_0_847759065)", %%mm0 \n\t" //tmp6	"movq %%mm1, %%mm2             \n\t"	"movq "DCTSIZE_S"*0*2(%%"REG_D"), %%mm5 \n\t"	"movq %%mm2, %%mm3             \n\t"	"pmulhw "MANGLE(MM_FIX_0_566454497)", %%mm1 \n\t" //tmp5	"paddw %%mm4, %%mm5            \n\t"	"movq "MANGLE(temps)"+1*8, %%mm6       \n\t"	//paddw mm3, MM_2	"psraw $2, %%mm3              \n\t" //tmp7     	"pmulhw "MANGLE(MM_FIX_0_198912367)", %%mm2 \n\t" //-tmp4	"psubw %%mm3, %%mm4            \n\t"	"movq "DCTSIZE_S"*1*2(%%"REG_D"), %%mm7 \n\t"	"paddw %%mm3, %%mm5            \n\t"	"movq %%mm4, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"	"paddw %%mm6, %%mm7            \n\t"	"movq "MANGLE(temps)"+2*8, %%mm3       \n\t"	"psubw %%mm0, %%mm6            \n\t"	"movq "DCTSIZE_S"*2*2(%%"REG_D"), %%mm4 \n\t"	"paddw %%mm0, %%mm7            \n\t"	"movq %%mm5, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"	"paddw %%mm3, %%mm4            \n\t"	"movq %%mm6, "DCTSIZE_S"*6*2(%%"REG_D") \n\t"	"psubw %%mm1, %%mm3            \n\t"	"movq "DCTSIZE_S"*5*2(%%"REG_D"), %%mm5 \n\t"	"paddw %%mm1, %%mm4 
           \n\t"	"movq "DCTSIZE_S"*3*2(%%"REG_D"), %%mm6 \n\t"	"paddw %%mm3, %%mm5            \n\t"	"movq "MANGLE(temps)"+3*8, %%mm0       \n\t"	"add $8, %%"REG_S"               \n\t"	"movq %%mm7, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"	"paddw %%mm0, %%mm6            \n\t"	"movq %%mm4, "DCTSIZE_S"*2*2(%%"REG_D") \n\t"	"psubw %%mm2, %%mm0            \n\t"	"movq "DCTSIZE_S"*4*2(%%"REG_D"), %%mm7 \n\t"	"paddw %%mm2, %%mm6            \n\t"	"movq %%mm5, "DCTSIZE_S"*5*2(%%"REG_D") \n\t"	"paddw %%mm0, %%mm7            \n\t"	"movq %%mm6, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"	"movq %%mm7, "DCTSIZE_S"*4*2(%%"REG_D") \n\t"	"add $8, %%"REG_D"               \n\t"	"jmp 4f                  \n\t"	"2:                    \n\t"	//--- non DC2	//psraw mm1, 2 w/o it -> offset. thr1, thr1, thr1  (actually thr1, thr1, thr1-1)	//psraw mm5, 2              	//psraw mm0, 2	//psraw mm6, 2	"movq %%mm5, %%mm3             \n\t"	"psubw %%mm1, %%mm5            \n\t"	"psllw $1, %%mm5              \n\t" //'z10	"paddw %%mm1, %%mm3            \n\t" //'z13	"movq %%mm0, %%mm2             \n\t"	"psubw %%mm6, %%mm0            \n\t"	"movq %%mm5, %%mm1             \n\t"	"psllw $1, %%mm0              \n\t" //'z12	"pmulhw "MANGLE(MM_FIX_2_613125930)", %%mm1 \n\t" //-	"paddw %%mm0, %%mm5            \n\t"	"pmulhw "MANGLE(MM_FIX_1_847759065)", %%mm5 \n\t" //'z5	"paddw %%mm6, %%mm2            \n\t" //'z11	"pmulhw "MANGLE(MM_FIX_1_082392200)", %%mm0 \n\t"	"movq %%mm2, %%mm7             \n\t"	//---	"movq "MANGLE(temps)"+0*8, %%mm4       \n\t"	"psubw %%mm3, %%mm2            \n\t"	"psllw $1, %%mm2              \n\t"	"paddw %%mm3, %%mm7            \n\t" //'t7	"pmulhw "MANGLE(MM_FIX_1_414213562)", %%mm2 \n\t" //'t11	"movq %%mm4, %%mm6             \n\t"	//paddw mm7, MM_2	"psraw $2, %%mm7              \n\t"	"paddw "DCTSIZE_S"*0*2(%%"REG_D"), %%mm4 \n\t"	"psubw %%mm7, %%mm6            \n\t"	"movq "MANGLE(temps)"+1*8, %%mm3       \n\t"	"paddw %%mm7, %%mm4            \n\t"	"movq %%mm6, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"	"paddw %%mm5, 
%%mm1            \n\t" //'t12	"movq %%mm4, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"	"psubw %%mm7, %%mm1            \n\t" //'t6	"movq "MANGLE(temps)"+2*8, %%mm7       \n\t"	"psubw %%mm5, %%mm0            \n\t" //'t10	"movq "MANGLE(temps)"+3*8, %%mm6       \n\t"	"movq %%mm3, %%mm5             \n\t"	"paddw "DCTSIZE_S"*1*2(%%"REG_D"), %%mm3 \n\t"	"psubw %%mm1, %%mm5            \n\t"	"psubw %%mm1, %%mm2            \n\t" //'t5	"paddw %%mm1, %%mm3            \n\t"	"movq %%mm5, "DCTSIZE_S"*6*2(%%"REG_D") \n\t"	"movq %%mm7, %%mm4             \n\t"	"paddw "DCTSIZE_S"*2*2(%%"REG_D"), %%mm7 \n\t"	"psubw %%mm2, %%mm4            \n\t"	"paddw "DCTSIZE_S"*5*2(%%"REG_D"), %%mm4 \n\t"	"paddw %%mm2, %%mm7            \n\t"	"movq %%mm3, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"	"paddw %%mm2, %%mm0            \n\t" //'t4     	// 't4 't6 't5 - - - - 't7	"movq %%mm7, "DCTSIZE_S"*2*2(%%"REG_D") \n\t"	"movq %%mm6, %%mm1             \n\t"	"paddw "DCTSIZE_S"*4*2(%%"REG_D"), %%mm6 \n\t"	"psubw %%mm0, %%mm1            \n\t"	"paddw "DCTSIZE_S"*3*2(%%"REG_D"), %%mm1 \n\t"	"paddw %%mm0, %%mm6            \n\t"	"movq %%mm4, "DCTSIZE_S"*5*2(%%"REG_D") \n\t"	"add $8, %%"REG_S"               \n\t"	"movq %%mm6, "DCTSIZE_S"*4*2(%%"REG_D") \n\t"	"movq %%mm1, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"	"add $8, %%"REG_D"               \n\t"	"4:                     \n\t"//=part 2 (the same)===========================================================    	"movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm1 \n\t"	//	"movq "DCTSIZE_S"*3*2(%%"REG_S"), %%mm7 \n\t"	"movq %%mm1, %%mm0             \n\t"	"paddw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm1 \n\t" //t0    	"movq %%mm7, %%mm3             \n\t"	"paddw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm7 \n\t" //t3	"movq %%mm1, %%mm5             \n\t"	"movq "DCTSIZE_S"*1*2(%%"REG_S"), %%mm6 \n\t"	"psubw %%mm7, %%mm1            \n\t" //t13	"movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"	"movq %%mm6, %%mm4             \n\t"	"paddw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm6 \n\t" //t1	"paddw %%mm7, %%mm5            \n\t" //t10	"paddw 
"DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t2	"movq %%mm6, %%mm7             \n\t"	"paddw %%mm2, %%mm6            \n\t" //t11    	"psubw %%mm2, %%mm7            \n\t" //t12	"movq %%mm5, %%mm2             \n\t"	"paddw %%mm6, %%mm5            \n\t" //d0	// i0 t13 t12 i3 i1 d0 - d4	"psubw %%mm6, %%mm2            \n\t" //d4      	"paddw %%mm1, %%mm7            \n\t"

⌨️ 快捷键说明

复制代码:Ctrl + C
搜索代码:Ctrl + F
全屏模式:F11
增大字号:Ctrl + =
减小字号:Ctrl + -
显示快捷键:?