vf_fspp.c

来自「君正早期ucos系统(只有早期的才不没有打包成库),MPLAYER,文件系统,图」· C语言 代码 · 共 2,126 行 · 第 1/5 页

C
2,126
字号
	"movq %%mm0, %%mm6             \n\t"	"punpckldq %%mm2, %%mm0        \n\t" //0	"punpckhdq %%mm2, %%mm6        \n\t" //1	"movq %%mm0, %%mm5             \n\t"	"punpckhwd %%mm3, %%mm7        \n\t"	"psubw %%mm6, %%mm0            \n\t"	"pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm0 \n\t"	"movq %%mm4, %%mm2             \n\t"	"punpckldq %%mm7, %%mm4        \n\t" //2	"paddw %%mm6, %%mm5            \n\t"	"punpckhdq %%mm7, %%mm2        \n\t" //3	"movq %%mm4, %%mm1             \n\t"	"psllw $2, %%mm0              \n\t"	"paddw %%mm2, %%mm4            \n\t" //t10	"movq "DCTSIZE_S"*0*2+"DCTSIZE_S"(%%"REG_S"), %%mm3 \n\t"	"psubw %%mm2, %%mm1            \n\t" //t11	"movq "DCTSIZE_S"*1*2+"DCTSIZE_S"(%%"REG_S"), %%mm2 \n\t"	"psubw %%mm5, %%mm0            \n\t"	"movq %%mm4, %%mm6             \n\t"	"paddw %%mm5, %%mm4            \n\t" //t0	"psubw %%mm5, %%mm6            \n\t" //t3	"movq %%mm1, %%mm7             \n\t"	"movq "DCTSIZE_S"*2*2+"DCTSIZE_S"(%%"REG_S"), %%mm5 \n\t"	"paddw %%mm0, %%mm1            \n\t" //t1	"movq %%mm4, "MANGLE(temps)"+0*8       \n\t" //t0	"movq %%mm3, %%mm4             \n\t"	"movq %%mm6, "MANGLE(temps)"+1*8       \n\t" //t3	"punpcklwd %%mm2, %%mm3        \n\t"	//transpose 4x4    	"movq "DCTSIZE_S"*3*2+"DCTSIZE_S"(%%"REG_S"), %%mm6 \n\t"	"punpckhwd %%mm2, %%mm4        \n\t"	"movq %%mm5, %%mm2             \n\t"	"punpcklwd %%mm6, %%mm5        \n\t"	"psubw %%mm0, %%mm7            \n\t" //t2    	"punpckhwd %%mm6, %%mm2        \n\t"	"movq %%mm3, %%mm0             \n\t"	"punpckldq %%mm5, %%mm3        \n\t" //4	"punpckhdq %%mm5, %%mm0        \n\t" //5	"movq %%mm4, %%mm5             \n\t"	//	"movq %%mm3, %%mm6             \n\t"	"punpckldq %%mm2, %%mm4        \n\t" //6	"psubw %%mm0, %%mm3            \n\t" //z10	"punpckhdq %%mm2, %%mm5        \n\t" //7     	"paddw %%mm0, %%mm6            \n\t" //z13	"movq %%mm4, %%mm2             \n\t"	"movq %%mm3, %%mm0             \n\t"	"psubw %%mm5, %%mm4            \n\t" //z12    	"pmulhw "MANGLE(MM_FIX_2_613125930)", %%mm0 \n\t" //-	"paddw %%mm4, %%mm3            \n\t"	"pmulhw "MANGLE(MM_FIX_1_847759065)", %%mm3 \n\t" //z5	"paddw %%mm5, %%mm2            \n\t" //z11  >	"pmulhw "MANGLE(MM_FIX_1_082392200)", %%mm4 \n\t"	"movq %%mm2, %%mm5             \n\t"	"psubw %%mm6, %%mm2            \n\t"	"paddw %%mm6, %%mm5            \n\t" //t7	"pmulhw "MANGLE(MM_FIX_1_414213562)", %%mm2 \n\t" //t11    	"paddw %%mm3, %%mm0            \n\t" //t12	"psllw $3, %%mm0              \n\t"	"psubw %%mm3, %%mm4            \n\t" //t10    	"movq "MANGLE(temps)"+0*8, %%mm6       \n\t"	"movq %%mm1, %%mm3             \n\t"	"psllw $3, %%mm4              \n\t"	"psubw %%mm5, %%mm0            \n\t" //t6	"psllw $3, %%mm2              \n\t"	"paddw %%mm0, %%mm1            \n\t" //d1	"psubw %%mm0, %%mm2            \n\t" //t5	"psubw %%mm0, %%mm3            \n\t" //d6         	"paddw %%mm2, %%mm4            \n\t" //t4	"movq %%mm7, %%mm0             \n\t"	"paddw %%mm2, %%mm7            \n\t" //d2	"psubw %%mm2, %%mm0            \n\t" //d5	"movq "MANGLE(MM_DESCALE_RND)", %%mm2   \n\t" //4	"psubw %%mm5, %%mm6            \n\t" //d7	"paddw "MANGLE(temps)"+0*8, %%mm5      \n\t" //d0	"paddw %%mm2, %%mm1            \n\t"	"paddw %%mm2, %%mm5            \n\t"	"psraw $3, %%mm1              \n\t"	"paddw %%mm2, %%mm7            \n\t"	"psraw $3, %%mm5              \n\t"	"paddw (%%"REG_D"), %%mm5          \n\t"	"psraw $3, %%mm7              \n\t"	"paddw (%%"REG_D",%%"REG_a",), %%mm1    \n\t"	"paddw %%mm2, %%mm0            \n\t"	"paddw (%%"REG_D",%%"REG_a",2), %%mm7   \n\t"	"paddw %%mm2, %%mm3            \n\t"	"movq %%mm5, (%%"REG_D")           \n\t"	"paddw %%mm2, %%mm6            \n\t"	"movq %%mm1, (%%"REG_D",%%"REG_a",)     \n\t"	"psraw $3, %%mm0              \n\t"	"movq %%mm7, (%%"REG_D",%%"REG_a",2)    \n\t"	"add %%"REG_d", %%"REG_D"             \n\t" //3*ls	"movq "MANGLE(temps)"+1*8, %%mm5       \n\t" //t3	"psraw $3, %%mm3              \n\t"	"paddw (%%"REG_D",%%"REG_a",2), %%mm0   \n\t"	"psubw %%mm4, %%mm5            \n\t" //d3	"paddw (%%"REG_D",%%"REG_d",), %%mm3    \n\t"	"psraw $3, %%mm6              \n\t"	"paddw "MANGLE(temps)"+1*8, %%mm4      \n\t" //d4        	"paddw %%mm2, %%mm5            \n\t"	"paddw (%%"REG_D",%%"REG_a",4), %%mm6   \n\t"	"paddw %%mm2, %%mm4            \n\t"	"movq %%mm0, (%%"REG_D",%%"REG_a",2)    \n\t"	"psraw $3, %%mm5              \n\t"	"paddw (%%"REG_D"), %%mm5          \n\t"	"psraw $3, %%mm4              \n\t"	"paddw (%%"REG_D",%%"REG_a",), %%mm4    \n\t"	"add $"DCTSIZE_S"*2*4, %%"REG_S"      \n\t" //4 rows	"movq %%mm3, (%%"REG_D",%%"REG_d",)     \n\t"	"movq %%mm6, (%%"REG_D",%%"REG_a",4)    \n\t"	"movq %%mm5, (%%"REG_D")           \n\t"	"movq %%mm4, (%%"REG_D",%%"REG_a",)     \n\t"	"sub %%"REG_d", %%"REG_D"             \n\t"	"add $8, %%"REG_D"               \n\t"	"dec %%"REG_c"                   \n\t"	"jnz 1b                  \n\t"	: "+S"(workspace), "+D"(output_adr), "+c"(cnt) //input regs	: "a"(output_stride*sizeof(short))	: "%"REG_d	);}#endif // HAVE_MMX#ifndef HAVE_MMXstatic void row_fdct_c(DCTELEM *data, const uint8_t *pixels, int line_size, int cnt){    int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;    int_simd16_t tmp10, tmp11, tmp12, tmp13;    int_simd16_t z1, z2, z3, z4, z5, z11, z13;    DCTELEM *dataptr;        cnt*=4;    // Pass 1: process rows.       dataptr = data;    for (; cnt > 0; cnt--) {    	tmp0 = pixels[line_size*0] + pixels[line_size*7];	tmp7 = pixels[line_size*0] - pixels[line_size*7];	tmp1 = pixels[line_size*1] + pixels[line_size*6];	tmp6 = pixels[line_size*1] - pixels[line_size*6];	tmp2 = pixels[line_size*2] + pixels[line_size*5];	tmp5 = pixels[line_size*2] - pixels[line_size*5];	tmp3 = pixels[line_size*3] + pixels[line_size*4];	tmp4 = pixels[line_size*3] - pixels[line_size*4];    	// Even part     	tmp10 = tmp0 + tmp3;    	tmp13 = tmp0 - tmp3;	tmp11 = tmp1 + tmp2;	tmp12 = tmp1 - tmp2;	//Even columns are written first, this leads to different order of columns 	//in column_fidct(), but they are processed independently, so all ok.	//Later in the row_idct() columns readed at the same order.	dataptr[2] = tmp10 + tmp11; 	dataptr[3] = tmp10 - tmp11;    	z1 = MULTIPLY16H((tmp12 + tmp13)<<2, FIX_0_707106781);	dataptr[0] = tmp13 + z1;    	dataptr[1] = tmp13 - z1;    	// Odd part 	tmp10 = (tmp4 + tmp5) <<2;  	tmp11 = (tmp5 + tmp6) <<2;	tmp12 = (tmp6 + tmp7) <<2;	z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);	z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;	z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;	z3 = MULTIPLY16H(tmp11, FIX_0_707106781);	z11 = tmp7 + z3;	z13 = tmp7 - z3;	dataptr[4] = z13 + z2;	dataptr[5] = z13 - z2;	dataptr[6] = z11 + z4;	dataptr[7] = z11 - z4;	pixels++;               // advance pointer to next column	dataptr += DCTSIZE;             }}#else /* HAVE_MMX */static void row_fdct_mmx(DCTELEM *data,  const uint8_t *pixels,  int line_size,  int cnt){    asm volatile(	"lea (%%"REG_a",%%"REG_a",2), %%"REG_d"    \n\t"	"6:                     \n\t"	"movd (%%"REG_S"), %%mm0           \n\t"	"pxor %%mm7, %%mm7             \n\t"	"movd (%%"REG_S",%%"REG_a",), %%mm1     \n\t"	"punpcklbw %%mm7, %%mm0        \n\t"	"movd (%%"REG_S",%%"REG_a",2), %%mm2    \n\t"	"punpcklbw %%mm7, %%mm1        \n\t"	"punpcklbw %%mm7, %%mm2        \n\t"	"add %%"REG_d", %%"REG_S"             \n\t"	"movq %%mm0, %%mm5             \n\t"	//       	"movd (%%"REG_S",%%"REG_a",4), %%mm3    \n\t" //7  ;prefetch!	"movq %%mm1, %%mm6             \n\t"	"movd (%%"REG_S",%%"REG_d",), %%mm4     \n\t" //6	"punpcklbw %%mm7, %%mm3        \n\t"	"psubw %%mm3, %%mm5            \n\t"	"punpcklbw %%mm7, %%mm4        \n\t"	"paddw %%mm3, %%mm0            \n\t"	"psubw %%mm4, %%mm6            \n\t"	"movd (%%"REG_S",%%"REG_a",2), %%mm3    \n\t" //5	"paddw %%mm4, %%mm1            \n\t"	"movq %%mm5, "MANGLE(temps)"+0*8       \n\t" //t7	"punpcklbw %%mm7, %%mm3        \n\t"	"movq %%mm6, "MANGLE(temps)"+1*8       \n\t" //t6	"movq %%mm2, %%mm4             \n\t"	"movd (%%"REG_S"), %%mm5           \n\t" //3	"paddw %%mm3, %%mm2            \n\t"	"movd (%%"REG_S",%%"REG_a",), %%mm6     \n\t" //4	"punpcklbw %%mm7, %%mm5        \n\t"	"psubw %%mm3, %%mm4            \n\t"	"punpcklbw %%mm7, %%mm6        \n\t"	"movq %%mm5, %%mm3             \n\t"	"paddw %%mm6, %%mm5            \n\t" //t3	"psubw %%mm6, %%mm3            \n\t" //t4  ; t0 t1 t2 t4 t5 t3 - -	"movq %%mm0, %%mm6             \n\t"	"movq %%mm1, %%mm7             \n\t"	"psubw %%mm5, %%mm0            \n\t" //t13	"psubw %%mm2, %%mm1            \n\t"	"paddw %%mm2, %%mm7            \n\t" //t11    	"paddw %%mm0, %%mm1            \n\t"	"movq %%mm7, %%mm2             \n\t"	"psllw $2, %%mm1              \n\t"	"paddw %%mm5, %%mm6            \n\t" //t10	"pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm1 \n\t"	"paddw %%mm6, %%mm7            \n\t" //d2	"psubw %%mm2, %%mm6            \n\t" //d3	"movq %%mm0, %%mm5             \n\t"	//transpose 4x4	"movq %%mm7, %%mm2             \n\t"	"punpcklwd %%mm6, %%mm7        \n\t"	"paddw %%mm1, %%mm0            \n\t" //d0	"punpckhwd %%mm6, %%mm2        \n\t"	"psubw %%mm1, %%mm5            \n\t" //d1                	"movq %%mm0, %%mm6             \n\t"	"movq "MANGLE(temps)"+1*8, %%mm1       \n\t"	"punpcklwd %%mm5, %%mm0        \n\t"	"punpckhwd %%mm5, %%mm6        \n\t"	"movq %%mm0, %%mm5             \n\t"	"punpckldq %%mm7, %%mm0        \n\t" //0	"paddw %%mm4, %%mm3            \n\t"	"punpckhdq %%mm7, %%mm5        \n\t" //1	"movq %%mm6, %%mm7             \n\t"	"movq %%mm0, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"	"punpckldq %%mm2, %%mm6        \n\t" //2     	"movq %%mm5, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"	"punpckhdq %%mm2, %%mm7        \n\t" //3    	"movq %%mm6, "DCTSIZE_S"*2*2(%%"REG_D") \n\t"	"paddw %%mm1, %%mm4            \n\t"	"movq %%mm7, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"	"psllw $2, %%mm3              \n\t" //t10    	"movq "MANGLE(temps)"+0*8, %%mm2       \n\t"	"psllw $2, %%mm4              \n\t" //t11	"pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm4 \n\t" //z3	"paddw %%mm2, %%mm1            \n\t"	"psllw $2, %%mm1              \n\t" //t12	"movq %%mm3, %%mm0             \n\t"	"pmulhw "MANGLE(MM_FIX_0_541196100)", %%mm0 \n\t"	"psubw %%mm1, %%mm3            \n\t"	"pmulhw "MANGLE(MM_FIX_0_382683433)", %%mm3 \n\t" //z5	"movq %%mm2, %%mm5             \n\t"	"pmulhw "MANGLE(MM_FIX_1_306562965)", %%mm1 \n\t"	"psubw %%mm4, %%mm2            \n\t" //z13	"paddw %%mm4, %%mm5            \n\t" //z11	"movq %%mm2, %%mm6             \n\t"	"paddw %%mm3, %%mm0            \n\t" //z2	"movq %%mm5, %%mm7             \n\t"	"paddw %%mm0, %%mm2            \n\t" //d4	"psubw %%mm0, %%mm6            \n\t" //d5    	"movq %%mm2, %%mm4             \n\t"	"paddw %%mm3, %%mm1            \n\t" //z4    	//transpose 4x4	"punpcklwd %%mm6, %%mm2        \n\t"	"paddw %%mm1, %%mm5            \n\t" //d6	"punpckhwd %%mm6, %%mm4        \n\t"	"psubw %%mm1, %%mm7            \n\t" //d7    	"movq %%mm5, %%mm6             \n\t"	"punpcklwd %%mm7, %%mm5        \n\t"	"punpckhwd %%mm7, %%mm6        \n\t"	"movq %%mm2, %%mm7             \n\t"	"punpckldq %%mm5, %%mm2        \n\t" //4	"sub %%"REG_d", %%"REG_S"             \n\t"	"punpckhdq %%mm5, %%mm7        \n\t" //5	"movq %%mm4, %%mm5             \n\t"	"movq %%mm2, "DCTSIZE_S"*0*2+"DCTSIZE_S"(%%"REG_D") \n\t"	"punpckldq %%mm6, %%mm4        \n\t" //6	"movq %%mm7, "DCTSIZE_S"*1*2+"DCTSIZE_S"(%%"REG_D") \n\t"	"punpckhdq %%mm6, %%mm5        \n\t" //7    	"movq %%mm4, "DCTSIZE_S"*2*2+"DCTSIZE_S"(%%"REG_D") \n\t"	"add $4, %%"REG_S"               \n\t"	"movq %%mm5, "DCTSIZE_S"*3*2+"DCTSIZE_S"(%%"REG_D") \n\t"	"add $"DCTSIZE_S"*2*4, %%"REG_D"      \n\t" //4 rows    	"dec %%"REG_c"                   \n\t"	"jnz 6b                  \n\t"	: "+S"(pixels), "+D"(data), "+c"(cnt) //input regs	: "a"(line_size)	: "%"REG_d);}#endif // HAVE_MMX

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?