vf_fspp.c
来自「君正早期ucos系统(只有早期的才不没有打包成库),MPLAYER,文件系统,图」· C语言 代码 · 共 2,126 行 · 第 1/5 页
C
2,126 行
"movq %%mm0, %%mm6 \n\t" "punpckldq %%mm2, %%mm0 \n\t" //0 "punpckhdq %%mm2, %%mm6 \n\t" //1 "movq %%mm0, %%mm5 \n\t" "punpckhwd %%mm3, %%mm7 \n\t" "psubw %%mm6, %%mm0 \n\t" "pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm0 \n\t" "movq %%mm4, %%mm2 \n\t" "punpckldq %%mm7, %%mm4 \n\t" //2 "paddw %%mm6, %%mm5 \n\t" "punpckhdq %%mm7, %%mm2 \n\t" //3 "movq %%mm4, %%mm1 \n\t" "psllw $2, %%mm0 \n\t" "paddw %%mm2, %%mm4 \n\t" //t10 "movq "DCTSIZE_S"*0*2+"DCTSIZE_S"(%%"REG_S"), %%mm3 \n\t" "psubw %%mm2, %%mm1 \n\t" //t11 "movq "DCTSIZE_S"*1*2+"DCTSIZE_S"(%%"REG_S"), %%mm2 \n\t" "psubw %%mm5, %%mm0 \n\t" "movq %%mm4, %%mm6 \n\t" "paddw %%mm5, %%mm4 \n\t" //t0 "psubw %%mm5, %%mm6 \n\t" //t3 "movq %%mm1, %%mm7 \n\t" "movq "DCTSIZE_S"*2*2+"DCTSIZE_S"(%%"REG_S"), %%mm5 \n\t" "paddw %%mm0, %%mm1 \n\t" //t1 "movq %%mm4, "MANGLE(temps)"+0*8 \n\t" //t0 "movq %%mm3, %%mm4 \n\t" "movq %%mm6, "MANGLE(temps)"+1*8 \n\t" //t3 "punpcklwd %%mm2, %%mm3 \n\t" //transpose 4x4 "movq "DCTSIZE_S"*3*2+"DCTSIZE_S"(%%"REG_S"), %%mm6 \n\t" "punpckhwd %%mm2, %%mm4 \n\t" "movq %%mm5, %%mm2 \n\t" "punpcklwd %%mm6, %%mm5 \n\t" "psubw %%mm0, %%mm7 \n\t" //t2 "punpckhwd %%mm6, %%mm2 \n\t" "movq %%mm3, %%mm0 \n\t" "punpckldq %%mm5, %%mm3 \n\t" //4 "punpckhdq %%mm5, %%mm0 \n\t" //5 "movq %%mm4, %%mm5 \n\t" // "movq %%mm3, %%mm6 \n\t" "punpckldq %%mm2, %%mm4 \n\t" //6 "psubw %%mm0, %%mm3 \n\t" //z10 "punpckhdq %%mm2, %%mm5 \n\t" //7 "paddw %%mm0, %%mm6 \n\t" //z13 "movq %%mm4, %%mm2 \n\t" "movq %%mm3, %%mm0 \n\t" "psubw %%mm5, %%mm4 \n\t" //z12 "pmulhw "MANGLE(MM_FIX_2_613125930)", %%mm0 \n\t" //- "paddw %%mm4, %%mm3 \n\t" "pmulhw "MANGLE(MM_FIX_1_847759065)", %%mm3 \n\t" //z5 "paddw %%mm5, %%mm2 \n\t" //z11 > "pmulhw "MANGLE(MM_FIX_1_082392200)", %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" "psubw %%mm6, %%mm2 \n\t" "paddw %%mm6, %%mm5 \n\t" //t7 "pmulhw "MANGLE(MM_FIX_1_414213562)", %%mm2 \n\t" //t11 "paddw %%mm3, %%mm0 \n\t" //t12 "psllw $3, %%mm0 \n\t" "psubw %%mm3, %%mm4 \n\t" //t10 "movq "MANGLE(temps)"+0*8, %%mm6 \n\t" "movq %%mm1, %%mm3 \n\t" "psllw $3, %%mm4 \n\t" "psubw %%mm5, %%mm0 \n\t" //t6 "psllw $3, %%mm2 \n\t" "paddw %%mm0, %%mm1 \n\t" //d1 "psubw %%mm0, %%mm2 \n\t" //t5 "psubw %%mm0, %%mm3 \n\t" //d6 "paddw %%mm2, %%mm4 \n\t" //t4 "movq %%mm7, %%mm0 \n\t" "paddw %%mm2, %%mm7 \n\t" //d2 "psubw %%mm2, %%mm0 \n\t" //d5 "movq "MANGLE(MM_DESCALE_RND)", %%mm2 \n\t" //4 "psubw %%mm5, %%mm6 \n\t" //d7 "paddw "MANGLE(temps)"+0*8, %%mm5 \n\t" //d0 "paddw %%mm2, %%mm1 \n\t" "paddw %%mm2, %%mm5 \n\t" "psraw $3, %%mm1 \n\t" "paddw %%mm2, %%mm7 \n\t" "psraw $3, %%mm5 \n\t" "paddw (%%"REG_D"), %%mm5 \n\t" "psraw $3, %%mm7 \n\t" "paddw (%%"REG_D",%%"REG_a",), %%mm1 \n\t" "paddw %%mm2, %%mm0 \n\t" "paddw (%%"REG_D",%%"REG_a",2), %%mm7 \n\t" "paddw %%mm2, %%mm3 \n\t" "movq %%mm5, (%%"REG_D") \n\t" "paddw %%mm2, %%mm6 \n\t" "movq %%mm1, (%%"REG_D",%%"REG_a",) \n\t" "psraw $3, %%mm0 \n\t" "movq %%mm7, (%%"REG_D",%%"REG_a",2) \n\t" "add %%"REG_d", %%"REG_D" \n\t" //3*ls "movq "MANGLE(temps)"+1*8, %%mm5 \n\t" //t3 "psraw $3, %%mm3 \n\t" "paddw (%%"REG_D",%%"REG_a",2), %%mm0 \n\t" "psubw %%mm4, %%mm5 \n\t" //d3 "paddw (%%"REG_D",%%"REG_d",), %%mm3 \n\t" "psraw $3, %%mm6 \n\t" "paddw "MANGLE(temps)"+1*8, %%mm4 \n\t" //d4 "paddw %%mm2, %%mm5 \n\t" "paddw (%%"REG_D",%%"REG_a",4), %%mm6 \n\t" "paddw %%mm2, %%mm4 \n\t" "movq %%mm0, (%%"REG_D",%%"REG_a",2) \n\t" "psraw $3, %%mm5 \n\t" "paddw (%%"REG_D"), %%mm5 \n\t" "psraw $3, %%mm4 \n\t" "paddw (%%"REG_D",%%"REG_a",), %%mm4 \n\t" "add $"DCTSIZE_S"*2*4, %%"REG_S" \n\t" //4 rows "movq %%mm3, (%%"REG_D",%%"REG_d",) \n\t" "movq %%mm6, (%%"REG_D",%%"REG_a",4) \n\t" "movq %%mm5, (%%"REG_D") \n\t" "movq %%mm4, (%%"REG_D",%%"REG_a",) \n\t" "sub %%"REG_d", %%"REG_D" \n\t" "add $8, %%"REG_D" \n\t" "dec %%"REG_c" \n\t" "jnz 1b \n\t" : "+S"(workspace), "+D"(output_adr), "+c"(cnt) //input regs : "a"(output_stride*sizeof(short)) : "%"REG_d );}#endif // HAVE_MMX#ifndef HAVE_MMXstatic void row_fdct_c(DCTELEM *data, const uint8_t *pixels, int line_size, int cnt){ int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int_simd16_t tmp10, tmp11, tmp12, tmp13; int_simd16_t z1, z2, z3, z4, z5, z11, z13; DCTELEM *dataptr; cnt*=4; // Pass 1: process rows. dataptr = data; for (; cnt > 0; cnt--) { tmp0 = pixels[line_size*0] + pixels[line_size*7]; tmp7 = pixels[line_size*0] - pixels[line_size*7]; tmp1 = pixels[line_size*1] + pixels[line_size*6]; tmp6 = pixels[line_size*1] - pixels[line_size*6]; tmp2 = pixels[line_size*2] + pixels[line_size*5]; tmp5 = pixels[line_size*2] - pixels[line_size*5]; tmp3 = pixels[line_size*3] + pixels[line_size*4]; tmp4 = pixels[line_size*3] - pixels[line_size*4]; // Even part tmp10 = tmp0 + tmp3; tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; //Even columns are written first, this leads to different order of columns //in column_fidct(), but they are processed independently, so all ok. //Later in the row_idct() columns readed at the same order. dataptr[2] = tmp10 + tmp11; dataptr[3] = tmp10 - tmp11; z1 = MULTIPLY16H((tmp12 + tmp13)<<2, FIX_0_707106781); dataptr[0] = tmp13 + z1; dataptr[1] = tmp13 - z1; // Odd part tmp10 = (tmp4 + tmp5) <<2; tmp11 = (tmp5 + tmp6) <<2; tmp12 = (tmp6 + tmp7) <<2; z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433); z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5; z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5; z3 = MULTIPLY16H(tmp11, FIX_0_707106781); z11 = tmp7 + z3; z13 = tmp7 - z3; dataptr[4] = z13 + z2; dataptr[5] = z13 - z2; dataptr[6] = z11 + z4; dataptr[7] = z11 - z4; pixels++; // advance pointer to next column dataptr += DCTSIZE; }}#else /* HAVE_MMX */static void row_fdct_mmx(DCTELEM *data, const uint8_t *pixels, int line_size, int cnt){ asm volatile( "lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t" "6: \n\t" "movd (%%"REG_S"), %%mm0 \n\t" "pxor %%mm7, %%mm7 \n\t" "movd (%%"REG_S",%%"REG_a",), %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "movd (%%"REG_S",%%"REG_a",2), %%mm2 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "add %%"REG_d", %%"REG_S" \n\t" "movq %%mm0, %%mm5 \n\t" // "movd (%%"REG_S",%%"REG_a",4), %%mm3 \n\t" //7 ;prefetch! "movq %%mm1, %%mm6 \n\t" "movd (%%"REG_S",%%"REG_d",), %%mm4 \n\t" //6 "punpcklbw %%mm7, %%mm3 \n\t" "psubw %%mm3, %%mm5 \n\t" "punpcklbw %%mm7, %%mm4 \n\t" "paddw %%mm3, %%mm0 \n\t" "psubw %%mm4, %%mm6 \n\t" "movd (%%"REG_S",%%"REG_a",2), %%mm3 \n\t" //5 "paddw %%mm4, %%mm1 \n\t" "movq %%mm5, "MANGLE(temps)"+0*8 \n\t" //t7 "punpcklbw %%mm7, %%mm3 \n\t" "movq %%mm6, "MANGLE(temps)"+1*8 \n\t" //t6 "movq %%mm2, %%mm4 \n\t" "movd (%%"REG_S"), %%mm5 \n\t" //3 "paddw %%mm3, %%mm2 \n\t" "movd (%%"REG_S",%%"REG_a",), %%mm6 \n\t" //4 "punpcklbw %%mm7, %%mm5 \n\t" "psubw %%mm3, %%mm4 \n\t" "punpcklbw %%mm7, %%mm6 \n\t" "movq %%mm5, %%mm3 \n\t" "paddw %%mm6, %%mm5 \n\t" //t3 "psubw %%mm6, %%mm3 \n\t" //t4 ; t0 t1 t2 t4 t5 t3 - - "movq %%mm0, %%mm6 \n\t" "movq %%mm1, %%mm7 \n\t" "psubw %%mm5, %%mm0 \n\t" //t13 "psubw %%mm2, %%mm1 \n\t" "paddw %%mm2, %%mm7 \n\t" //t11 "paddw %%mm0, %%mm1 \n\t" "movq %%mm7, %%mm2 \n\t" "psllw $2, %%mm1 \n\t" "paddw %%mm5, %%mm6 \n\t" //t10 "pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm1 \n\t" "paddw %%mm6, %%mm7 \n\t" //d2 "psubw %%mm2, %%mm6 \n\t" //d3 "movq %%mm0, %%mm5 \n\t" //transpose 4x4 "movq %%mm7, %%mm2 \n\t" "punpcklwd %%mm6, %%mm7 \n\t" "paddw %%mm1, %%mm0 \n\t" //d0 "punpckhwd %%mm6, %%mm2 \n\t" "psubw %%mm1, %%mm5 \n\t" //d1 "movq %%mm0, %%mm6 \n\t" "movq "MANGLE(temps)"+1*8, %%mm1 \n\t" "punpcklwd %%mm5, %%mm0 \n\t" "punpckhwd %%mm5, %%mm6 \n\t" "movq %%mm0, %%mm5 \n\t" "punpckldq %%mm7, %%mm0 \n\t" //0 "paddw %%mm4, %%mm3 \n\t" "punpckhdq %%mm7, %%mm5 \n\t" //1 "movq %%mm6, %%mm7 \n\t" "movq %%mm0, "DCTSIZE_S"*0*2(%%"REG_D") \n\t" "punpckldq %%mm2, %%mm6 \n\t" //2 "movq %%mm5, "DCTSIZE_S"*1*2(%%"REG_D") \n\t" "punpckhdq %%mm2, %%mm7 \n\t" //3 "movq %%mm6, "DCTSIZE_S"*2*2(%%"REG_D") \n\t" "paddw %%mm1, %%mm4 \n\t" "movq %%mm7, "DCTSIZE_S"*3*2(%%"REG_D") \n\t" "psllw $2, %%mm3 \n\t" //t10 "movq "MANGLE(temps)"+0*8, %%mm2 \n\t" "psllw $2, %%mm4 \n\t" //t11 "pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm4 \n\t" //z3 "paddw %%mm2, %%mm1 \n\t" "psllw $2, %%mm1 \n\t" //t12 "movq %%mm3, %%mm0 \n\t" "pmulhw "MANGLE(MM_FIX_0_541196100)", %%mm0 \n\t" "psubw %%mm1, %%mm3 \n\t" "pmulhw "MANGLE(MM_FIX_0_382683433)", %%mm3 \n\t" //z5 "movq %%mm2, %%mm5 \n\t" "pmulhw "MANGLE(MM_FIX_1_306562965)", %%mm1 \n\t" "psubw %%mm4, %%mm2 \n\t" //z13 "paddw %%mm4, %%mm5 \n\t" //z11 "movq %%mm2, %%mm6 \n\t" "paddw %%mm3, %%mm0 \n\t" //z2 "movq %%mm5, %%mm7 \n\t" "paddw %%mm0, %%mm2 \n\t" //d4 "psubw %%mm0, %%mm6 \n\t" //d5 "movq %%mm2, %%mm4 \n\t" "paddw %%mm3, %%mm1 \n\t" //z4 //transpose 4x4 "punpcklwd %%mm6, %%mm2 \n\t" "paddw %%mm1, %%mm5 \n\t" //d6 "punpckhwd %%mm6, %%mm4 \n\t" "psubw %%mm1, %%mm7 \n\t" //d7 "movq %%mm5, %%mm6 \n\t" "punpcklwd %%mm7, %%mm5 \n\t" "punpckhwd %%mm7, %%mm6 \n\t" "movq %%mm2, %%mm7 \n\t" "punpckldq %%mm5, %%mm2 \n\t" //4 "sub %%"REG_d", %%"REG_S" \n\t" "punpckhdq %%mm5, %%mm7 \n\t" //5 "movq %%mm4, %%mm5 \n\t" "movq %%mm2, "DCTSIZE_S"*0*2+"DCTSIZE_S"(%%"REG_D") \n\t" "punpckldq %%mm6, %%mm4 \n\t" //6 "movq %%mm7, "DCTSIZE_S"*1*2+"DCTSIZE_S"(%%"REG_D") \n\t" "punpckhdq %%mm6, %%mm5 \n\t" //7 "movq %%mm4, "DCTSIZE_S"*2*2+"DCTSIZE_S"(%%"REG_D") \n\t" "add $4, %%"REG_S" \n\t" "movq %%mm5, "DCTSIZE_S"*3*2+"DCTSIZE_S"(%%"REG_D") \n\t" "add $"DCTSIZE_S"*2*4, %%"REG_D" \n\t" //4 rows "dec %%"REG_c" \n\t" "jnz 6b \n\t" : "+S"(pixels), "+D"(data), "+c"(cnt) //input regs : "a"(line_size) : "%"REG_d);}#endif // HAVE_MMX
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?