📄 mcomp_sgl_mmx.cc
字号:
sp += mc->bytesperline_lum; dp += mc->bytesperline_lum; } } else { sp -= xshift; xshift <<= 3; // left bits to ignore uint64 xshift2 = 64-xshift; // right bits to ignore for (int y=mc->blkheight;y>0;y--) { // rp: mm1 mm2 mm3 -> mm1 mm3 // dp: mm2 mm4 __asm__ ( // Read data aligned: // Read 3x 8bytes and shift data to correct position. // 16 pixels are in "movq (%0),%%mm1\n\t" // rp[0] -> mm1 "movq 8(%0),%%mm2\n\t" // rp[1] -> mm2,mm4 (*1) " psrlq %2,%%mm1\n\t" // note: shifting right because of little endian "movq 16(%0),%%mm3\n\t" // rp[2] -> mm3 " movq %%mm2,%%mm4\n\t" // (*1) "psllq %3,%%mm4\n\t" "psrlq %2,%%mm2\n\t" " por %%mm4,%%mm1\n\t" // combine first 8 pixels into mm1 "psllq %3,%%mm3\n\t" "por %%mm2,%%mm3\n\t" // combine second 8 pixels into mm3 "movq (%1),%%mm2\n\t" // dp[0] " movq %%mm1,%%mm5\n\t" "movq 8(%1),%%mm4\n\t" // dp[1] " por %%mm2,%%mm5\n\t" "pand %%mm0,%%mm5\n\t" // mm5: rounding[0] " movq %%mm3,%%mm6\n\t" "por %%mm4,%%mm6\n\t" " pand %%mm7,%%mm1\n\t" // mask out LSB of rp[0] "pand %%mm0,%%mm6\n\t" // mm6: rounding[1] " pand %%mm7,%%mm3\n\t" // mask out LSB of rp[1] // calc mean "psrlq $1,%%mm1\n\t" " pand %%mm7,%%mm2\n\t" "psrlq $1,%%mm3\n\t" " pand %%mm7,%%mm4\n\t" "psrlq $1,%%mm2\n\t" " paddusb %%mm5, %%mm1\n\t" // rp[0] += rounding[0] "psrlq $1,%%mm4\n\t" " paddusb %%mm6, %%mm3\n\t" // rp[1] += rounding[1] "paddusb %%mm2, %%mm1\n\t" // rp[0] += dp[0] "movq %%mm1, (%1)\n\t" " paddusb %%mm4, %%mm3\n\t" // rp[1] += dp[1] "movq %%mm3,8(%1)\n\t" : : "r"(sp),"r"(dp),"m"(xshift),"m"(xshift2) ); sp += mc->bytesperline_lum; dp += mc->bytesperline_lum; } }#else for (int y=0;y<mc->blkheight;y++) { for (int x=0;x<16;x++) { *dp = (*dp + *sp + 1)>>1; dp++; sp++; } sp += mc->bytesperline_lum-16; dp += mc->bytesperline_lum-16; }#endif}static void LumaHFadd(MotionCompensation_SglMB::MCData* mc){ const Pixel* sp; Pixel* dp; sp = mc->nextimg.y; dp = mc->currimg.y; for (int y=0;y<mc->blkheight;y++) { for (int x=0;x<16;x++) { *dp = (*dp + ((*sp + sp[1] + 1)>>1) + 1)>>1; dp++; sp++; } sp += mc->bytesperline_lum-16; dp += mc->bytesperline_lum-16; }}static void LumaFHadd(MotionCompensation_SglMB::MCData* mc){ const Pixel* sp; Pixel* dp; sp = mc->nextimg.y; dp = mc->currimg.y; for (int y=0;y<mc->blkheight;y++) { for (int x=0;x<16;x++) { *dp = (*dp + ((*sp + sp[mc->bytesperline_lum] + 1)>>1) + 1)>>1; dp++; sp++; } sp += mc->bytesperline_lum-16; dp += mc->bytesperline_lum-16; }}static void LumaHHadd(MotionCompensation_SglMB::MCData* mc){ const Pixel* sp; Pixel* dp; sp = mc->nextimg.y; dp = mc->currimg.y; for (int y=0;y<mc->blkheight;y++) { for (int x=0;x<16;x++) { *dp = (*dp + ((*sp + sp[1] + sp[mc->bytesperline_lum] + sp[mc->bytesperline_lum+1] +2)>>2)+1)>>1; dp++; sp++; } sp += mc->bytesperline_lum-16; dp += mc->bytesperline_lum-16; }}static void ChromaFFadd(MotionCompensation_SglMB::MCData* mc){ const Pixel* sp = mc->nextimg.cr; Pixel* dp = mc->currimg.cr; int h = mc->blkheight_chr; for (int y=0;y<h;y++) { for (int x=0;x<8;x++) { *dp = (*dp + *sp + 1)>>1; sp++; dp++; } sp += mc->bytesperline_chr-8; dp += mc->bytesperline_chr-8; } sp = mc->nextimg.cb; dp = mc->currimg.cb; for (int y=0;y<h;y++) { for (int x=0;x<8;x++) { *dp = (*dp + *sp + 1)>>1; sp++; dp++; } sp += mc->bytesperline_chr-8; dp += mc->bytesperline_chr-8; }}static void ChromaFHadd(MotionCompensation_SglMB::MCData* mc){ const Pixel* sp = mc->nextimg.cr; Pixel* dp = mc->currimg.cr; int h = mc->blkheight_chr; for (int y=0;y<h;y++) { for (int x=0;x<8;x++) { *dp = (*dp + ((*sp + sp[mc->bytesperline_chr]+1)>>1) + 1)>>1; dp++; sp++; } sp += mc->bytesperline_chr-8; dp += mc->bytesperline_chr-8; } sp = mc->nextimg.cb; dp = mc->currimg.cb; for (int y=0;y<h;y++) { for (int x=0;x<8;x++) { *dp = (*dp + ((*sp + sp[mc->bytesperline_chr]+1)>>1) + 1)>>1; dp++; sp++; } sp += mc->bytesperline_chr-8; dp += mc->bytesperline_chr-8; }}static void ChromaHFadd(MotionCompensation_SglMB::MCData* mc){ const Pixel* sp = mc->nextimg.cr; Pixel* dp = mc->currimg.cr; int h = mc->blkheight_chr; for (int y=0;y<h;y++) { for (int x=0;x<8;x++) { *dp = (*dp + ((*sp + sp[1]+1)>>1) + 1)>>1; dp++; sp++; } sp += mc->bytesperline_chr-8; dp += mc->bytesperline_chr-8; } sp = mc->nextimg.cb; dp = mc->currimg.cb; for (int y=0;y<h;y++) { for (int x=0;x<8;x++) { *dp = (*dp + ((*sp + sp[1]+1)>>1) + 1)>>1; dp++; sp++; } sp += mc->bytesperline_chr-8; dp += mc->bytesperline_chr-8; }}static void ChromaHHadd(MotionCompensation_SglMB::MCData* mc){ const Pixel* sp = mc->nextimg.cr; Pixel* dp = mc->currimg.cr; int h = mc->blkheight_chr; for (int y=0;y<h;y++) { for (int x=0;x<8;x++) { *dp = (*dp + ((*sp + sp[1] + sp[mc->bytesperline_chr] + sp[mc->bytesperline_chr+1]+2)>>2)+1)>>1; sp++; dp++; } sp += mc->bytesperline_chr-8; dp += mc->bytesperline_chr-8; } sp = mc->nextimg.cb; dp = mc->currimg.cb; for (int y=0;y<h;y++) { for (int x=0;x<8;x++) { *dp = (*dp + ((*sp + sp[1] + sp[mc->bytesperline_chr] + sp[mc->bytesperline_chr+1]+2)>>2)+1)>>1; sp++; dp++; } sp += mc->bytesperline_chr-8; dp += mc->bytesperline_chr-8; }}static void MC_420_DBL_FFFF_Luma(MotionCompensation_SglMB::MCData* mc) { LumaFF(mc); LumaFFadd(mc); }static void MC_420_DBL_FHFF_Luma(MotionCompensation_SglMB::MCData* mc) { LumaFF(mc); LumaFHadd(mc); }static void MC_420_DBL_HFFF_Luma(MotionCompensation_SglMB::MCData* mc) { LumaFF(mc); LumaHFadd(mc); }static void MC_420_DBL_HHFF_Luma(MotionCompensation_SglMB::MCData* mc) { LumaFF(mc); LumaHHadd(mc); }static void MC_420_DBL_FFFH_Luma(MotionCompensation_SglMB::MCData* mc) { LumaFH(mc); LumaFFadd(mc); }static void MC_420_DBL_FHFH_Luma(MotionCompensation_SglMB::MCData* mc) { LumaFH(mc); LumaFHadd(mc); }static void MC_420_DBL_HFFH_Luma(MotionCompensation_SglMB::MCData* mc) { LumaFH(mc); LumaHFadd(mc); }static void MC_420_DBL_HHFH_Luma(MotionCompensation_SglMB::MCData* mc) { LumaFH(mc); LumaHHadd(mc); }static void MC_420_DBL_FFHF_Luma(MotionCompensation_SglMB::MCData* mc) { LumaHF(mc); LumaFFadd(mc); }static void MC_420_DBL_FHHF_Luma(MotionCompensation_SglMB::MCData* mc) { LumaHF(mc); LumaFHadd(mc); }static void MC_420_DBL_HFHF_Luma(MotionCompensation_SglMB::MCData* mc) { LumaHF(mc); LumaHFadd(mc); }static void MC_420_DBL_HHHF_Luma(MotionCompensation_SglMB::MCData* mc) { LumaHF(mc); LumaHHadd(mc); }static void MC_420_DBL_FFHH_Luma(MotionCompensation_SglMB::MCData* mc) { LumaHH(mc); LumaFFadd(mc); }static void MC_420_DBL_FHHH_Luma(MotionCompensation_SglMB::MCData* mc) { LumaHH(mc); LumaFHadd(mc); }static void MC_420_DBL_HFHH_Luma(MotionCompensation_SglMB::MCData* mc) { LumaHH(mc); LumaHFadd(mc); }static void MC_420_DBL_HHHH_Luma(MotionCompensation_SglMB::MCData* mc) { LumaHH(mc); LumaHHadd(mc); } static void MC_420_DBL_FFFF_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaFF(mc); ChromaFFadd(mc); }static void MC_420_DBL_FHFF_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaFF(mc); ChromaFHadd(mc); }static void MC_420_DBL_HFFF_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaFF(mc); ChromaHFadd(mc); }static void MC_420_DBL_HHFF_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaFF(mc); ChromaHHadd(mc); }static void MC_420_DBL_FFFH_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaFH(mc); ChromaFFadd(mc); }static void MC_420_DBL_FHFH_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaFH(mc); ChromaFHadd(mc); }static void MC_420_DBL_HFFH_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaFH(mc); ChromaHFadd(mc); }static void MC_420_DBL_HHFH_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaFH(mc); ChromaHHadd(mc); }static void MC_420_DBL_FFHF_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaHF(mc); ChromaFFadd(mc); }static void MC_420_DBL_FHHF_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaHF(mc); ChromaFHadd(mc); }static void MC_420_DBL_HFHF_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaHF(mc); ChromaHFadd(mc); }static void MC_420_DBL_HHHF_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaHF(mc); ChromaHHadd(mc); }static void MC_420_DBL_FFHH_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaHH(mc); ChromaFFadd(mc); }static void MC_420_DBL_FHHH_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaHH(mc); ChromaFHadd(mc); }static void MC_420_DBL_HFHH_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaHH(mc); ChromaHFadd(mc); }static void MC_420_DBL_HHHH_Chroma(MotionCompensation_SglMB::MCData* mc) { ChromaHH(mc); ChromaHHadd(mc); }MotionCompensation_SglMB_MMX::MotionCompensation_SglMB_MMX(){ sglluma[0] = LumaFF; sglluma[MC_Last_HalfH] = LumaHF; sglluma[MC_Last_HalfV] = LumaFH; sglluma[MC_Last_HalfH | MC_Last_HalfV] = LumaHH; sglchroma420[0] = ChromaFF; sglchroma420[MC_Last_HalfH] = ChromaHF; sglchroma420[MC_Last_HalfV] = ChromaFH; sglchroma420[MC_Last_HalfH | MC_Last_HalfV] = ChromaHH; for (int i=0;i<4;i++) { sglchroma422[i] = ChromaFF; sglchroma444[i] = ChromaFF; } dblluma[ 0] = MC_420_DBL_FFFF_Luma; dblluma[ 1] = MC_420_DBL_FFFH_Luma; dblluma[ 2] = MC_420_DBL_FFHF_Luma; dblluma[ 3] = MC_420_DBL_FFHH_Luma; dblluma[ 4] = MC_420_DBL_FHFF_Luma; dblluma[ 5] = MC_420_DBL_FHFH_Luma; dblluma[ 6] = MC_420_DBL_FHHF_Luma; dblluma[ 7] = MC_420_DBL_FHHH_Luma; dblluma[ 8] = MC_420_DBL_HFFF_Luma; dblluma[ 9] = MC_420_DBL_HFFH_Luma; dblluma[10] = MC_420_DBL_HFHF_Luma; dblluma[11] = MC_420_DBL_HFHH_Luma; dblluma[12] = MC_420_DBL_HHFF_Luma; dblluma[13] = MC_420_DBL_HHFH_Luma; dblluma[14] = MC_420_DBL_HHHF_Luma; dblluma[15] = MC_420_DBL_HHHH_Luma; dblchroma420[ 0] = MC_420_DBL_FFFF_Chroma; dblchroma420[ 1] = MC_420_DBL_FFFH_Chroma; dblchroma420[ 2] = MC_420_DBL_FFHF_Chroma; dblchroma420[ 3] = MC_420_DBL_FFHH_Chroma; dblchroma420[ 4] = MC_420_DBL_FHFF_Chroma; dblchroma420[ 5] = MC_420_DBL_FHFH_Chroma; dblchroma420[ 6] = MC_420_DBL_FHHF_Chroma; dblchroma420[ 7] = MC_420_DBL_FHHH_Chroma; dblchroma420[ 8] = MC_420_DBL_HFFF_Chroma; dblchroma420[ 9] = MC_420_DBL_HFFH_Chroma; dblchroma420[10] = MC_420_DBL_HFHF_Chroma; dblchroma420[11] = MC_420_DBL_HFHH_Chroma; dblchroma420[12] = MC_420_DBL_HHFF_Chroma; dblchroma420[13] = MC_420_DBL_HHFH_Chroma; dblchroma420[14] = MC_420_DBL_HHHF_Chroma; dblchroma420[15] = MC_420_DBL_HHHH_Chroma;}MotionCompensation_SglMB::MCompFunc*const*MotionCompensation_SglMB_MMX::AskMCompFunc_Sgl_Luma() const{ return sglluma;}MotionCompensation_SglMB::MCompFunc*const*MotionCompensation_SglMB_MMX::AskMCompFunc_Dbl_Luma() const{ return dblluma;}MotionCompensation_SglMB::MCompFunc*const*MotionCompensation_SglMB_MMX::AskMCompFunc_Sgl_Chroma(uint2 chroma) const{ return sglchroma420;}MotionCompensation_SglMB::MCompFunc*const*MotionCompensation_SglMB_MMX::AskMCompFunc_Dbl_Chroma(uint2 chroma) const{ return dblchroma420;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -