📄 dsp_mmx.c
字号:
: "r" (stride1), "r" (stride2) : "memory" ); return DiffVal;}

/*
 * Thresholded-SAD entry point.
 *
 * Forwards to sad8x8__mmx() with the two pointers and strides and returns
 * its result unchanged.  The `thres` argument is accepted but never read
 * here, so no early termination is performed in this variant.
 */
static ogg_uint32_t sad8x8_thres__mmx (unsigned char *ptr1, ogg_uint32_t stride1,
                                       unsigned char *ptr2, ogg_uint32_t stride2,
                                       ogg_uint32_t thres)
{
  return sad8x8__mmx (ptr1, stride1, ptr2, stride2);
}

/*
 * Sum of absolute differences between an 8x8 source block and the
 * per-byte average of two reference blocks.
 *
 *   SrcData      first row of the source block; advanced by SrcStride
 *                bytes after each row.
 *   RefDataPtr1,
 *   RefDataPtr2  first rows of the two reference blocks; both advanced by
 *                RefStride bytes after each row.
 *   thres        not referenced by the assembly below.
 *
 * Returns the low 16 bits of the accumulated sum.  With 64 byte-sized
 * differences the sum cannot exceed 64 * 255 = 16320, so the 16-bit
 * accumulation lanes and the final mask do not lose information.
 *
 * Asm operands: %0 = DiffVal (memory), %1 = SrcData, %2 = RefDataPtr1,
 * %3 = RefDataPtr2, %4 = SrcStride (memory), %5 = RefStride (memory).
 * edi is used as the row counter and is listed as clobbered; the MMX
 * registers written here are not listed in the clobber list.
 *
 * NOTE(review): the strides are added to pointer registers straight from
 * ogg_uint32_t memory operands, which presumably relies on a 32-bit
 * target (the init routine's log message says x86_32) -- confirm before
 * reusing this code elsewhere.
 */
static ogg_uint32_t sad8x8_xy2_thres__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
                                           unsigned char *RefDataPtr1,
                                           unsigned char *RefDataPtr2, ogg_uint32_t RefStride,
                                           ogg_uint32_t thres)
{
  ogg_uint32_t DiffVal;

  __asm__ __volatile__ (
    " .balign 16 \n\t"

    " pcmpeqd %%mm5, %%mm5 \n\t"   /* mm5 = all ones ... */
    " paddb %%mm5, %%mm5 \n\t"     /* ... 0xff + 0xff per byte -> fefefefefefefefe in mm5 */
    " pxor %%mm6, %%mm6 \n\t"      /* zero out mm6 for unpack */
    " pxor %%mm7, %%mm7 \n\t"      /* mm7 contains the result (four 16-bit partial sums) */
    " mov $8, %%edi \n\t"          /* 8 rows */
    "1: \n\t"
    " movq (%1), %%mm0 \n\t"       /* take 8 source bytes */
    " movq (%2), %%mm2 \n\t"       /* 8 bytes from the first reference */
    " movq (%3), %%mm3 \n\t"       /* 8 bytes from the second reference */

    /* Per-byte average of mm2 and mm3, rounded down:
       (a & b) + (((a ^ b) & 0xfe) >> 1).  Masking with 0xfe clears the low
       bit of every byte so the 64-bit shift cannot move a bit into the
       neighbouring byte. */
    " movq %%mm2, %%mm1 \n\t"
    " pand %%mm3, %%mm1 \n\t"
    " pxor %%mm2, %%mm3 \n\t"
    " pand %%mm5, %%mm3 \n\t"
    " psrlq $1, %%mm3 \n\t"
    " paddb %%mm3, %%mm1 \n\t"     /* mm1 = averaged reference row */

    " movq %%mm0, %%mm2 \n\t"      /* keep a copy of the source row */

    " psubusb %%mm1, %%mm0 \n\t"   /* A - B, saturating at 0 */
    " psubusb %%mm2, %%mm1 \n\t"   /* B - A, saturating at 0 */
    " por %%mm1, %%mm0 \n\t"       /* one of the two is 0, so OR gives |A - B| */

    " movq %%mm0, %%mm1 \n\t"

    " punpcklbw %%mm6, %%mm0 \n\t" /* widen the low four bytes to 16-bit words */
    " paddw %%mm0, %%mm7 \n\t"     /* accumulate them */
    " punpckhbw %%mm6, %%mm1 \n\t" /* widen the high four bytes to 16-bit words */
    " add %4, %1 \n\t"             /* advance the source pointer by SrcStride */
    " paddw %%mm1, %%mm7 \n\t"     /* accumulate them */
    " add %5, %2 \n\t"             /* advance the first reference pointer by RefStride */
    " add %5, %3 \n\t"             /* advance the second reference pointer by RefStride */

    " dec %%edi \n\t"
    " jnz 1b \n\t"

    /* Horizontal add of the four 16-bit lanes of mm7 into its lowest word. */
    " movq %%mm7, %%mm0 \n\t"
    " psrlq $32, %%mm7 \n\t"
    " paddw %%mm0, %%mm7 \n\t"
    " movq %%mm7, %%mm0 \n\t"
    " psrlq $16, %%mm7 \n\t"
    " paddw %%mm0, %%mm7 \n\t"
    " movd %%mm7, %0 \n\t"         /* store the low 32 bits to DiffVal ... */
    " andl $0xffff, %0 \n\t"       /* ... and keep only the 16-bit total */

    : "=m" (DiffVal),
      "+r" (SrcData),
      "+r" (RefDataPtr1),
      "+r" (RefDataPtr2)
    : "m" (SrcStride),
      "m" (RefStride)
    : "edi", "memory"
  );

  return DiffVal;
}

/*
 * Variance-style error metric of a single 8x8 block.
 *
 *   DataPtr  first row of the block; advanced by Stride bytes after each
 *            of the 8 rows.
 *   Stride   distance in bytes between consecutive rows.
 *
 * The assembly computes XSum (sum of the 64 bytes) and XXSum (sum of
 * their squares).  The function returns 64 * XXSum - XSum * XSum, which
 * equals the population variance of the 64 samples multiplied by 64 * 64.
 *
 * Asm operands: %0 = XSum, %1 = XXSum, %2 = DataPtr, %3 = Stride (all in
 * registers).  edi is used as the row counter and as scratch for the
 * sign extension, and is listed as clobbered.
 */
static ogg_uint32_t intra8x8_err__mmx (unsigned char *DataPtr, ogg_uint32_t Stride)
{
  ogg_uint32_t XSum;
  ogg_uint32_t XXSum;

  __asm__ __volatile__ (
    " .balign 16 \n\t"

    " pxor %%mm5, %%mm5 \n\t"      /* mm5 = running sum, four 16-bit lanes */
    " pxor %%mm6, %%mm6 \n\t"      /* mm6 = zero, used for unpacking */
    " pxor %%mm7, %%mm7 \n\t"      /* mm7 = running sum of squares, two 32-bit lanes */
    " mov $8, %%edi \n\t"          /* 8 rows */
    "1: \n\t"
    " movq (%2), %%mm0 \n\t"       /* take 8 bytes */
    " movq %%mm0, %%mm2 \n\t"

    " punpcklbw %%mm6, %%mm0 \n\t" /* low four bytes -> words */
    " punpckhbw %%mm6, %%mm2 \n\t" /* high four bytes -> words */

    " paddw %%mm0, %%mm5 \n\t"     /* add both halves to the sum */
    " paddw %%mm2, %%mm5 \n\t"

    " pmaddwd %%mm0, %%mm0 \n\t"   /* square each word, add adjacent pairs -> dwords */
    " pmaddwd %%mm2, %%mm2 \n\t"

    " paddd %%mm0, %%mm7 \n\t"     /* add both halves to the sum of squares */
    " paddd %%mm2, %%mm7 \n\t"

    " add %3, %2 \n\t"             /* advance the data pointer by Stride */

    " dec %%edi \n\t"
    " jnz 1b \n\t"

    /* Horizontal add of the four 16-bit lanes of mm5. */
    " movq %%mm5, %%mm0 \n\t"
    " psrlq $32, %%mm5 \n\t"
    " paddw %%mm0, %%mm5 \n\t"
    " movq %%mm5, %%mm0 \n\t"
    " psrlq $16, %%mm5 \n\t"
    " paddw %%mm0, %%mm5 \n\t"
    " movd %%mm5, %%edi \n\t"
    " movsx %%di, %%edi \n\t"      /* keep the low word only, sign-extended to 32 bits */
    " movl %%edi, %0 \n\t"         /* XSum */

    /* Horizontal add of the two 32-bit lanes of mm7. */
    " movq %%mm7, %%mm0 \n\t"
    " psrlq $32, %%mm7 \n\t"
    " paddd %%mm0, %%mm7 \n\t"
    " movd %%mm7, %1 \n\t"         /* XXSum */

    : "=r" (XSum),
      "=r" (XXSum),
      "+r" (DataPtr)
    : "r" (Stride)
    : "edi", "memory"
  );

  /* 64 * sum-of-squares minus squared sum, used as the mis-match metric. */
  return (( (XXSum<<6) - XSum*XSum ) );
}

/*
 * Variance-style error metric of the difference between an 8x8 source
 * block and an 8x8 reference block.
 *
 *   SrcData     first row of the source block; advanced by SrcStride
 *               bytes after each of the 8 rows.
 *   RefDataPtr  first row of the reference block; advanced by RefStride
 *               bytes after each row.
 *
 * For every pixel the signed difference src - ref is formed.  XSum is the
 * sum of the 64 differences and XXSum the sum of their squares; the
 * function returns 64 * XXSum - XSum * XSum.
 *
 * Asm operands: %0 = XSum (memory), %1 = XXSum (memory), %2 = SrcData,
 * %3 = RefDataPtr, %4 = SrcStride (memory), %5 = RefStride (memory).
 *
 * NOTE(review): XSum can be negative and is stored sign-extended into an
 * ogg_uint32_t; XSum*XSum then presumably relies on 32-bit wrap-around
 * arithmetic to yield the correct square -- confirm the typedef's width.
 */
static ogg_uint32_t inter8x8_err__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
                                       unsigned char *RefDataPtr, ogg_uint32_t RefStride)
{
  ogg_uint32_t XSum;
  ogg_uint32_t XXSum;

  __asm__ __volatile__ (
    " .balign 16 \n\t"

    " pxor %%mm5, %%mm5 \n\t"      /* mm5 = running sum, four 16-bit lanes */
    " pxor %%mm6, %%mm6 \n\t"      /* mm6 = zero, used for unpacking */
    " pxor %%mm7, %%mm7 \n\t"      /* mm7 = running sum of squares, two 32-bit lanes */
    " mov $8, %%edi \n\t"          /* 8 rows */
    "1: \n\t"
    " movq (%2), %%mm0 \n\t"       /* take 8 source bytes */
    " movq (%3), %%mm1 \n\t"       /* take 8 reference bytes */
    " movq %%mm0, %%mm2 \n\t"
    " movq %%mm1, %%mm3 \n\t"

    " punpcklbw %%mm6, %%mm0 \n\t" /* low halves -> words */
    " punpcklbw %%mm6, %%mm1 \n\t"
    " punpckhbw %%mm6, %%mm2 \n\t" /* high halves -> words */
    " punpckhbw %%mm6, %%mm3 \n\t"

    /* Both operands are zero-extended bytes, so each difference lies in
       -255..255 and the saturating subtract never actually saturates. */
    " psubsw %%mm1, %%mm0 \n\t"
    " psubsw %%mm3, %%mm2 \n\t"

    " paddw %%mm0, %%mm5 \n\t"     /* add both halves to the sum */
    " paddw %%mm2, %%mm5 \n\t"

    " pmaddwd %%mm0, %%mm0 \n\t"   /* square each word, add adjacent pairs -> dwords */
    " pmaddwd %%mm2, %%mm2 \n\t"

    " paddd %%mm0, %%mm7 \n\t"     /* add both halves to the sum of squares */
    " paddd %%mm2, %%mm7 \n\t"

    " add %4, %2 \n\t"             /* advance the source pointer by SrcStride */
    " add %5, %3 \n\t"             /* advance the reference pointer by RefStride */

    " dec %%edi \n\t"
    " jnz 1b \n\t"

    /* Horizontal add of the four 16-bit lanes of mm5. */
    " movq %%mm5, %%mm0 \n\t"
    " psrlq $32, %%mm5 \n\t"
    " paddw %%mm0, %%mm5 \n\t"
    " movq %%mm5, %%mm0 \n\t"
    " psrlq $16, %%mm5 \n\t"
    " paddw %%mm0, %%mm5 \n\t"
    " movd %%mm5, %%edi \n\t"
    " movsx %%di, %%edi \n\t"      /* the sum is signed: sign-extend the low word */
    " movl %%edi, %0 \n\t"         /* XSum */

    /* Horizontal add of the two 32-bit lanes of mm7. */
    " movq %%mm7, %%mm0 \n\t"
    " psrlq $32, %%mm7 \n\t"
    " paddd %%mm0, %%mm7 \n\t"
    " movd %%mm7, %1 \n\t"         /* XXSum */

    : "=m" (XSum),
      "=m" (XXSum),
      "+r" (SrcData),
      "+r" (RefDataPtr)
    : "m" (SrcStride),
      "m" (RefStride)
    : "edi", "memory"
  );

  /* 64 * sum-of-squares minus squared sum, returned as the mis-match metric. */
  return (( (XXSum<<6) - XSum*XSum ));
}

/*
 * Variance-style error metric of the difference between an 8x8 source
 * block and the per-byte average of two 8x8 reference blocks.
 *
 *   SrcData      first row of the source block; advanced by SrcStride
 *                bytes after each of the 8 rows.
 *   RefDataPtr1,
 *   RefDataPtr2  first rows of the two reference blocks; both advanced by
 *                RefStride bytes after each row.
 *
 * XSum is the sum of the 64 signed differences src - avg(ref1, ref2) and
 * XXSum the sum of their squares; the function returns
 * 64 * XXSum - XSum * XSum.
 *
 * Asm operands: %0 = XSum (memory), %1 = XXSum (memory), %2 = SrcData,
 * %3 = RefDataPtr1, %4 = RefDataPtr2, %5 = SrcStride (memory),
 * %6 = RefStride (memory).
 */
static ogg_uint32_t inter8x8_err_xy2__mmx (unsigned char *SrcData, ogg_uint32_t SrcStride,
                                           unsigned char *RefDataPtr1,
                                           unsigned char *RefDataPtr2, ogg_uint32_t RefStride)
{
  ogg_uint32_t XSum;
  ogg_uint32_t XXSum;

  __asm__ __volatile__ (
    " .balign 16 \n\t"

    " pcmpeqd %%mm4, %%mm4 \n\t"   /* mm4 = all ones ... */
    " paddb %%mm4, %%mm4 \n\t"     /* ... 0xff + 0xff per byte -> fefefefefefefefe in mm4 */
    " pxor %%mm5, %%mm5 \n\t"      /* mm5 = running sum, four 16-bit lanes */
    " pxor %%mm6, %%mm6 \n\t"      /* mm6 = zero, used for unpacking */
    " pxor %%mm7, %%mm7 \n\t"      /* mm7 = running sum of squares, two 32-bit lanes */
    " mov $8, %%edi \n\t"          /* 8 rows */
    "1: \n\t"
    " movq (%2), %%mm0 \n\t"       /* take 8 source bytes */

    " movq (%3), %%mm2 \n\t"       /* 8 bytes from the first reference */
    " movq (%4), %%mm3 \n\t"       /* 8 bytes from the second reference */

    /* Per-byte average of mm2 and mm3, rounded down:
       (a & b) + (((a ^ b) & 0xfe) >> 1). */
    " movq %%mm2, %%mm1 \n\t"
    " pand %%mm3, %%mm1 \n\t"
    " pxor %%mm2, %%mm3 \n\t"
    " pand %%mm4, %%mm3 \n\t"
    " psrlq $1, %%mm3 \n\t"
    " paddb %%mm3, %%mm1 \n\t"     /* mm1 = averaged reference row */

    " movq %%mm0, %%mm2 \n\t"
    " movq %%mm1, %%mm3 \n\t"

    " punpcklbw %%mm6, %%mm0 \n\t" /* low halves -> words */
    " punpcklbw %%mm6, %%mm1 \n\t"
    " punpckhbw %%mm6, %%mm2 \n\t" /* high halves -> words */
    " punpckhbw %%mm6, %%mm3 \n\t"

    " psubsw %%mm1, %%mm0 \n\t"    /* src - averaged reference, as signed words */
    " psubsw %%mm3, %%mm2 \n\t"

    " paddw %%mm0, %%mm5 \n\t"     /* add both halves to the sum */
    " paddw %%mm2, %%mm5 \n\t"

    " pmaddwd %%mm0, %%mm0 \n\t"   /* square each word, add adjacent pairs -> dwords */
    " pmaddwd %%mm2, %%mm2 \n\t"

    " paddd %%mm0, %%mm7 \n\t"     /* add both halves to the sum of squares */
    " paddd %%mm2, %%mm7 \n\t"

    " add %5, %2 \n\t"             /* advance the source pointer by SrcStride */
    " add %6, %3 \n\t"             /* advance the first reference pointer by RefStride */
    " add %6, %4 \n\t"             /* advance the second reference pointer by RefStride */

    " dec %%edi \n\t"
    " jnz 1b \n\t"

    /* Horizontal add of the four 16-bit lanes of mm5. */
    " movq %%mm5, %%mm0 \n\t"
    " psrlq $32, %%mm5 \n\t"
    " paddw %%mm0, %%mm5 \n\t"
    " movq %%mm5, %%mm0 \n\t"
    " psrlq $16, %%mm5 \n\t"
    " paddw %%mm0, %%mm5 \n\t"
    " movd %%mm5, %%edi \n\t"
    " movsx %%di, %%edi \n\t"      /* the sum is signed: sign-extend the low word */
    " movl %%edi, %0 \n\t"         /* XSum */

    /* Horizontal add of the two 32-bit lanes of mm7. */
    " movq %%mm7, %%mm0 \n\t"
    " psrlq $32, %%mm7 \n\t"
    " paddd %%mm0, %%mm7 \n\t"
    " movd %%mm7, %1 \n\t"         /* XXSum */

    : "=m" (XSum),
      "=m" (XXSum),
      "+r" (SrcData),
      "+r" (RefDataPtr1),
      "+r" (RefDataPtr2)
    : "m" (SrcStride),
      "m" (RefStride)
    : "edi", "memory"
  );

  /* 64 * sum-of-squares minus squared sum, returned as the mis-match metric. */
  return (( (XXSum<<6) - XSum*XSum ));
}

/*
 * Executes the `emms` instruction (Empty MMX State).  The routines above
 * write MMX registers and do not issue `emms` themselves.
 */
static void restore_fpu (void)
{
  __asm__ __volatile__ (
    " emms \n\t"
  );
}

/*
 * Installs the MMX implementations into the function table `funcs`.
 *
 * Emits a debug message through TH_DEBUG, then stores one function
 * pointer per table slot.  Several of the installed routines
 * (sub8x8__mmx, sub8x8_128__mmx, sub8x8avg2__mmx, row_sad8__mmx,
 * col_sad8x8__mmx, sad8x8__mmx) are defined outside this part of the
 * file.  `funcs` is dereferenced without a NULL check.
 */
void dsp_mmx_init(DspFunctions *funcs)
{
  TH_DEBUG("enabling accelerated x86_32 mmx dsp functions.\n");
  funcs->restore_fpu = restore_fpu;
  funcs->sub8x8 = sub8x8__mmx;
  funcs->sub8x8_128 = sub8x8_128__mmx;
  funcs->sub8x8avg2 = sub8x8avg2__mmx;
  funcs->row_sad8 = row_sad8__mmx;
  funcs->col_sad8x8 = col_sad8x8__mmx;
  funcs->sad8x8 = sad8x8__mmx;
  funcs->sad8x8_thres = sad8x8_thres__mmx;
  funcs->sad8x8_xy2_thres = sad8x8_xy2_thres__mmx;
  funcs->intra8x8_err = intra8x8_err__mmx;
  funcs->inter8x8_err = inter8x8_err__mmx;
  funcs->inter8x8_err_xy2 = inter8x8_err_xy2__mmx;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -