📄 dsp_mmx.c
字号:
psubw mm2, mm1 /* mm2 = FiltPtr - 128 */
movq [ebx], mm0 /* write answer out */
movq [8 + ebx], mm2 /* write answer out */
/* Increment pointers */
add ebx, 16
add eax, PixelsPerLine
};
#endif
}
static void sub8x8avg2__mmx (unsigned char *FiltPtr, unsigned char *ReconPtr1,
unsigned char *ReconPtr2, ogg_int16_t *DctInputPtr,
ogg_uint32_t PixelsPerLine,
ogg_uint32_t ReconPixelsPerLine)
{
#if 0
int i; /* For each block row */ for (i=8; i; i--) { DctInputPtr[0] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[0], DSP_OP_AVG (ReconPtr1[0], ReconPtr2[0])); DctInputPtr[1] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[1], DSP_OP_AVG (ReconPtr1[1], ReconPtr2[1])); DctInputPtr[2] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[2], DSP_OP_AVG (ReconPtr1[2], ReconPtr2[2])); DctInputPtr[3] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[3], DSP_OP_AVG (ReconPtr1[3], ReconPtr2[3])); DctInputPtr[4] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[4], DSP_OP_AVG (ReconPtr1[4], ReconPtr2[4])); DctInputPtr[5] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[5], DSP_OP_AVG (ReconPtr1[5], ReconPtr2[5])); DctInputPtr[6] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[6], DSP_OP_AVG (ReconPtr1[6], ReconPtr2[6])); DctInputPtr[7] = (ogg_int16_t) DSP_OP_DIFF (FiltPtr[7], DSP_OP_AVG (ReconPtr1[7], ReconPtr2[7])); /* Start next row */ FiltPtr += PixelsPerLine; ReconPtr1 += ReconPixelsPerLine; ReconPtr2 += ReconPixelsPerLine; DctInputPtr += 8; }
#else
__asm {
align 16
pxor mm7, mm7
mov eax, FiltPtr
mov ebx, ReconPtr1
mov ecx, ReconPtr2
mov edx, DctInputPtr
/* ITERATION 1 */
movq mm0, [eax] ; /* mm0 = FiltPtr */
movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
movq mm2, mm0 ; /* dup to prepare for up conversion */
movq mm3, mm1 ; /* dup to prepare for up conversion */
movq mm5, mm4 ; /* dup to prepare for up conversion */
; /* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
; /* average ReconPtr1 and ReconPtr2 */
paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
movq [edx], mm0 ; /* write answer out */
movq [8 + edx], mm2 ; /* write answer out */
; /* Increment pointers */
add edx, 16 ;
add eax, PixelsPerLine ;
add ebx, ReconPixelsPerLine ;
add ecx, ReconPixelsPerLine ;
/* ITERATION 2 */
movq mm0, [eax] ; /* mm0 = FiltPtr */
movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
movq mm2, mm0 ; /* dup to prepare for up conversion */
movq mm3, mm1 ; /* dup to prepare for up conversion */
movq mm5, mm4 ; /* dup to prepare for up conversion */
; /* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
; /* average ReconPtr1 and ReconPtr2 */
paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
movq [edx], mm0 ; /* write answer out */
movq [8 + edx], mm2 ; /* write answer out */
; /* Increment pointers */
add edx, 16 ;
add eax, PixelsPerLine ;
add ebx, ReconPixelsPerLine ;
add ecx, ReconPixelsPerLine ;
/* ITERATION 3 */
movq mm0, [eax] ; /* mm0 = FiltPtr */
movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
movq mm2, mm0 ; /* dup to prepare for up conversion */
movq mm3, mm1 ; /* dup to prepare for up conversion */
movq mm5, mm4 ; /* dup to prepare for up conversion */
; /* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
; /* average ReconPtr1 and ReconPtr2 */
paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
movq [edx], mm0 ; /* write answer out */
movq [8 + edx], mm2 ; /* write answer out */
; /* Increment pointers */
add edx, 16 ;
add eax, PixelsPerLine ;
add ebx, ReconPixelsPerLine ;
add ecx, ReconPixelsPerLine ;
/* ITERATION 4 */
movq mm0, [eax] ; /* mm0 = FiltPtr */
movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
movq mm2, mm0 ; /* dup to prepare for up conversion */
movq mm3, mm1 ; /* dup to prepare for up conversion */
movq mm5, mm4 ; /* dup to prepare for up conversion */
; /* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
; /* average ReconPtr1 and ReconPtr2 */
paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
movq [edx], mm0 ; /* write answer out */
movq [8 + edx], mm2 ; /* write answer out */
; /* Increment pointers */
add edx, 16 ;
add eax, PixelsPerLine ;
add ebx, ReconPixelsPerLine ;
add ecx, ReconPixelsPerLine ;
/* ITERATION 5 */
movq mm0, [eax] ; /* mm0 = FiltPtr */
movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
movq mm2, mm0 ; /* dup to prepare for up conversion */
movq mm3, mm1 ; /* dup to prepare for up conversion */
movq mm5, mm4 ; /* dup to prepare for up conversion */
; /* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
; /* average ReconPtr1 and ReconPtr2 */
paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
movq [edx], mm0 ; /* write answer out */
movq [8 + edx], mm2 ; /* write answer out */
; /* Increment pointers */
add edx, 16 ;
add eax, PixelsPerLine ;
add ebx, ReconPixelsPerLine ;
add ecx, ReconPixelsPerLine ;
/* ITERATION 6 */
movq mm0, [eax] ; /* mm0 = FiltPtr */
movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
movq mm2, mm0 ; /* dup to prepare for up conversion */
movq mm3, mm1 ; /* dup to prepare for up conversion */
movq mm5, mm4 ; /* dup to prepare for up conversion */
; /* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
; /* average ReconPtr1 and ReconPtr2 */
paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
movq [edx], mm0 ; /* write answer out */
movq [8 + edx], mm2 ; /* write answer out */
; /* Increment pointers */
add edx, 16 ;
add eax, PixelsPerLine ;
add ebx, ReconPixelsPerLine ;
add ecx, ReconPixelsPerLine ;
/* ITERATION 7 */
movq mm0, [eax] ; /* mm0 = FiltPtr */
movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
movq mm2, mm0 ; /* dup to prepare for up conversion */
movq mm3, mm1 ; /* dup to prepare for up conversion */
movq mm5, mm4 ; /* dup to prepare for up conversion */
; /* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
; /* average ReconPtr1 and ReconPtr2 */
paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
movq [edx], mm0 ; /* write answer out */
movq [8 + edx], mm2 ; /* write answer out */
; /* Increment pointers */
add edx, 16 ;
add eax, PixelsPerLine ;
add ebx, ReconPixelsPerLine ;
add ecx, ReconPixelsPerLine ;
/* ITERATION 8 */
movq mm0, [eax] ; /* mm0 = FiltPtr */
movq mm1, [ebx] ; /* mm1 = ReconPtr1 */
movq mm4, [ecx] ; /* mm1 = ReconPtr2 */
movq mm2, mm0 ; /* dup to prepare for up conversion */
movq mm3, mm1 ; /* dup to prepare for up conversion */
movq mm5, mm4 ; /* dup to prepare for up conversion */
; /* convert from UINT8 to INT16 */
punpcklbw mm0, mm7 ; /* mm0 = INT16(FiltPtr) */
punpcklbw mm1, mm7 ; /* mm1 = INT16(ReconPtr1) */
punpcklbw mm4, mm7 ; /* mm1 = INT16(ReconPtr2) */
punpckhbw mm2, mm7 ; /* mm2 = INT16(FiltPtr) */
punpckhbw mm3, mm7 ; /* mm3 = INT16(ReconPtr1) */
punpckhbw mm5, mm7 ; /* mm3 = INT16(ReconPtr2) */
; /* average ReconPtr1 and ReconPtr2 */
paddw mm1, mm4 ; /* mm1 = ReconPtr1 + ReconPtr2 */
paddw mm3, mm5 ; /* mm3 = ReconPtr1 + ReconPtr2 */
psrlw mm1, 1 ; /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */
psrlw mm3, 1 ; /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */
psubw mm0, mm1 ; /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
psubw mm2, mm3 ; /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */
movq [edx], mm0 ; /* write answer out */
movq [8 + edx], mm2 ; /* write answer out */
; /* Increment pointers */
add edx, 16 ;
add eax, PixelsPerLine ;
add ebx, ReconPixelsPerLine ;
add ecx, ReconPixelsPerLine ;
};
#endif
}
static ogg_uint32_t row_sad8__mmx (unsigned char *Src1, unsigned char *Src2)
{
#if 0
ogg_uint32_t SadValue; ogg_uint32_t SadValue1; SadValue = DSP_OP_ABS_DIFF (Src1[0], Src2[0]) + DSP_OP_ABS_DIFF (Src1[1], Src2[1]) + DSP_OP_ABS_DIFF (Src1[2], Src2[2]) + DSP_OP_ABS_DIFF (Src1[3], Src2[3]); SadValue1 = DSP_OP_ABS_DIFF (Src1[4], Src2[4]) + DSP_OP_ABS_DIFF (Src1[5], Src2[5]) + DSP_OP_ABS_DIFF (Src1[6], Src2[6]) + DSP_OP_ABS_DIFF (Src1[7], Src2[7]); SadValue = ( SadValue > SadValue1 ) ? SadValue : SadValue1; return SadValue;
#else
ogg_uint32_t MaxSad;
__asm {
align 16
mov ebx, Src1
mov ecx, Src2
pxor mm6, mm6 ; /* zero out mm6 for unpack */
pxor mm7, mm7 ; /* zero out mm7 for unpack */
movq mm0, [ebx] ; /* take 8 bytes */
movq mm1, [ecx] ;
movq mm2, mm0 ;
psubusb mm0, mm1 ; /* A - B */
psubusb mm1, mm2 ; /* B - A */
por mm0, mm1 ; /* and or gives abs difference */
movq mm1, mm0 ;
punpcklbw mm0, mm6 ; /* ; unpack low four bytes to higher precision */
punpckhbw mm1, mm7 ; /* ; unpack high four bytes to higher precision */
movq mm2, mm0 ;
movq mm3, mm1 ;
psrlq mm2, 32 ; /* fold and add */
psrlq mm3, 32 ;
paddw mm0, mm2 ;
paddw mm1, mm3 ;
movq mm2, mm0 ;
movq mm3, mm1 ;
psrlq mm2, 16 ;
psrlq mm3, 16 ;
paddw mm0, mm2 ;
paddw mm1, mm3 ;
psubusw mm1, mm0 ;
paddw mm1, mm0 ; /* mm1 = max(mm1, mm0) */
movd eax, mm1 ;
and eax, 0xffff
mov MaxSad, eax
};
return MaxSad;
#endif
}
static ogg_uint32_t col_sad8x8__mmx (unsigned char *Src1, unsigned char *Src2,
ogg_uint32_t stride)
{
#if 0
ogg_uint32_t SadValue[8] = {0,0,0,0,0,0,0,0}; ogg_uint32_t SadValue2[8] = {0,0,0,0,0,0,0,0}; ogg_uint32_t MaxSad = 0; ogg_uint32_t i; for ( i = 0; i < 4; i++ ){ SadValue[0] += abs(Src1[0] - Src2[0]); SadValue[1] += abs(Src1[1] - Src2[1]); SadValue[2] += abs(Src1[2] - Src2[2]); SadValue[3] += abs(Src1[3] - Src2[3]); SadValue[4] += abs(Src1[4] - Src2[4]); SadValue[5] += abs(Src1[5] - Src2[5]); SadValue[6] += abs(Src1[6] - Src2[6]); SadValue[7] += abs(Src1[7] - Src2[7]); Src1 += stride; Src2 += stride; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -