📄 mad_mmx.c
字号:
/************************************************************************** * * * This code is developed by Eugene Kuznetsov. This software is an * * implementation of a part of one or more MPEG-4 Video tools as * * specified in ISO/IEC 14496-2 standard. Those intending to use this * * software module in hardware or software products are advised that its * * use may infringe existing patents or copyrights, and any such use * * would be at such party's own risk. The original developer of this * * software module and his/her company, and subsequent editors and their * * companies (including Project Mayo), will have no liability for use of * * this software or modifications or derivatives thereof. * * * * Project Mayo gives users of the Codec a license to this software * * module or modifications thereof for use in hardware or software * * products claiming conformance to the MPEG-4 Video Standard as * * described in the Open DivX license. * * * * The complete Open DivX license can be found at * * http://www.projectmayo.com/opendivx/license.php . * * * **************************************************************************//************************************************************************** * * mad.c, utility functions that calculate MADs and SADs. * * Copyright (C) 2001 Project Mayo * * Eugene Kuznetsov * * DivX Advance Research Center <darc@projectmayo.com> * **************************************************************************//************************************************************************** * * Modifications: * * 19.09.2001 removed some warnings * * Michael Militzer <isibaar@videocoding.de> * **************************************************************************/#include "../mad.h"#define ABS(X) (((X)>0)?(X):-(X))float MAD_Image(const Image* pIm, const Image* pImage){ int x, y; int32_t sum=0;// int iStride=pImage->iWidth; int iStride2=pImage->iEdgedWidth; for(y=0; y<pImage->iHeight; y++) for(x=0; x<pImage->iWidth; x++) sum+=ABS((int32_t)pImage->pY[x+y*iStride2]-(int32_t)pIm->pY[x+y*iStride2]);// iStride/=2; iStride2/=2; for(y=0; y<pImage->iHeight/2; y++) for(x=0; x<pImage->iWidth/2; x++) sum+=ABS((int32_t)pImage->pU[x+y*iStride2]-(int32_t)pIm->pU[x+y*iStride2]); for(y=0; y<pImage->iHeight/2; y++) for(x=0; x<pImage->iWidth/2; x++) sum+=ABS((int32_t)pImage->pV[x+y*iStride2]-(int32_t)pIm->pV[x+y*iStride2]); return ((float)sum)/(pImage->iWidth*pImage->iHeight*3/2);}// x & y in blocks ( 8 pixel units )// dx & dy in pixelsstatic const int64_t mm_FFFFFFFFFFFFFFFF=0xFFFFFFFFFFFFFFFFi64;#define SAD_INIT \ __asm xor eax, eax \ __asm movq mm7, mm_FFFFFFFFFFFFFFFF \ __asm pxor mm0, mm0 \ __asm pxor mm1, mm1// "movl %1, %%ecx\n" // "movl %2, %%edx\n" #define SAD_ONE_STEP(X) \ __asm movq mm2, [ecx+X] \ __asm movq mm3, [edx+X] \ \ __asm movq mm4, mm2 \ __asm movq mm5, mm3 \ __asm punpcklbw mm2, mm0 \ __asm punpckhbw mm4, mm0 \ __asm punpcklbw mm3, mm0 \ __asm punpckhbw mm5, mm0 \ \ __asm psubw mm3, mm2 \ __asm psubw mm5, mm4 \ \ __asm movq mm2, mm3 \ __asm movq mm4, mm5 \ __asm pcmpgtw mm2, mm0 \ __asm pcmpgtw mm4, mm0 \ __asm pxor mm2, mm7 \ __asm pxor mm4, mm7 \ __asm pxor mm3, mm2 \ __asm pxor mm5, mm4 \ __asm psubw mm3, mm2 \ __asm psubw mm5, mm4 \ __asm paddusw mm1, mm3 \ __asm paddusw mm1, mm5 #define SAD_PACK \ __asm movq mm2, mm1 \ __asm psrlq mm1, 32 \ __asm paddusw mm1, mm2 \ __asm movq mm2, mm1 \ __asm psrlq mm1, 16 \ __asm paddusw mm1, mm2 \ __asm movd ecx, mm1 \ __asm and ecx, 0xFFFF int32_t SAD_Block(const Image* pIm, const Image* pImage, int x, int y, int dx, int dy, int sad_opt, int component){ int32_t sum=0;// int i, j; const uint8_t *pRef; const uint8_t *pCur; int iWidth=pImage->iWidth; int iEdgedWidth=pImage->iEdgedWidth; switch(component) { case 0: pRef=pIm->pY+x*8+y*8*pImage->iEdgedWidth; pCur=pImage->pY+(x*8+dx)+(y*8+dy)*pImage->iEdgedWidth; break; case 1: pRef=pIm->pU+x*8+y*8*pImage->iEdgedWidth/2; pCur=pImage->pU+(x*8+dx)+(y*8+dy)*pImage->iEdgedWidth/2; break; case 2: default: pRef=pIm->pV+x*8+y*8*pImage->iEdgedWidth/2; pCur=pImage->pV+(x*8+dx)+(y*8+dy)*pImage->iEdgedWidth/2; break; } if(component) { iWidth/=2; iEdgedWidth/=2; } SAD_INIT __asm mov edi, 8 __asm mov ecx, pRef __asm mov edx, pCur p1: SAD_ONE_STEP(0) __asm add ecx, iEdgedWidth __asm add edx, iEdgedWidth __asm dec edi __asm jnz p1 SAD_PACK __asm mov sum, ecx return sum;}int32_t SAD_Macroblock(const Image* pIm, const Image* pImageN, const Image* pImageH, const Image* pImageV, const Image* pImageHV, int x, int y, int dx, int dy, int sad_opt, int iQuality){ const Image* pImage; int32_t sum=0;// int i, j; const uint8_t *pRef; const uint8_t *pCur; int iWidth=pImageN->iEdgedWidth; int iEdgedWidth=pImageN->iEdgedWidth; switch(((dx%2)?2:0)+((dy%2)?1:0)) { case 0: pImage=pImageN; break; case 1: pImage=pImageV; dy--; break; case 2: pImage=pImageH; dx--; break; case 3: default: pImage=pImageHV; dx--; dy--; break; } dx/=2; dy/=2; pRef=pIm->pY+x*16+y*16*iEdgedWidth; pCur=pImage->pY+(x*16+dx)+(y*16+dy)*iEdgedWidth; switch(iQuality) { case 1: iEdgedWidth*=4; iEdgedWidth*=4; SAD_INIT __asm mov edi, 4 __asm mov ecx, pRef __asm mov edx, pCur p4: SAD_ONE_STEP(0) SAD_ONE_STEP(8) __asm add ecx, iEdgedWidth __asm add edx, iEdgedWidth __asm dec edi __asm jnz p4 SAD_PACK __asm mov sum, ecx return sum*4;case 2: iEdgedWidth*=2; iEdgedWidth*=2; SAD_INIT __asm mov edi, 8 __asm mov ecx, pRef __asm mov edx, pCur p3: SAD_ONE_STEP(0) SAD_ONE_STEP(8) __asm add ecx, iEdgedWidth __asm add edx, iEdgedWidth __asm dec edi __asm jnz p3 SAD_PACK __asm mov sum, ecx return sum*2;default: SAD_INIT __asm mov ecx, pRef __asm mov edx, pCur __asm mov edi, 16 p2: SAD_ONE_STEP(0) SAD_ONE_STEP(8) __asm add ecx, iEdgedWidth __asm add edx, iEdgedWidth __asm dec edi __asm jnz p2 SAD_PACK __asm mov sum, ecx return sum; }}int32_t SAD_Deviation_MB(const Image* pIm, int x, int y){ int32_t sum=0, avg=0; const uint8_t *pRef; int i, j; int width=pIm->iEdgedWidth; pRef=pIm->pY+x*16+y*16*width; for(i=0; i<16; i++) { for(j=0; j<16; j++) sum+=(int32_t)pRef[j]; pRef+=width; } sum/=256; pRef=pIm->pY+x*16+y*16*width; for(i=0; i<16; i++) { for(j=0; j<16; j++) avg+=ABS((int32_t)pRef[j]-sum); pRef+=width; } return avg;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -