⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dequantize_mmx.h

📁 一个很好用的MPEG1/4的开源编码器
💻 H
📖 第 1 页 / 共 2 页
字号:
		DEQUANTIZE_INTRA_LOCAL_STEP(4)
		DEQUANTIZE_PRESCALE_STEP(4)
		DEQUANTIZE_INTRA_LOCAL_STEP(5)
		DEQUANTIZE_PRESCALE_STEP(5)
		DEQUANTIZE_INTRA_LOCAL_STEP(6)
		DEQUANTIZE_PRESCALE_STEP(6)
		DEQUANTIZE_INTRA_LOCAL_STEP(7)
		DEQUANTIZE_PRESCALE_STEP(7)
	        : "=r"(block), "=r"(dqmatrix), "=r"(cache), "=r"(psmatrix)
		: "0"(block), "1"(dqmatrix), "2"(cache), "3"(psmatrix)
		: "memory");
}

/*
 * dequantize_inter_global - MMX dequantization of one block of inter levels,
 * with a mismatch value carried across calls through *mismatch.
 *
 * block    - input levels; read as eight rows of eight 16-bit words
 *            (asm operand %0, byte offsets 0x00..0x78).
 * cache    - asm operand %2; in the C code visible here only cache[63] is
 *            adjusted by +/-1.  Presumably the destination written by
 *            DEQUANTIZE_PRESCALE_STEP, which is defined outside this chunk
 *            -- TODO confirm.
 * dqmatrix - per-coefficient multipliers, read word-wise at the same offsets
 *            as block (asm operand %1).
 * psmatrix - asm operand %3; not referenced by the macro defined in this
 *            function, presumably consumed by DEQUANTIZE_PRESCALE_STEP
 *            -- TODO confirm.
 * mismatch - in/out running accumulator.  Each call adds (m << 12), where m
 *            is the low 32 bits of %mm6 after the main asm statement; once it
 *            passes +/-26887 the last cached coefficient is bumped by one and
 *            the threshold is taken back out of the accumulator.
 *
 * For every nonzero level the macro below computes
 *     sign(level) * (((2*|level| + 1) * Q) >> 4)
 * on 16-bit lanes (pmullw keeps only the low 16 bits of the product), and
 * leaves zero levels at zero.
 *
 * NOTE(review): %mm6 is produced by the first asm statement and consumed by
 * the second one.  No MMX register is listed as clobbered and nothing tells
 * the compiler the two statements are linked, so this relies on the compiler
 * not touching MMX/x87 state in between -- confirm against the GCC extended
 * asm rules.
 */
static void inline dequantize_inter_global(dct_t *block,
					   dct_t *cache,
					   dct_t *dqmatrix,
					   dct_t *psmatrix,
					   dct_t *mismatch)
{
  unsigned int m;

/* Dequantize row x (16 bytes at offset 0x<x>0): words 0-3 go through  */
/* mm0/mm2/mm4, words 4-7 through mm1/mm3/mm5.  Expects mm7 == 0 and   */
/* XORs both result halves into mm6.                                   */
#define DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(x)				     \
	"movq 0x" #x "0(%0), %%mm4\n"     /* mm4 = block row x, words 0-3 */  \
	"pxor %%mm2, %%mm2\n"        /* mm2 = 0, turned into sign mask below */ \
        "movq %%mm4, %%mm0\n"        /* mm0 = working copy of words 0-3 */    \
	"movq 0x" #x "8(%0), %%mm5\n"     /* mm5 = block row x, words 4-7 */  \
	"pxor %%mm3, %%mm3\n"        /* mm3 = 0, turned into sign mask below */ \
        "movq %%mm5, %%mm1\n"        /* mm1 = working copy of words 4-7 */    \
	"psllw $1, %%mm0\n"            /* mm0 = 2*level */		     \
	"pcmpgtw %%mm4, %%mm2\n"       /* mm2 = (level<0)?0xffff:0x0000 */     \
	"psllw $1, %%mm1\n"            /* mm1 = 2*level */		     \
	"pcmpgtw %%mm5, %%mm3\n"       /* mm3 = (level<0)?0xffff:0x0000 */     \
	"pxor %%mm2, %%mm0\n"          /* mm0 = 2*|level|-(level<0) */	     \
	"pxor %%mm3, %%mm1\n"          /* mm1 = 2*|level|-(level<0) */	     \
	"pcmpeqw %%mm7, %%mm4\n"       /* mm4 = (level==0)?0xffff:0x0000 */    \
	"pcmpeqw %%mm7, %%mm5\n"       /* mm5 = (level==0)?0xffff:0x0000 */    \
	"psubsw %%mm2, %%mm0\n"        /* mm0 = 2*|level| */		     \
	"psubsw %%mm3, %%mm1\n"        /* mm1 = 2*|level| */		     \
	"pcmpeqw %%mm7, %%mm4\n"       /* invert: mm4 = (level!=0)?0xffff:0 */ \
	"pcmpeqw %%mm7, %%mm5\n"       /* invert: mm5 = (level!=0)?0xffff:0 */ \
	"psubw %%mm4, %%mm0\n"         /* mm0 = 2*|level|+(level!=0) */	     \
	"psubw %%mm5, %%mm1\n"         /* mm1 = 2*|level|+(level!=0) */	     \
	"pmullw 0x" #x "0(%1), %%mm0\n" /* mm0 *= dqmatrix row x, words 0-3 (low 16 bits) */ \
	"pmullw 0x" #x "8(%1), %%mm1\n" /* mm1 *= dqmatrix row x, words 4-7 (low 16 bits) */ \
	"psraw $0x04, %%mm0\n"       /* divide by 16 */			     \
	"psraw $0x04, %%mm1\n"       /* divide by 16 */			     \
	"pxor %%mm2, %%mm0\n"       /* start restoring sign: mm0 ^= sign mask */ \
        "pxor %%mm3, %%mm1\n"       /* start restoring sign: mm1 ^= sign mask */ \
	"psubsw %%mm2, %%mm0\n"        /* mm0 = magnitude * sign(level) */     \
	"psubsw %%mm3, %%mm1\n"        /* mm1 = magnitude * sign(level) */     \
	"pxor %%mm0, %%mm6\n"        /* XOR words 0-3 into mismatch accumulator */ \
	"pxor %%mm1, %%mm6\n"        /* XOR words 4-7 into mismatch accumulator */

  /* Operands: %0 = block, %1 = dqmatrix, %2 = cache, %3 = psmatrix.  Each   */
  /* pointer is both input and output, so the compiler assumes the asm may   */
  /* modify them.                                                            */
  asm volatile ("pxor %%mm7, %%mm7\n"        /* mm7 = 0, zero constant for the steps */
		"pxor %%mm6, %%mm6\n"        /* mm6 = 0, mismatch accumulator */
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(0)
		DEQUANTIZE_PRESCALE_STEP(0)
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(1)
		DEQUANTIZE_PRESCALE_STEP(1)
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(2)
		DEQUANTIZE_PRESCALE_STEP(2)
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(3)
		DEQUANTIZE_PRESCALE_STEP(3)
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(4)
		DEQUANTIZE_PRESCALE_STEP(4)
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(5)
		DEQUANTIZE_PRESCALE_STEP(5)
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(6)
		DEQUANTIZE_PRESCALE_STEP(6)
		DEQUANTIZE_INTER_GLOBAL_DEQUANT_STEP(7)
		DEQUANTIZE_GLOBAL_MISMATCH_CONTROL()
		/* WARNING: the mismatch correction is too small and would be */
		/* zeroed by the prescale step.  Over the long term that would */
		/* cause artifacts, because the last coefficient is very likely */
		/* to be 0 and so should be rounded up most of the time. */
		/* Instead the mismatch is accumulated until it is large */
		/* enough to produce significant output after the iDCT; the */
		/* accumulator is reset when the block is coded intra. */
		DEQUANTIZE_PRESCALE_STEP(7)
	        : "=r"(block), "=r"(dqmatrix), "=r"(cache), "=r"(psmatrix)
		: "0"(block), "1"(dqmatrix), "2"(cache), "3"(psmatrix)
		: "memory");

  /* Copy the low 32 bits of mm6 into m (see the NOTE(review) in the header */
  /* comment about reading mm6 from a separate asm statement).              */
  asm volatile("movd %%mm6, %0\n"           /* export mismatch */
	       : "=r"(m)
	       : /* no input */
	       );

  /* NOTE(review): m is unsigned int; the shifted value is narrowed by the */
  /* cast if dct_t is smaller than 32 bits -- confirm dct_t's width.       */
  *mismatch += (dct_t) (m<<12);

  /* threshold is ((1 << 16)/(16*psmatrix[63]) * (1 << 12) + 0.5) = 26887 */
  /* NOTE(review): the macro is defined inside the function body and is   */
  /* never #undef'd in the visible code, so it stays defined afterwards.  */
#define MISMATCH_THRESHOLD 26887
  if(*mismatch > MISMATCH_THRESHOLD) { /* after this threshold, prescaled mismatch is >= 1 */
    cache[63] ++; /* add mismatch */
    *mismatch -= MISMATCH_THRESHOLD;
  }
  /* NOTE(review): the literal below duplicates -MISMATCH_THRESHOLD. */
  if(*mismatch < (-26887)) {
    cache[63] --; /* sub mismatch */
    *mismatch += MISMATCH_THRESHOLD;
  }
}

/*
 * dequantize_inter_local - MMX dequantization of one block of inter levels,
 * forcing each nonzero result to an odd value instead of tracking mismatch
 * across calls.
 *
 * block    - input levels; read as eight rows of eight 16-bit words
 *            (asm operand %0, byte offsets 0x00..0x78).
 * cache    - asm operand %2; not referenced by the macro defined in this
 *            function.  Presumably the destination written by
 *            DEQUANTIZE_PRESCALE_STEP, which is defined outside this chunk
 *            -- TODO confirm.
 * dqmatrix - per-coefficient multipliers, read word-wise at the same offsets
 *            as block (asm operand %1).
 * psmatrix - asm operand %3; presumably consumed by DEQUANTIZE_PRESCALE_STEP
 *            -- TODO confirm.
 * mismatch - not used by this function.
 */
static void inline dequantize_inter_local(dct_t *block,
					  dct_t *cache,
					  dct_t *dqmatrix,
					  dct_t *psmatrix,
					  dct_t *mismatch /* not used */)
{
  /* coeff[i] = ((2*level[i]+sign(level[i]))*qscale*matrix[i])/16 */
  /* then coeff[i] = { coeff[i] + 1, if coeff[i] < 0 and coeff[i] is even */
  /*                 { coeff[i] - 1, if coeff[i] > 0 and coeff[i] is even */
  /*                 { coeff[i] otherwise                                 */
  /* TODO: check efficiency of new inter_global method on this */

/* Dequantize row x (16 bytes at offset 0x<x>0): words 0-3 go through  */
/* mm0/mm2/mm4, words 4-7 through mm1/mm3/mm5.  Expects mm7 == 0.      */
/* In the comments below s is the sign mask: 0xffff if level<0, else 0. */
#define DEQUANTIZE_INTER_LOCAL_STEP(x)					 \
    "movq 0x" #x "0(%0), %%mm0\n"     /* mm0 = block row x, words 0-3 */ \
    "movq 0x" #x "8(%0), %%mm1\n"     /* mm1 = block row x, words 4-7 */ \
    "movq %%mm0, %%mm2\n"             /* mm2 = copy of words 0-3 */	 \
    "movq %%mm1, %%mm3\n"             /* mm3 = copy of words 4-7 */	 \
    "psraw $0x0f, %%mm2\n"            /* mm2 = s = (sign(level)-1)/2 for level!=0 */ \
    "psraw $0x0f, %%mm3\n"            /* mm3 = s = (sign(level)-1)/2 for level!=0 */ \
    "paddsw %%mm2, %%mm0\n"           /* mm0 = [0-3] + s */		 \
    "paddsw %%mm3, %%mm1\n"           /* mm1 = [4-7] + s */		 \
    "paddsw %%mm0, %%mm0\n"           /* mm0 = 2*[0-3]+sign([0-3])-1 */	 \
    "paddsw %%mm1, %%mm1\n"           /* mm1 = 2*[4-7]+sign([4-7])-1 */	 \
    "pmullw 0x" #x "0(%1), %%mm0\n"   /* mm0=(2*[0-3]+sign([0-3])-1)*Q*/ \
    "pmullw 0x" #x "8(%1), %%mm1\n"   /* mm1=(2*[4-7]+sign([4-7])-1)*Q*/ \
    "movq %%mm0, %%mm4\n"             /* mm4 = product before adding Q */ \
    "movq %%mm1, %%mm5\n"             /* mm5 = product before adding Q */ \
    "paddsw 0x" #x "0(%1), %%mm0\n"   /* mm0=(2*[0-3]+sign([0-3]))*Q*/ \
    "paddsw 0x" #x "8(%1), %%mm1\n"   /* mm1=(2*[4-7]+sign([4-7]))*Q*/ \
    "pcmpeqw %%mm7, %%mm4\n"          /* mm4 = 0xffff where product == 0 */ \
    "pcmpeqw %%mm7, %%mm5\n"          /* mm5 = 0xffff where product == 0 */ \
    "pcmpeqw %%mm7, %%mm4\n"          /* invert: mm4 = 0xffff where product != 0 */ \
    "pcmpeqw %%mm7, %%mm5\n"          /* invert: mm5 = 0xffff where product != 0 */ \
    "psllw $0x04, %%mm2\n"            /* mm2 = s<<4 (-16 if level<0) */	 \
    "psllw $0x04, %%mm3\n"            /* mm3 = s<<4 (-16 if level<0) */	 \
    "psubw %%mm2, %%mm0\n"            /* negatives: mm0 += 16 */	 \
    "psubw %%mm3, %%mm1\n"            /* negatives: mm1 += 16 */	 \
    "psraw $0x04, %%mm2\n"            /* mm2 = s again */		 \
    "psraw $0x04, %%mm3\n"            /* mm3 = s again */		 \
    "paddw %%mm2, %%mm0\n"            /* negatives: net +15, so the shift rounds toward 0 */ \
    "paddw %%mm3, %%mm1\n"            /* negatives: net +15, so the shift rounds toward 0 */ \
    "psraw $0x04, %%mm0\n"            /* divide by 16 */		 \
    "psraw $0x04, %%mm1\n"            /* divide by 16 */		 \
    "pcmpeqw %%mm7, %%mm2\n"          /* mm2 = 0xffff where level >= 0 */ \
    "pcmpeqw %%mm7, %%mm3\n"          /* mm3 = 0xffff where level >= 0 */ \
    "paddw %%mm2, %%mm0\n"            /* subtract 1 where level >= 0 */	 \
    "paddw %%mm3, %%mm1\n"            /* subtract 1 where level >= 0 */	 \
    "por " ASMSYM "_mmx_1, %%mm0\n"   /* OR with _mmx_1 (presumably 1 in every word; defined elsewhere) */ \
    "por " ASMSYM "_mmx_1, %%mm1\n"   /* OR with _mmx_1 (presumably 1 in every word; defined elsewhere) */ \
    "pand %%mm4, %%mm0\n"             /* [0-3]=0 if [0-3] was zero */	 \
    "pand %%mm5, %%mm1\n"             /* [4-7]=0 if [4-7] was zero */

  /* Operands: %0 = block, %1 = dqmatrix, %2 = cache, %3 = psmatrix.       */
  /* mm6 is cleared here although DEQUANTIZE_INTER_LOCAL_STEP never uses   */
  /* it; whether DEQUANTIZE_PRESCALE_STEP does cannot be seen from here.   */
  asm volatile ("pxor %%mm7, %%mm7\n"        /* mm7 = 0, zero constant for the steps */
		"pxor %%mm6, %%mm6\n"        /* mm6 = 0 (mismatch accumulator) */
		DEQUANTIZE_INTER_LOCAL_STEP(0)
		DEQUANTIZE_PRESCALE_STEP(0)
		DEQUANTIZE_INTER_LOCAL_STEP(1)
		DEQUANTIZE_PRESCALE_STEP(1)
		DEQUANTIZE_INTER_LOCAL_STEP(2)
		DEQUANTIZE_PRESCALE_STEP(2)
		DEQUANTIZE_INTER_LOCAL_STEP(3)
		DEQUANTIZE_PRESCALE_STEP(3)
		DEQUANTIZE_INTER_LOCAL_STEP(4)
		DEQUANTIZE_PRESCALE_STEP(4)
		DEQUANTIZE_INTER_LOCAL_STEP(5)
		DEQUANTIZE_PRESCALE_STEP(5)
		DEQUANTIZE_INTER_LOCAL_STEP(6)
		DEQUANTIZE_PRESCALE_STEP(6)
		DEQUANTIZE_INTER_LOCAL_STEP(7)
		DEQUANTIZE_PRESCALE_STEP(7)
	        : "=r"(block), "=r"(dqmatrix), "=r"(cache), "=r"(psmatrix)
		: "0"(block), "1"(dqmatrix), "2"(cache), "3"(psmatrix)
		: "memory");
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -