⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 quantize_mmx.asm

📁 网络MPEG4IP流媒体开发源代码
💻 ASM
字号:
;/**************************************************************************
; *
; *	XVID MPEG-4 VIDEO CODEC
; *	mmx quantization/dequantization
; *
; *	This program is an implementation of a part of one or more MPEG-4
; *	Video tools as specified in ISO/IEC 14496-2 standard.  Those intending
; *	to use this software module in hardware or software products are
; *	advised that its use may infringe existing patents or copyrights, and
; *	any such use would be at such party's own risk.  The original
; *	developer of this software module and his/her company, and subsequent
; *	editors and their companies, will have no liability for use of this
; *	software or modifications or derivatives thereof.
; *
; *	This program is free software; you can redistribute it and/or modify
; *	it under the terms of the GNU General Public License as published by
; *	the Free Software Foundation; either version 2 of the License, or
; *	(at your option) any later version.
; *
; *	This program is distributed in the hope that it will be useful,
; *	but WITHOUT ANY WARRANTY; without even the implied warranty of
; *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; *	GNU General Public License for more details.
; *
; *	You should have received a copy of the GNU General Public License
; *	along with this program; if not, write to the Free Software
; *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; *
; *************************************************************************/

;/**************************************************************************
; *
; *	History:
; *
; * 26.12.2001	minor bug fixes, dequant saturate, further optimization
; * 19.11.2001  quant_inter_mmx now returns sum of abs. coefficient values
; *	04.11.2001	nasm version; (c)2001 peter ross <pross@cs.rmit.edu.au>
; *
; *************************************************************************/

;---------------------------------------------------------------------------
; File conventions
;   Syntax : NASM, 32-bit code (bits 32).
;   ABI    : every argument is read from the stack ([esp + 4] upwards after
;            the return address); esi/edi are saved and restored by each
;            routine.
;   MMX    : none of the routines executes EMMS.
;   Blocks : every routine walks 16 iterations x 16 bytes = 64 int16_t values.
;   quant  : indexes 31-entry tables as [table + quant*8 - 8], so it must be
;            in the range 1..31.
;---------------------------------------------------------------------------

; enable dequant saturate [-2048,2047], test purposes only.
%define SATURATE

; data/text alignment
%define ALIGN 8

bits 32

section .data

; cglobal <name>: export <name>; when PREFIX is defined the exported symbol
; (and every later use of <name> in this file) becomes _<name>.
%macro cglobal 1
	%ifdef PREFIX
		global _%1
		%define %1 _%1
	%else
		global %1
	%endif
%endmacro

; four words of 1: pmaddwd against this adds adjacent word lanes into dwords
plus_one:
        times 4 dw 1

;===========================================================================
;
; subtract by Q/2 table
;
;===========================================================================

%macro MMX_SUB 1
        times 4 dw %1 / 2
%endmacro

align ALIGN
mmx_sub:
%assign QIDX 1
%rep 31                                         ; entries for Q = 1..31
        MMX_SUB QIDX
%assign QIDX QIDX+1
%endrep

;===========================================================================
;
; divide by 2Q table
;
; use a shift of 16 to take full advantage of _pmulhw_
; for q=1, _pmulhw_ will overflow so it is treated separately
; (3dnow2 provides _pmulhuw_ which won't cause overflow)
;
;===========================================================================

%macro MMX_DIV 1
        times 4 dw (1 << 16) / (%1 * 2) + 1
%endmacro

align ALIGN
mmx_div:
%assign QIDX 1
%rep 31                                         ; entries for Q = 1..31
        MMX_DIV QIDX
%assign QIDX QIDX+1
%endrep

;===========================================================================
;
; add by (odd(Q) ? Q : Q - 1) table
;
;===========================================================================

%macro MMX_ADD 1
%if %1 % 2 != 0
        times 4 dw %1
%else
        times 4 dw %1 - 1
%endif
%endmacro

align ALIGN
mmx_add:
%assign QIDX 1
%rep 31                                         ; entries for Q = 1..31
        MMX_ADD QIDX
%assign QIDX QIDX+1
%endrep

;===========================================================================
;
; multiply by 2Q table
;
;===========================================================================

%macro MMX_MUL 1
        times 4 dw %1 * 2
%endmacro

align ALIGN
mmx_mul:
%assign QIDX 1
%rep 31                                         ; entries for Q = 1..31
        MMX_MUL QIDX
%assign QIDX QIDX+1
%endrep

%undef QIDX

;===========================================================================
;
; saturation limits
;
;===========================================================================

align ALIGN
mmx_32768_minus_2048:
        times 4 dw (32768-2048)
mmx_32767_minus_2047:
        times 4 dw (32767-2047)

section .text


;===========================================================================
;
; void quant_intra_mmx(int16_t * coeff,
;					const int16_t const * data,
;					const uint32_t quant,
;					const uint32_t dcscalar);
;
; Writes 64 quantized values to coeff[]. Each input word is reduced to its
; absolute value, scaled with pmulhw by the mmx_div entry for quant
; (quant == 1 uses a logical shift right by one instead), and given its
; sign back. coeff[0] is then overwritten with
; (data[0] +/- dcscalar/2) / dcscalar computed with a signed idiv.
;
; In:    [esp+4] coeff, [esp+8] data, [esp+12] quant, [esp+16] dcscalar
; Clobb: eax, edx, mm0, mm1, mm3, mm4, mm7, flags (ecx/esi/edi preserved)
; NOTE(review): data[0] is re-read from memory after the loop has stored
;               coeff[0]; this presumably assumes coeff and data do not
;               alias - confirm against callers.
;
;===========================================================================

align ALIGN
cglobal quant_intra_mmx
quant_intra_mmx:
        push    ecx
        push    esi
        push    edi

        mov     edi, [esp + 12 + 4]             ; edi = coeff (destination)
        mov     esi, [esp + 12 + 8]             ; esi = data (source)
        mov     eax, [esp + 12 + 12]            ; eax = quant

        xor     ecx, ecx                        ; ecx = index in 8-byte units
        cmp     al, 1
        jz      .q1loop                         ; quant == 1: shift-only path

        movq    mm7, [mmx_div + eax * 8 - 8]    ; mm7 = 4 x (65536 / 2Q + 1)

align ALIGN
.loop:
        movq    mm0, [esi + 8*ecx]              ; mm0 = 4 input words
        movq    mm3, [esi + 8*ecx + 8]          ; mm3 = next 4 input words
        pxor    mm1, mm1                        ; mm1 = 0
        pxor    mm4, mm4                        ;
        pcmpgtw mm1, mm0                        ; mm1 = (0 > mm0) sign mask
        pcmpgtw mm4, mm3                        ;
        pxor    mm0, mm1                        ; xor + sub with the mask:
        pxor    mm3, mm4                        ;
        psubw   mm0, mm1                        ;   mm0 = |mm0|
        psubw   mm3, mm4                        ;
        pmulhw  mm0, mm7                        ; mm0 = (|mm0| * div) >> 16
        pmulhw  mm3, mm7                        ;
        pxor    mm0, mm1                        ; xor + sub with the mask:
        pxor    mm3, mm4                        ;
        psubw   mm0, mm1                        ;   restore original sign
        psubw   mm3, mm4                        ;
        movq    [edi + 8*ecx], mm0
        movq    [edi + 8*ecx + 8], mm3

        add     ecx, 2
        cmp     ecx, 16                         ; 16 units * 8 bytes = 64 words
        jnz     .loop

.done:
        ; DC term: round half away from zero, then signed divide by dcscalar
        mov     ecx, [esp + 12 + 16]            ; ecx = dcscalar
        mov     edx, ecx
        movsx   eax, word [esi]                 ; eax = data[0]
        shr     edx, 1                          ; edx = dcscalar / 2

        test    eax, eax
        jg      .gtzero

        sub     eax, edx                        ; data[0] <= 0: bias downwards
        jmp     short .mul
.gtzero:
        add     eax, edx                        ; data[0] > 0: bias upwards
.mul:
        cdq                                     ; sign-extend eax -> edx:eax
        idiv    ecx                             ; eax = edx:eax / dcscalar

        mov     [edi], ax                       ; coeff[0] = ax

        pop     edi
        pop     esi
        pop     ecx

        ret

align ALIGN
.q1loop:
        movq    mm0, [esi + 8*ecx]              ; mm0 = 4 input words
        movq    mm3, [esi + 8*ecx + 8]          ; mm3 = next 4 input words
        pxor    mm1, mm1                        ; mm1 = 0
        pxor    mm4, mm4                        ;
        pcmpgtw mm1, mm0                        ; mm1 = (0 > mm0) sign mask
        pcmpgtw mm4, mm3                        ;
        pxor    mm0, mm1                        ; xor + sub with the mask:
        pxor    mm3, mm4                        ;
        psubw   mm0, mm1                        ;   mm0 = |mm0|
        psubw   mm3, mm4                        ;
        psrlw   mm0, 1                          ; mm0 >>= 1   (/2)
        psrlw   mm3, 1                          ;
        pxor    mm0, mm1                        ; xor + sub with the mask:
        pxor    mm3, mm4                        ;
        psubw   mm0, mm1                        ;   restore original sign
        psubw   mm3, mm4                        ;
        movq    [edi + 8*ecx], mm0
        movq    [edi + 8*ecx + 8], mm3

        add     ecx, 2
        cmp     ecx, 16
        jnz     .q1loop
        jmp     short .done


;===========================================================================
;
; uint32_t quant_inter_mmx(int16_t * coeff,
;					const int16_t const * data,
;					const uint32_t quant);
;
; Writes 64 quantized values to coeff[]. Each input word is reduced to its
; absolute value, has the mmx_sub entry for quant subtracted with unsigned
; saturation, is scaled with pmulhw by the mmx_div entry (quant == 1 uses a
; logical shift right by one instead), and is given its sign back.
;
; In:    [esp+4] coeff, [esp+8] data, [esp+12] quant
; Out:   eax = sum of the 64 unsigned quantized magnitudes (accumulated in
;        four 16-bit lanes of mm5, then added together)
; Clobb: eax, mm0, mm1, mm3, mm4, mm5, mm6, mm7, flags
;        (ecx/esi/edi preserved)
;
;===========================================================================

align ALIGN
cglobal quant_inter_mmx
quant_inter_mmx:
        push    ecx
        push    esi
        push    edi

        mov     edi, [esp + 12 + 4]             ; edi = coeff (destination)
        mov     esi, [esp + 12 + 8]             ; esi = data (source)
        mov     eax, [esp + 12 + 12]            ; eax = quant

        xor     ecx, ecx                        ; ecx = index in 8-byte units

        pxor    mm5, mm5                        ; mm5 = running sum (4 lanes)

        movq    mm6, [mmx_sub + eax * 8 - 8]    ; mm6 = 4 x (Q / 2)

        cmp     al, 1
        jz      .q1loop                         ; quant == 1: shift-only path

        movq    mm7, [mmx_div + eax * 8 - 8]    ; mm7 = 4 x (65536 / 2Q + 1)

align ALIGN
.loop:
        movq    mm0, [esi + 8*ecx]              ; mm0 = 4 input words
        movq    mm3, [esi + 8*ecx + 8]          ; mm3 = next 4 input words
        pxor    mm1, mm1                        ; mm1 = 0
        pxor    mm4, mm4                        ;
        pcmpgtw mm1, mm0                        ; mm1 = (0 > mm0) sign mask
        pcmpgtw mm4, mm3                        ;
        pxor    mm0, mm1                        ; xor + sub with the mask:
        pxor    mm3, mm4                        ;
        psubw   mm0, mm1                        ;   mm0 = |mm0|
        psubw   mm3, mm4                        ;
        psubusw mm0, mm6                        ; mm0 -= Q/2 (unsigned, floors at 0)
        psubusw mm3, mm6                        ;
        pmulhw  mm0, mm7                        ; mm0 = (mm0 * div) >> 16
        pmulhw  mm3, mm7                        ;
        paddw   mm5, mm0                        ; sum += magnitude
        pxor    mm0, mm1                        ; xor + sub with the mask:
        paddw   mm5, mm3                        ;   (sum update interleaved)
        pxor    mm3, mm4                        ;
        psubw   mm0, mm1                        ;   restore original sign
        psubw   mm3, mm4
        movq    [edi + 8*ecx], mm0
        movq    [edi + 8*ecx + 8], mm3

        add     ecx, 2
        cmp     ecx, 16                         ; 16 units * 8 bytes = 64 words
        jnz     .loop

.done:
        pmaddwd mm5, [plus_one]                 ; 4 word lanes -> 2 dword sums
        movq    mm0, mm5
        psrlq   mm5, 32
        paddd   mm0, mm5                        ; low dword = total of all lanes
        movd    eax, mm0                        ; return sum

        pop     edi
        pop     esi
        pop     ecx

        ret

align ALIGN
.q1loop:
        movq    mm0, [esi + 8*ecx]              ; mm0 = 4 input words
        movq    mm3, [esi + 8*ecx + 8]          ; mm3 = next 4 input words
        pxor    mm1, mm1                        ; mm1 = 0
        pxor    mm4, mm4                        ;
        pcmpgtw mm1, mm0                        ; mm1 = (0 > mm0) sign mask
        pcmpgtw mm4, mm3                        ;
        pxor    mm0, mm1                        ; xor + sub with the mask:
        pxor    mm3, mm4                        ;
        psubw   mm0, mm1                        ;   mm0 = |mm0|
        psubw   mm3, mm4                        ;
        psubusw mm0, mm6                        ; mm0 -= Q/2 (unsigned, floors at 0)
        psubusw mm3, mm6                        ;
        psrlw   mm0, 1                          ; mm0 >>= 1   (/2)
        psrlw   mm3, 1                          ;
        paddw   mm5, mm0                        ; sum += magnitude
        pxor    mm0, mm1                        ; xor + sub with the mask:
        paddw   mm5, mm3                        ;   (sum update interleaved)
        pxor    mm3, mm4                        ;
        psubw   mm0, mm1                        ;   restore original sign
        psubw   mm3, mm4
        movq    [edi + 8*ecx], mm0
        movq    [edi + 8*ecx + 8], mm3

        add     ecx, 2
        cmp     ecx, 16
        jnz     .q1loop

        jmp     .done


;===========================================================================
;
; void dequant_intra_mmx(int16_t *data,
;					const int16_t const *coeff,
;					const uint32_t quant,
;					const uint32_t dcscalar);
;
; Writes 64 dequantized values to data[]. For every input word c:
;   c == 0 -> 0
;   c != 0 -> sign(c) * (|c| * 2Q + add), add = mmx_add entry for quant
; (16-bit lane arithmetic), clamped to [-2048, 2047] when SATURATE is
; defined. data[0] is then overwritten with coeff[0] * dcscalar.
;
; The DC product is formed in 32 bits before the clamp. A 16-bit multiply
; wraps modulo 2^16 first, so a product such as 1000 * 40 = 40000 would be
; seen as -25536 and clamp to the wrong bound.
;
; In:    [esp+4] data, [esp+8] coeff, [esp+12] quant, [esp+16] dcscalar
; Clobb: eax, mm0-mm7, flags (esi/edi preserved)
; NOTE(review): coeff[0] is re-read from memory after the loop has stored
;               data[0]; this presumably assumes data and coeff do not
;               alias - confirm against callers.
;
;===========================================================================

align ALIGN
cglobal dequant_intra_mmx
dequant_intra_mmx:
        push    esi
        push    edi

        mov     edi, [esp + 8 + 4]              ; edi = data (destination)
        mov     esi, [esp + 8 + 8]              ; esi = coeff (source)
        mov     eax, [esp + 8 + 12]             ; eax = quant

        movq    mm6, [mmx_add + eax * 8 - 8]    ; mm6 = 4 x (odd(Q) ? Q : Q-1)
        movq    mm7, [mmx_mul + eax * 8 - 8]    ; mm7 = 4 x 2Q
        xor     eax, eax                        ; eax = index in 8-byte units

align ALIGN
.loop:
        movq    mm0, [esi + 8*eax]              ; mm0 = 4 coefficients
        movq    mm3, [esi + 8*eax + 8]          ; mm3 = next 4 coefficients
        pxor    mm1, mm1                        ; mm1 = 0
        pxor    mm4, mm4                        ;
        pcmpgtw mm1, mm0                        ; mm1 = (0 > mm0) sign mask
        pcmpgtw mm4, mm3                        ;
        pxor    mm2, mm2                        ; mm2 = 0
        pxor    mm5, mm5                        ;
        pcmpeqw mm2, mm0                        ; mm2 = (0 == mm0)
        pcmpeqw mm5, mm3                        ;
        pandn   mm2, mm6                        ; mm2 = (iszero ? 0 : add)
        pandn   mm5, mm6                        ;
        pxor    mm0, mm1                        ; xor + sub with the mask:
        pxor    mm3, mm4                        ;
        psubw   mm0, mm1                        ;   mm0 = |mm0|
        psubw   mm3, mm4                        ;
        pmullw  mm0, mm7                        ; mm0 *= 2Q (low 16 bits kept)
        pmullw  mm3, mm7                        ;
        paddw   mm0, mm2                        ; mm0 += add (0 for zero coeff)
        paddw   mm3, mm5                        ;
        pxor    mm0, mm1                        ; xor + sub with the mask:
        pxor    mm3, mm4                        ;
        psubw   mm0, mm1                        ;   restore original sign
        psubw   mm3, mm4

%ifdef SATURATE
        ; clamp both registers to [-2048, 2047] with saturating add/sub pairs
        movq    mm2, [mmx_32767_minus_2047]
        movq    mm4, [mmx_32768_minus_2048]
        paddsw  mm0, mm2                        ; values > 2047 stick at 32767
        paddsw  mm3, mm2
        psubsw  mm0, mm2                        ; ... and come back as 2047
        psubsw  mm3, mm2
        psubsw  mm0, mm4                        ; values < -2048 stick at -32768
        psubsw  mm3, mm4
        paddsw  mm0, mm4                        ; ... and come back as -2048
        paddsw  mm3, mm4
%endif

        movq    [edi + 8*eax], mm0              ; [data] = mm0
        movq    [edi + 8*eax + 8], mm3

        add     eax, 2
        cmp     eax, 16                         ; 16 units * 8 bytes = 64 words
        jnz     near .loop

        ; DC term, computed at 32-bit width so the clamp sees the true product
        movsx   eax, word [esi]                 ; eax = coeff[0], sign-extended
        imul    eax, dword [esp + 8 + 16]       ; eax = coeff[0] * dcscalar

%ifdef SATURATE
        cmp     eax, -2048
        jl      .set_n2048
        cmp     eax, 2047
        jg      .set_2047
%endif
        mov     [edi], ax                       ; data[0] = low 16 bits of product

        pop     edi
        pop     esi
        ret

%ifdef SATURATE
align ALIGN
.set_n2048:
        mov     word [edi], -2048
        pop     edi
        pop     esi
        ret

align ALIGN
.set_2047:
        mov     word [edi], 2047
        pop     edi
        pop     esi
        ret
%endif


;===========================================================================
;
; void dequant_inter_mmx(int16_t * data,
;					const int16_t * const coeff,
;					const uint32_t quant);
;
; Writes 64 dequantized values to data[]. For every input word c:
;   c == 0 -> 0
;   c != 0 -> sign(c) * (|c| * 2Q + add), add = mmx_add entry for quant
; (16-bit lane arithmetic), clamped to [-2048, 2047] when SATURATE is
; defined.
;
; In:    [esp+4] data, [esp+8] coeff, [esp+12] quant
; Clobb: eax, mm0-mm7, flags (esi/edi preserved)
;
;===========================================================================

align ALIGN
cglobal dequant_inter_mmx
dequant_inter_mmx:
        push    esi
        push    edi

        mov     edi, [esp + 8 + 4]              ; edi = data (destination)
        mov     esi, [esp + 8 + 8]              ; esi = coeff (source)
        mov     eax, [esp + 8 + 12]             ; eax = quant
        movq    mm6, [mmx_add + eax * 8 - 8]    ; mm6 = 4 x (odd(Q) ? Q : Q-1)
        movq    mm7, [mmx_mul + eax * 8 - 8]    ; mm7 = 4 x 2Q

        xor     eax, eax                        ; eax = index in 8-byte units

align ALIGN
.loop:
        movq    mm0, [esi + 8*eax]              ; mm0 = 4 coefficients
        movq    mm3, [esi + 8*eax + 8]          ; mm3 = next 4 coefficients
        pxor    mm1, mm1                        ; mm1 = 0
        pxor    mm4, mm4                        ;
        pcmpgtw mm1, mm0                        ; mm1 = (0 > mm0) sign mask
        pcmpgtw mm4, mm3                        ;
        pxor    mm2, mm2                        ; mm2 = 0
        pxor    mm5, mm5                        ;
        pcmpeqw mm2, mm0                        ; mm2 = (0 == mm0)
        pcmpeqw mm5, mm3                        ;
        pandn   mm2, mm6                        ; mm2 = (iszero ? 0 : add)
        pandn   mm5, mm6                        ;
        pxor    mm0, mm1                        ; xor + sub with the mask:
        pxor    mm3, mm4                        ;
        psubw   mm0, mm1                        ;   mm0 = |mm0|
        psubw   mm3, mm4                        ;
        pmullw  mm0, mm7                        ; mm0 *= 2Q (low 16 bits kept)
        pmullw  mm3, mm7                        ;
        paddw   mm0, mm2                        ; mm0 += add (0 for zero coeff)
        paddw   mm3, mm5                        ;
        pxor    mm0, mm1                        ; xor + sub with the mask:
        pxor    mm3, mm4                        ;
        psubw   mm0, mm1                        ;   restore original sign
        psubw   mm3, mm4

%ifdef SATURATE
        ; clamp both registers to [-2048, 2047] with saturating add/sub pairs
        movq    mm2, [mmx_32767_minus_2047]
        movq    mm4, [mmx_32768_minus_2048]
        paddsw  mm0, mm2                        ; values > 2047 stick at 32767
        paddsw  mm3, mm2
        psubsw  mm0, mm2                        ; ... and come back as 2047
        psubsw  mm3, mm2
        psubsw  mm0, mm4                        ; values < -2048 stick at -32768
        psubsw  mm3, mm4
        paddsw  mm0, mm4                        ; ... and come back as -2048
        paddsw  mm3, mm4
%endif

        movq    [edi + 8*eax], mm0
        movq    [edi + 8*eax + 8], mm3

        add     eax, 2
        cmp     eax, 16                         ; 16 units * 8 bytes = 64 words
        jnz     near .loop

        pop     edi
        pop     esi

        ret

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -