⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 quantize4_mmx.asm

📁 网络MPEG4IP流媒体开发源代码
💻 ASM
📖 第 1 页 / 共 2 页
字号:
		; ---------------------------------------------------------------
		; Tail of a routine whose beginning lies before this excerpt.
		; The .q2loop and .done labels it jumps to are defined earlier;
		; the instructions below are reproduced unchanged.
		; ---------------------------------------------------------------
		pmulhw	mm0, mm2		; (level<<4 + intra_matrix[i]>>1) / intra_matrix[i]
		movq	mm2, [intra_matrix + 8*ecx + 8]
		psrlw	mm2, 1
		paddw	mm3, mm2
		movq	mm2, [intra_matrix_fix + ecx*8 + 8]
		pmulhw	mm3, mm2

		paddw	mm0, mm5
		paddw	mm3, mm5

		psrlw	mm0, 2			; mm0 >>= 2   (/4)
		psrlw	mm3, 2			;

		pxor	mm0, mm1		; mm0 *= sign(mm0)
		pxor	mm3, mm4		;
		psubw	mm0, mm1		; undisplace
		psubw	mm3, mm4		;

		movq	[edi + 8*ecx], mm0
		movq	[edi + 8*ecx + 8], mm3

		add	ecx, 2
		cmp	ecx, 16
		jnz	near .q2loop
		jmp	near .done


;===========================================================================
;
; uint32_t quant4_inter_mmx(int16_t * coeff,
;					const int16_t const * data,
;					const uint32_t quant);
;
; Stack-argument routine (arguments are read from [esp + ...]).
;
; In:    [esp+4]  coeff  - destination, written 16 bytes per iteration
;        [esp+8]  data   - source, read 16 bytes per iteration
;        [esp+12] quant  - selects the code path (1, 2, or general)
; Out:   eax = horizontal sum of the accumulated magnitudes (mm5)
; Saves: ecx, esi, edi (pushed on entry, popped before ret)
; Uses:  mm0-mm5, plus mm7 on the general path; flags
;
; Each of the three loops runs ecx = 0, 2, ..., 14 and handles two
; quadwords (8 words) per pass, i.e. 64 words in total.
;
; Per word: take |data|, shift left by 4, add inter_matrix[i] >> 1,
; multiply-high by inter_matrix_fix[i], then
;   quant == 1 : shift right by 1
;   quant == 2 : shift right by 2
;   otherwise  : multiply-high by mmx_div[quant - 1], shift right by 1
; and finally restore the sign.
;
;===========================================================================

align ALIGN
cglobal quant4_inter_mmx
quant4_inter_mmx

		push	ecx
		push	esi
		push	edi

		mov	edi, [esp + 12 + 4]		; coeff
		mov	esi, [esp + 12 + 8]		; data
		mov	eax, [esp + 12 + 12]		; quant

		xor	ecx, ecx			; quadword index
		pxor	mm5, mm5			; sum

		cmp	al, 1
		jz	near .q1loop

		cmp	al, 2
		jz	near .q2loop

		movq	mm7, [mmx_div + eax * 8 - 8]	; divider (table entry quant-1)

align ALIGN
.loop
		movq	mm0, [esi + 8*ecx]		; mm0 = [1st]
		movq	mm3, [esi + 8*ecx + 8]		; mm3 = [2nd]

		pxor	mm1, mm1		; mm1 = 0
		pxor	mm4, mm4		;
		pcmpgtw	mm1, mm0		; mm1 = (0 > mm0)  -> sign mask
		pcmpgtw	mm4, mm3		;
		pxor	mm0, mm1		; mm0 = |mm0|
		pxor	mm3, mm4		;
		psubw	mm0, mm1		; displace
		psubw	mm3, mm4		;

		psllw	mm0, 4			; level << 4
		psllw	mm3, 4

		movq	mm2, [inter_matrix + 8*ecx]
		psrlw	mm2, 1
		paddw	mm0, mm2		; + inter_matrix[i] >> 1 (rounding term)

		movq	mm2, [inter_matrix_fix + ecx*8]
		pmulhw	mm0, mm2		; (level<<4 + inter_matrix[i]>>1) / inter_matrix[i]

		movq	mm2, [inter_matrix + 8*ecx + 8]
		psrlw	mm2, 1
		paddw	mm3, mm2

		movq	mm2, [inter_matrix_fix + ecx*8 + 8]
		pmulhw	mm3, mm2

		pmulhw	mm0, mm7		; mm0 = (mm0 / 2Q) >> 16
		pmulhw	mm3, mm7		;
		psrlw	mm0, 1			; additional shift by 1 => 16 + 1 = 17
		psrlw	mm3, 1

		paddw	mm5, mm0		; sum += mm0 (magnitude, before sign restore)
		pxor	mm0, mm1		; mm0 *= sign(mm0)
		paddw	mm5, mm3		;
		pxor	mm3, mm4		;
		psubw	mm0, mm1		; undisplace
		psubw	mm3, mm4

		movq	[edi + 8*ecx], mm0
		movq	[edi + 8*ecx + 8], mm3

		add	ecx, 2
		cmp	ecx, 16
		jnz	near .loop

.done
		; Fold the four word accumulators of mm5 into one dword.
		; presumably mmx_one holds 1 in every word lane - confirm at its definition
		pmaddwd	mm5, [mmx_one]
		movq	mm0, mm5
		psrlq	mm5, 32
		paddd	mm0, mm5
		movd	eax, mm0		; return sum

		pop	edi
		pop	esi
		pop	ecx

		ret

		; --- quant == 1: no mmx_div multiply, final shift by 1 ---
align ALIGN
.q1loop
		movq	mm0, [esi + 8*ecx]		; mm0 = [1st]
		movq	mm3, [esi + 8*ecx + 8]		; mm3 = [2nd]

		pxor	mm1, mm1		; mm1 = 0
		pxor	mm4, mm4		;
		pcmpgtw	mm1, mm0		; mm1 = (0 > mm0)
		pcmpgtw	mm4, mm3		;
		pxor	mm0, mm1		; mm0 = |mm0|
		pxor	mm3, mm4		;
		psubw	mm0, mm1		; displace
		psubw	mm3, mm4		;

		psllw	mm0, 4
		psllw	mm3, 4

		movq	mm2, [inter_matrix + 8*ecx]
		psrlw	mm2, 1
		paddw	mm0, mm2

		movq	mm2, [inter_matrix_fix + ecx*8]
		pmulhw	mm0, mm2		; (level<<4 + inter_matrix[i]>>1) / inter_matrix[i]

		movq	mm2, [inter_matrix + 8*ecx + 8]
		psrlw	mm2, 1
		paddw	mm3, mm2

		movq	mm2, [inter_matrix_fix + ecx*8 + 8]
		pmulhw	mm3, mm2

		psrlw	mm0, 1			; mm0 >>= 1   (/2)
		psrlw	mm3, 1			;

		paddw	mm5, mm0		; sum += mm0
		pxor	mm0, mm1		; mm0 *= sign(mm0)
		paddw	mm5, mm3		;
		pxor	mm3, mm4		;
		psubw	mm0, mm1		; undisplace
		psubw	mm3, mm4

		movq	[edi + 8*ecx], mm0
		movq	[edi + 8*ecx + 8], mm3

		add	ecx, 2
		cmp	ecx, 16
		jnz	near .q1loop

		jmp	.done

		; --- quant == 2: no mmx_div multiply, final shift by 2 ---
align ALIGN
.q2loop
		movq	mm0, [esi + 8*ecx]		; mm0 = [1st]
		movq	mm3, [esi + 8*ecx + 8]		; mm3 = [2nd]

		pxor	mm1, mm1		; mm1 = 0
		pxor	mm4, mm4		;
		pcmpgtw	mm1, mm0		; mm1 = (0 > mm0)
		pcmpgtw	mm4, mm3		;
		pxor	mm0, mm1		; mm0 = |mm0|
		pxor	mm3, mm4		;
		psubw	mm0, mm1		; displace
		psubw	mm3, mm4		;

		psllw	mm0, 4
		psllw	mm3, 4

		movq	mm2, [inter_matrix + 8*ecx]
		psrlw	mm2, 1
		paddw	mm0, mm2

		movq	mm2, [inter_matrix_fix + ecx*8]
		pmulhw	mm0, mm2		; (level<<4 + inter_matrix[i]>>1) / inter_matrix[i]

		movq	mm2, [inter_matrix + 8*ecx + 8]
		psrlw	mm2, 1
		paddw	mm3, mm2

		movq	mm2, [inter_matrix_fix + ecx*8 + 8]
		pmulhw	mm3, mm2

		psrlw	mm0, 2			; mm0 >>= 2   (/4)
		psrlw	mm3, 2			;

		paddw	mm5, mm0		; sum += mm0
		pxor	mm0, mm1		; mm0 *= sign(mm0)
		paddw	mm5, mm3		;
		pxor	mm3, mm4		;
		psubw	mm0, mm1		; undisplace
		psubw	mm3, mm4

		movq	[edi + 8*ecx], mm0
		movq	[edi + 8*ecx + 8], mm3

		add	ecx, 2
		cmp	ecx, 16
		jnz	near .q2loop

		jmp	.done


;===========================================================================
;
; void dequant4_intra_mmx(int16_t *data,
;                    const int16_t const *coeff,
;                    const uint32_t quant,
;                    const uint32_t dcscalar);
;
; Stack-argument routine (arguments are read from [esp + ...]).
;
; In:    [esp+4]  data      - destination, written 8 bytes per iteration
;        [esp+8]  coeff     - source, read 8 bytes per iteration
;        [esp+12] quant     - index into mmx_mul_quant (entry quant-1)
;        [esp+16] dcscalar  - multiplier applied to coeff[0]
; Saves: esi, edi (pushed on entry, popped before each ret)
; Uses:  eax, mm0-mm4, mm7 (plus mm6 when SATURATE is defined); flags
;
; The loop runs eax = 0..15, one quadword (4 words) per pass = 64 words.
; Per word: data = sign(c) * ((|c| * quant * intra_matrix[i]) >> 3), with
; exact zeros kept at zero. Afterwards data[0] is overwritten with
; coeff[0] * dcscalar.
;
;===========================================================================

align 16
cglobal dequant4_intra_mmx
dequant4_intra_mmx

		push	esi
		push	edi

		mov	edi, [esp + 8 + 4]		; data
		mov	esi, [esp + 8 + 8]		; coeff
		mov	eax, [esp + 8 + 12]		; quant

		movq	mm7, [mmx_mul_quant  + eax*8 - 8]

		xor	eax, eax			; quadword index

align 16
.loop
		movq	mm0, [esi + 8*eax]	; mm0 = [coeff]

		pxor	mm1, mm1		; mm1 = 0
		pcmpeqw	mm1, mm0		; mm1 = (0 == mm0)  -> zero mask

		pxor	mm2, mm2		; mm2 = 0
		pcmpgtw	mm2, mm0		; mm2 = (0 > mm0)   -> sign mask
		pxor	mm0, mm2		; mm0 = |mm0|
		psubw	mm0, mm2		; displace

		pmullw	mm0, mm7		; mm0 *= quant

		movq	mm3, [intra_matrix + 8*eax]

		movq	mm4, mm0		; keep a copy for the high-half multiply
		pmullw	mm0, mm3		; mm0 = low(mm0 * mm3)
		pmulhw	mm3, mm4		; mm3 = high(mm0 * mm3)

		movq	mm4, mm0		; mm0,mm4 = unpack(mm3, mm0)
		punpcklwd mm0, mm3		; low two products as dwords
		punpckhwd mm4, mm3		; high two products as dwords
		psrld	mm0, 3			; mm0,mm4 /= 8
		psrld	mm4, 3			;
		packssdw mm0, mm4		; mm0 = pack(mm4, mm0)

		pxor	mm0, mm2		; mm0 *= sign(mm0)
		psubw	mm0, mm2		; undisplace
		pandn	mm1, mm0		; mm1 = ~(iszero) & mm0

%ifdef SATURATE
		; Saturating add/sub pairs clamp each word from above, then below.
		; presumably to [-2048, 2047], going by the constant names - confirm
		movq	mm2, [mmx_32767_minus_2047]
		movq	mm6, [mmx_32768_minus_2048]
		paddsw	mm1, mm2
		psubsw	mm1, mm2
		psubsw	mm1, mm6
		paddsw	mm1, mm6
%endif

		movq	[edi + 8*eax], mm1	; [data] = mm1

		add	eax, 1
		cmp	eax, 16
		jnz	near .loop

		; DC term: overwrite data[0] with coeff[0] * dcscalar.
		; NOTE(review): imul here is 16-bit, so the product wraps modulo
		; 2^16 before the SATURATE range check below sees it - confirm the
		; caller guarantees coeff[0] * dcscalar fits in int16.
		mov	ax, [esi]		; ax = coeff[0]
		imul	ax, [esp + 8 + 16]	; ax = coeff[0] * dcscalar (low 16 bits)
		mov	[edi], ax		; data[0] = ax

%ifdef SATURATE
		cmp	ax, -2048
		jl	.set_n2048
		cmp	ax, 2047
		jg	.set_2047
%endif
		pop	edi
		pop	esi
		ret

%ifdef SATURATE
.set_n2048
		mov	word [edi], -2048
		pop	edi
		pop	esi
		ret

.set_2047
		mov	word [edi], 2047
		pop	edi
		pop	esi
		ret
%endif


;===========================================================================
;
; void dequant4_inter_mmx(int16_t * data,
;                    const int16_t * const coeff,
;                    const uint32_t quant);
;
; Stack-argument routine (arguments are read from [esp + ...]).
;
; In:    [esp+4]  data   - destination, written 8 bytes per iteration
;        [esp+8]  coeff  - source, read 8 bytes per iteration
;        [esp+12] quant  - index into mmx_mul_quant (entry quant-1)
; Saves: esi, edi (pushed on entry, popped before ret)
; Uses:  eax, mm0-mm7; flags
;
; The loop runs eax = 0..15, one quadword (4 words) per pass = 64 words.
; Per word: data = sign(c) * (((2*|c| + 1) * quant * inter_matrix[i]) >> 4),
; with exact zeros kept at zero, followed by the saturating clamp.
; After the loop, if the XOR of all 64 outputs has bit 0 clear, bit 0 of
; data[63] is toggled (mismatch control).
;
;===========================================================================

align 16
cglobal dequant4_inter_mmx
dequant4_inter_mmx

		push	esi
		push	edi

		mov	edi, [esp + 8 + 4]		; data
		mov	esi, [esp + 8 + 8]		; coeff
		mov	eax, [esp + 8 + 12]		; quant
		movq	mm7, [mmx_mul_quant  + eax*8 - 8]
		movq	mm6, [mmx_one]
		xor	eax, eax			; quadword index
		pxor	mm5, mm5			; mismatch sum

align 16
.loop
		movq	mm0, [esi + 8*eax]	; mm0 = [coeff]

		pxor	mm1, mm1		; mm1 = 0
		pcmpeqw	mm1, mm0		; mm1 = (0 == mm0)  -> zero mask

		pxor	mm2, mm2		; mm2 = 0
		pcmpgtw	mm2, mm0		; mm2 = (0 > mm0)   -> sign mask
		pxor	mm0, mm2		; mm0 = |mm0|
		psubw	mm0, mm2		; displace

		psllw	mm0, 1			;
		paddsw	mm0, mm6		; mm0 = 2*mm0 + 1
		pmullw	mm0, mm7		; mm0 *= quant

		movq	mm3, [inter_matrix + 8*eax]

		movq	mm4, mm0		; keep a copy for the high-half multiply
		pmullw	mm0, mm3		; mm0 = low(mm0 * mm3)
		pmulhw	mm3, mm4		; mm3 = high(mm0 * mm3)

		movq	mm4, mm0		; mm0,mm4 = unpack(mm3, mm0)
		punpcklwd mm0, mm3		;
		punpckhwd mm4, mm3		;

		psrad	mm0, 4			; mm0,mm4 /= 16
		psrad	mm4, 4			;
		packssdw mm0, mm4		; mm0 = pack(mm4, mm0)

		pxor	mm0, mm2		; mm0 *= sign(mm0)
		psubw	mm0, mm2		; undisplace
		pandn	mm1, mm0		; mm1 = ~(iszero) & mm0

		; The SATURATE guard is commented out, so this clamp always runs.
;%ifdef SATURATE
		movq	mm2, [mmx_32767_minus_2047]
		movq	mm4, [mmx_32768_minus_2048]
		paddsw	mm1, mm2
		psubsw	mm1, mm2
		psubsw	mm1, mm4
		paddsw	mm1, mm4
;%endif

		pxor	mm5, mm1		; mismatch: XOR-accumulate the outputs

		movq	[edi + 8*eax], mm1	; [data] = mm1

		add	eax, 1
		cmp	eax, 16
		jnz	near .loop

		; mismatch control
		; Fold the four word lanes of mm5 into lane 0 by XOR; bit 0 of the
		; result is the XOR of the low bits of every output word.
		movq	mm0, mm5
		movq	mm1, mm5
		movq	mm2, mm5
		psrlq	mm0, 48
		psrlq	mm1, 32
		psrlq	mm2, 16
		pxor	mm5, mm0
		pxor	mm5, mm1
		pxor	mm5, mm2

		movd	eax, mm5
		test	eax, 0x1
		jnz	.done			; bit 0 set: leave data[63] untouched

		xor	word [edi + 2*63], 1	; bit 0 clear: toggle LSB of data[63]

.done
		pop	edi
		pop	esi

		ret

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -