mmxp2_32.asm

来自「网络MPEG4IP流媒体开发源代码」· 汇编代码 · 共 387 行
ASM
387 行
;
; pII-optimised MMX format converters for HERMES
; Copyright (c) 1998 Christian Nentwich (c.nentwich@cs.ucl.ac.uk)
;   and (c) 1999 Jonathan Matthew (jmatthew@uq.net.au)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; COPYRIGHT NOTICE
;
; This file partly contains code that is (c) Intel Corporation, specifically
; the mode detection routine, and the converter to 15 bit (8 pixel
; conversion routine from the mmx programming tutorial pages).
;
;
; These routines aren't exactly pII optimised - it's just that as they
; are, they're terrible on p5 MMXs, but less so on pIIs.  Someone needs to
; optimise them for p5 MMXs..
;
; Syntax: NASM, 32-bit flat code.
;
; Register interface shared by every converter in this file (as used by the
; code below):
;   esi = source pointer, 4 bytes per pixel, bytes in memory are B, G, R, A
;   edi = destination pointer
;   ecx = number of pixels to convert
; Every routine leaves through "jmp _mmxreturn" (external symbol) instead of
; "ret".  NOTE(review): _mmxreturn is presumably responsible for EMMS and for
; restoring caller registers - confirm against its definition.

BITS 32

GLOBAL _ConvertMMXpII32_24RGB888
GLOBAL _ConvertMMXpII32_16RGB565
GLOBAL _ConvertMMXpII32_16BGR565
GLOBAL _ConvertMMXpII32_16RGB555
GLOBAL _ConvertMMXpII32_16BGR555

EXTERN _mmxreturn

SECTION .data

ALIGN 8

;; Constants for conversion routines (each one is a 64-bit value holding the
;; same 32-bit pattern twice, i.e. one copy per pixel in an MMX register)

mmx32_rgb888_mask dd 00ffffffh,00ffffffh        ; drops the alpha byte

mmx32_rgb565_b dd 000000f8h, 000000f8h          ; top 5 bits of blue
mmx32_rgb565_g dd 0000fc00h, 0000fc00h          ; top 6 bits of green
mmx32_rgb565_r dd 00f80000h, 00f80000h          ; top 5 bits of red

mmx32_rgb555_rb dd 00f800f8h,00f800f8h          ; top 5 bits of red and blue
mmx32_rgb555_g dd 0000f800h,0000f800h           ; top 5 bits of green
mmx32_rgb555_mul dd 20000008h,20000008h         ; pmaddwd weights: blue*8 + red*2000h
mmx32_bgr555_mul dd 00082000h,00082000h         ; pmaddwd weights: blue*2000h + red*8


SECTION .text

;-----------------------------------------------------------------------
; _ConvertMMXpII32_24RGB888
; Packs 32-bit pixels into 24-bit pixels by dropping the fourth byte.
; In:    esi = source, edi = destination, ecx = pixel count
; Clobb: eax (al), ebx (bl), ecx, edx, esi, edi, mm0-mm4, mm6, mm7, flags
;-----------------------------------------------------------------------
_ConvertMMXpII32_24RGB888:

        ; set up mm6 as the mask, mm7 as zero
        movq    mm6, qword [mmx32_rgb888_mask]
        pxor    mm7, mm7

        mov     edx, ecx                ; keep the full count for the tail
        and     ecx, 0fffffffch         ; ecx = count rounded down to a multiple of 4
        jnz     .quad_loop
        jmp     .tail

.quad_loop:                             ; 4 pixels (16 bytes) in, 12 bytes out
        movq    mm0, [esi]              ; A R G B a r g b
        pand    mm0, mm6                ; 0 R G B 0 r g b
        movq    mm1, [esi+8]            ; A R G B a r g b
        pand    mm1, mm6                ; 0 R G B 0 r g b

        movq    mm2, mm0                ; 0 R G B 0 r g b
        punpckhdq mm2, mm7              ; 0 0 0 0 0 R G B
        punpckldq mm0, mm7              ; 0 0 0 0 0 r g b
        psllq   mm2, 24                 ; 0 0 R G B 0 0 0
        por     mm0, mm2                ; 0 0 R G B r g b

        movq    mm3, mm1                ; 0 R G B 0 r g b
        psllq   mm3, 48                 ; g b 0 0 0 0 0 0
        por     mm0, mm3                ; g b R G B r g b

        movq    mm4, mm1                ; 0 R G B 0 r g b
        punpckhdq mm4, mm7              ; 0 0 0 0 0 R G B
        punpckldq mm1, mm7              ; 0 0 0 0 0 r g b
        psrlq   mm1, 16                 ; 0 0 0 0 0 0 0 r
        psllq   mm4, 8                  ; 0 0 0 0 R G B 0
        por     mm1, mm4                ; 0 0 0 0 R G B r

        movq    [edi], mm0              ; first 8 output bytes
        add     esi, BYTE 16
        movd    [edi+8], mm1            ; last 4 output bytes
        add     edi, BYTE 12
        sub     ecx, BYTE 4
        jnz     .quad_loop

.tail:
        mov     ecx, edx
        and     ecx, BYTE 3             ; 0..3 pixels left over
        jz      .done

.pixel_loop:                            ; copy the three colour bytes, skip the fourth
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .pixel_loop

.done:
        jmp     _mmxreturn


;-----------------------------------------------------------------------
; _ConvertMMXpII32_16RGB565
; Converts 32-bit pixels to 16-bit pixels laid out as rrrrrggg gggbbbbb.
; In:    esi = source, edi = destination, ecx = pixel count
; Clobb: eax, ebx, ecx, edx, esi, edi, mm0-mm7, flags
;-----------------------------------------------------------------------
_ConvertMMXpII32_16RGB565:

        ; set up masks
        movq    mm5, [mmx32_rgb565_b]
        movq    mm6, [mmx32_rgb565_g]
        movq    mm7, [mmx32_rgb565_r]

        mov     edx, ecx                ; keep the full count for the tail
        shr     ecx, 2                  ; ecx = number of 4-pixel groups
        jnz     .quad_loop
        jmp     .tail                   ; not necessary at the moment, but doesn't hurt (much)

.quad_loop:                             ; 4 pixels (16 bytes) in, 8 bytes out
        movq    mm0, [esi]              ; argb
        movq    mm1, mm0                ; argb
        pand    mm0, mm6                ; 00g0
        movq    mm3, mm1                ; argb
        pand    mm1, mm5                ; 000b
        pand    mm3, mm7                ; 0r00
        pslld   mm1, 2                  ; 0 0 000000bb bbb00000
        por     mm0, mm1                ; 0 0 ggggggbb bbb00000
        psrld   mm0, 5                  ; 0 0 00000ggg gggbbbbb

        movq    mm4, [esi+8]            ; argb
        movq    mm2, mm4                ; argb
        pand    mm4, mm6                ; 00g0
        movq    mm1, mm2                ; argb
        pand    mm2, mm5                ; 000b
        pand    mm1, mm7                ; 0r00
        pslld   mm2, 2                  ; 0 0 000000bb bbb00000
        por     mm4, mm2                ; 0 0 ggggggbb bbb00000
        psrld   mm4, 5                  ; 0 0 00000ggg gggbbbbb

        packuswb mm3, mm1               ; R 0 r 0   (red ends up in the top byte of each word)
        packssdw mm0, mm4               ; as above.. ish (green/blue, one word per pixel)
        por     mm0, mm3                ; done.

        movq    [edi], mm0

        add     esi, 16
        add     edi, 8
        dec     ecx
        jnz     .quad_loop

.tail:
        mov     ecx, edx
        and     ecx, BYTE 3             ; 0..3 pixels left over
        jz      .done

.pixel_loop:
        mov     al, [esi]               ; blue
        mov     bh, [esi+1]             ; green, parked in bits 8-15 of ebx
        mov     ah, [esi+2]             ; red
        shr     al, 3                   ; blue -> 5 bits
        and     eax, 0F81Fh             ; ax = rrrrr000 000bbbbb, stale upper bits cleared
        shr     ebx, 5                  ; green -> bits 3-10
        and     ebx, 07E0h              ; keep the top 6 green bits; also discards stale ebx bits
        add     eax, ebx                ; fields do not overlap, so add == or
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .pixel_loop

.done:
        jmp     _mmxreturn


;-----------------------------------------------------------------------
; _ConvertMMXpII32_16BGR565
; Converts 32-bit pixels to 16-bit pixels laid out as bbbbbggg gggrrrrr.
; In:    esi = source, edi = destination, ecx = pixel count
; Clobb: eax, ebx, ecx, edx, esi, edi, mm0-mm7, flags
;-----------------------------------------------------------------------
_ConvertMMXpII32_16BGR565:

        movq    mm5, [mmx32_rgb565_r]
        movq    mm6, [mmx32_rgb565_g]
        movq    mm7, [mmx32_rgb565_b]

        mov     edx, ecx                ; keep the full count for the tail
        shr     ecx, 2                  ; ecx = number of 4-pixel groups
        jnz     .quad_loop
        jmp     .tail

.quad_loop:                             ; 4 pixels (16 bytes) in, 8 bytes out
        movq    mm0, [esi]              ; a r g b
        movq    mm1, mm0                ; a r g b
        pand    mm0, mm6                ; 0 0 g 0
        movq    mm3, mm1                ; a r g b
        pand    mm1, mm5                ; 0 r 0 0
        pand    mm3, mm7                ; 0 0 0 b

        psllq   mm3, 16                 ; 0 b 0 0
        psrld   mm1, 14                 ; 0 0 000000rr rrr00000
        por     mm0, mm1                ; 0 0 ggggggrr rrr00000
        psrld   mm0, 5                  ; 0 0 00000ggg gggrrrrr

        movq    mm4, [esi+8]            ; a r g b
        movq    mm2, mm4                ; a r g b
        pand    mm4, mm6                ; 0 0 g 0
        movq    mm1, mm2                ; a r g b
        pand    mm2, mm5                ; 0 r 0 0
        pand    mm1, mm7                ; 0 0 0 b

        psllq   mm1, 16                 ; 0 b 0 0
        psrld   mm2, 14                 ; 0 0 000000rr rrr00000
        por     mm4, mm2                ; 0 0 ggggggrr rrr00000
        psrld   mm4, 5                  ; 0 0 00000ggg gggrrrrr

        packuswb mm3, mm1               ; BBBBB000 00000000 bbbbb000 00000000
        packssdw mm0, mm4               ; 00000GGG GGGRRRRR 00000GGG GGGRRRRR
        por     mm0, mm3                ; BBBBBGGG GGGRRRRR bbbbbggg gggrrrrr
        movq    [edi], mm0

        add     esi, BYTE 16
        add     edi, BYTE 8
        dec     ecx
        jnz     .quad_loop

.tail:
        and     edx, BYTE 3             ; 0..3 pixels left over; edx is the tail counter here
        jz      .done

.pixel_loop:
        mov     al, [esi+2]             ; red goes to the low field
        mov     bh, [esi+1]             ; green, parked in bits 8-15 of ebx
        mov     ah, [esi]               ; blue goes to the high field
        shr     al, 3                   ; red -> 5 bits
        and     eax, 0F81Fh             ; ax = bbbbb000 000rrrrr, stale upper bits cleared
        shr     ebx, 5                  ; green -> bits 3-10
        and     ebx, 07E0h              ; keep the top 6 green bits; also discards stale ebx bits
        add     eax, ebx                ; fields do not overlap, so add == or
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     edx
        jnz     .pixel_loop

.done:
        jmp     _mmxreturn


;-----------------------------------------------------------------------
; _ConvertMMXpII32_16BGR555
; Converts 32-bit pixels to 16-bit pixels laid out as 0bbbbbgg gggrrrrr.
; In:    esi = source, edi = destination, ecx = pixel count
; Clobb: see _convert_bgr555_cheat
;-----------------------------------------------------------------------
_ConvertMMXpII32_16BGR555:

        ; the 16BGR555 converter is identical to the RGB555 one,
        ; except it uses a different multiplier for the pmaddwd
        ; instruction.  cool huh.

        movq    mm7, qword [mmx32_bgr555_mul]
        jmp     _convert_bgr555_cheat


; The 8-pixel routine below started life as the Intel version; they
; obviously went to much more trouble to expand/coil the loop than I did,
; so theirs would almost certainly be faster, even if only a little.
; I did rename 'mmx32_rgb555_add' to 'mmx32_rgb555_mul', which is
; (I think) a more accurate name..
;
; Two deliberate differences from the software-pipelined original:
;   * The main loop no longer preloads the next group of pixels at the end
;     of each iteration, so it never reads source bytes beyond the pixels it
;     actually converts.
;   * The leftover-pixel loop uses the same pmaddwd multiplier (mm7) as the
;     main loop, so it produces the right channel order for both the RGB555
;     and the BGR555 entry points.

;-----------------------------------------------------------------------
; _ConvertMMXpII32_16RGB555
; Converts 32-bit pixels to 16-bit pixels laid out as 0rrrrrgg gggbbbbb.
; Falls through into _convert_bgr555_cheat.
; In:    esi = source, edi = destination, ecx = pixel count
; Clobb: see _convert_bgr555_cheat
;-----------------------------------------------------------------------
_ConvertMMXpII32_16RGB555:

        movq    mm7, qword [mmx32_rgb555_mul]

;-----------------------------------------------------------------------
; _convert_bgr555_cheat (file-local shared body of the two 555 converters)
; In:    esi = source, edi = destination, ecx = pixel count,
;        mm7 = pmaddwd multiplier selecting the red/blue placement
; Clobb: eax, ecx, edx, esi, edi, mm0-mm3, mm5, mm6, flags
;
; Per pixel:  ((pixel & rb_mask) pmaddwd mm7  |  (pixel & g_mask)) >> 6
; pmaddwd multiplies the low and high 16-bit halves of each pixel by the
; matching halves of the multiplier and sums them, which moves one of
; red/blue to bits 6-10 and the other to bits 16-20; green sits at bits
; 11-15; the final shift right by 6 yields a 15-bit value.
;-----------------------------------------------------------------------
_convert_bgr555_cheat:

        movq    mm6, qword [mmx32_rgb555_g]
        movq    mm5, qword [mmx32_rgb555_rb]

        mov     edx, ecx                ; keep the full count for the tail
        and     ecx, BYTE -8            ; ecx = count rounded down to a multiple of 8
        jnz     .oct_loop
        jmp     near .tail

.oct_loop:                              ; 8 pixels (32 bytes) in, 16 bytes out
        ; pixels 0..3
        movq    mm0, [esi]
        movq    mm2, [esi+8]
        movq    mm1, mm0
        movq    mm3, mm2
        pand    mm1, mm5                ; red and blue only
        pand    mm3, mm5
        pmaddwd mm1, mm7                ; position red and blue
        pmaddwd mm3, mm7
        pand    mm0, mm6                ; green only
        pand    mm2, mm6
        por     mm1, mm0                ; merge the three channels
        por     mm3, mm2
        psrld   mm1, 6                  ; align to bits 0-14
        psrld   mm3, 6
        packssdw mm1, mm3               ; four 16-bit pixels (values <= 7fffh, no saturation)
        movq    [edi], mm1

        ; pixels 4..7
        movq    mm0, [esi+16]
        movq    mm2, [esi+24]
        movq    mm1, mm0
        movq    mm3, mm2
        pand    mm1, mm5
        pand    mm3, mm5
        pmaddwd mm1, mm7
        pmaddwd mm3, mm7
        pand    mm0, mm6
        pand    mm2, mm6
        por     mm1, mm0
        por     mm3, mm2
        psrld   mm1, 6
        psrld   mm3, 6
        packssdw mm1, mm3
        movq    [edi+8], mm1

        add     esi, BYTE 32
        add     edi, BYTE 16
        sub     ecx, BYTE 8
        jz      .tail
        jmp     .oct_loop

.tail:
        mov     ecx, edx
        and     ecx, BYTE 7             ; 0..7 pixels left over
        jz      .done

.pixel_loop:                            ; same arithmetic as above, one pixel at a time
        movd    mm0, [esi]              ; reads exactly one 4-byte pixel
        add     esi, BYTE 4
        movq    mm1, mm0
        pand    mm1, mm5                ; red and blue only
        pmaddwd mm1, mm7                ; position red and blue per the selected order
        pand    mm0, mm6                ; green only
        por     mm1, mm0
        psrld   mm1, 6
        movd    eax, mm1
        mov     [edi], ax               ; store the 15-bit pixel
        add     edi, BYTE 2
        dec     ecx
        jnz     .pixel_loop

.done:
        jmp     _mmxreturn
mmxp2_32.asm - 源码说明

本页面展示了「网络MPEG4IP流媒体开发源代码」中的 mmxp2_32.asm 源码文件，采用汇编编程语言编写，共 387 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与MPEG4IP相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?