📄 x86p_32.asm
字号:
;
; x86 format converters for HERMES
; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
;
BITS 32
GLOBAL _ConvertX86p32_32BGR888
GLOBAL _ConvertX86p32_32RGBA888
GLOBAL _ConvertX86p32_32BGRA888
GLOBAL _ConvertX86p32_24RGB888
GLOBAL _ConvertX86p32_24BGR888
GLOBAL _ConvertX86p32_16RGB565
GLOBAL _ConvertX86p32_16BGR565
GLOBAL _ConvertX86p32_16RGB555
GLOBAL _ConvertX86p32_16BGR555
GLOBAL _ConvertX86p32_8RGB332
EXTERN _x86return
SECTION .text
;; _Convert_*
;; Parameters:
;; ESI = source
;; EDI = dest
;; ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
;; Destroys:
;; EAX, EBX, EDX
;-----------------------------------------------------------------------
; _ConvertX86p32_32BGR888
;   Copies ECX 32-bit pixels from [ESI] to [EDI], reversing the order of
;   the three low bytes of every dword and leaving the top byte where it
;   is (bswap followed by ror 8).  With a source dword laid out as
;   [A][R][G][B] (most to least significant) the result is [A][B][G][R].
;
; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
; Saves:    EBP (pushed/popped on the unrolled path only)
; Exit:     jumps to _x86return
;-----------------------------------------------------------------------
_ConvertX86p32_32BGR888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .unrolled

.short_loop:                            ; one pixel per iteration
        mov     edx, [esi]
        bswap   edx                     ; full byte reversal ...
        ror     edx, 8                  ; ... then put the old top byte back on top
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.unrolled:
        push    ebp                     ; EBP becomes the block counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks
        push    ecx                     ; keep the full count for the tail

.quad_loop:                             ; four pixels per iteration
        mov     eax, [esi]
        mov     ebx, [esi+4]
        bswap   eax
        bswap   ebx
        ror     eax, 8
        mov     ecx, [esi+8]
        ror     ebx, 8
        mov     edx, [esi+12]
        bswap   ecx
        bswap   edx
        ror     ecx, 8
        mov     [edi+0], eax
        ror     edx, 8
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx
        add     esi, BYTE 16
        add     edi, BYTE 16
        dec     ebp
        jnz     .quad_loop

        pop     ecx                     ; original count
        and     ecx, BYTE 3             ; 0..3 pixels left over
        jz      .restore

.tail_loop:                             ; leftover pixels, one at a time
        mov     edx, [esi]
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .tail_loop

.restore:
        pop     ebp
        jmp     _x86return
;-----------------------------------------------------------------------
; _ConvertX86p32_32RGBA888
;   Copies ECX 32-bit pixels from [ESI] to [EDI], rotating every dword
;   left by 8 bits: the top byte moves to the bottom and the other three
;   bytes shift up.  With a source dword laid out as [A][R][G][B] (most
;   to least significant) the result is [R][G][B][A].
;
; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
; Saves:    EBP (pushed/popped on the unrolled path only)
; Exit:     jumps to _x86return
;-----------------------------------------------------------------------
_ConvertX86p32_32RGBA888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .unrolled

.short_loop:                            ; one pixel per iteration
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.unrolled:
        push    ebp                     ; EBP becomes the block counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks
        push    ecx                     ; keep the full count for the tail

.quad_loop:                             ; four pixels per iteration
        mov     eax, [esi]
        mov     ebx, [esi+4]
        rol     eax, 8
        mov     ecx, [esi+8]
        rol     ebx, 8
        mov     edx, [esi+12]
        rol     ecx, 8
        mov     [edi+0], eax
        rol     edx, 8
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx
        add     esi, BYTE 16
        add     edi, BYTE 16
        dec     ebp
        jnz     .quad_loop

        pop     ecx                     ; original count
        and     ecx, BYTE 3             ; 0..3 pixels left over
        jz      .restore

.tail_loop:                             ; leftover pixels, one at a time
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .tail_loop

.restore:
        pop     ebp
        jmp     _x86return
;-----------------------------------------------------------------------
; _ConvertX86p32_32BGRA888
;   Copies ECX 32-bit pixels from [ESI] to [EDI], reversing the byte
;   order of every dword (bswap).  With a source dword laid out as
;   [A][R][G][B] (most to least significant) the result is [B][G][R][A].
;
; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
; Saves:    EBP (pushed/popped on the unrolled path only)
; Exit:     jumps to _x86return
;-----------------------------------------------------------------------
_ConvertX86p32_32BGRA888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .unrolled

.short_loop:                            ; one pixel per iteration
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.unrolled:
        push    ebp                     ; EBP becomes the block counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks
        push    ecx                     ; keep the full count for the tail

.quad_loop:                             ; four pixels per iteration
        mov     eax, [esi]
        mov     ebx, [esi+4]
        mov     ecx, [esi+8]
        mov     edx, [esi+12]
        bswap   eax
        bswap   ebx
        bswap   ecx
        bswap   edx
        mov     [edi+0], eax
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx
        add     esi, BYTE 16
        add     edi, BYTE 16
        dec     ebp
        jnz     .quad_loop

        pop     ecx                     ; original count
        and     ecx, BYTE 3             ; 0..3 pixels left over
        jz      .restore

.tail_loop:                             ; leftover pixels, one at a time
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .tail_loop

.restore:
        pop     ebp
        jmp     _x86return
;-----------------------------------------------------------------------
; _ConvertX86p32_24RGB888 -- 32 bit RGB 888 to 24 bit RGB 888
;   For each of ECX source pixels (4 bytes each at [ESI]) writes the
;   three lowest-addressed bytes, in the same order, to [EDI] and drops
;   the fourth byte.
;
;   Counts above 32 take a fast path: single pixels are copied until EDI
;   is dword aligned, then four source pixels are packed into three
;   dword stores per iteration, then up to three leftover pixels are
;   copied one at a time.
;
; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
; Saves:    EBP (pushed/popped on the fast path only)
; Exit:     jumps to _x86return
;
; Register pictures below list bytes from most to least significant;
; the digit is the pixel index inside the current group of four.
;-----------------------------------------------------------------------
_ConvertX86p32_24RGB888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> fast path
        ja      .align_dest

.short_loop:                            ; one pixel per iteration
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.align_dest:                            ; copy single pixels until EDI % 4 == 0
        mov     edx, edi
        and     edx, BYTE 3
        jz      .setup_quads
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jmp     SHORT .align_dest

.setup_quads:
        push    ebp                     ; EBP becomes the block counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks
        push    ecx                     ; keep the remaining count for the tail

.quad_loop:                             ; 16 source bytes -> 12 dest bytes
        mov     eax, [esi]              ; eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; ebx = [A1][R1][G1][B1]
        shl     eax, 8                  ; eax = [R0][G0][B0][ 0]
        mov     ecx, [esi+12]           ; ecx = [A3][R3][G3][B3]
        shl     ebx, 8                  ; ebx = [R1][G1][B1][ 0]
        mov     al, [esi+4]             ; eax = [R0][G0][B0][B1]
        ror     eax, 8                  ; eax = [B1][R0][G0][B0]  (dest dword 0)
        mov     bh, [esi+9]             ; ebx = [R1][G1][G2][ 0]
        mov     [edi], eax
        add     edi, BYTE 12            ; advance early; later stores use -12
        shl     ecx, 8                  ; ecx = [R3][G3][B3][ 0]
        mov     bl, [esi+8]             ; ebx = [R1][G1][G2][B2]
        rol     ebx, 16                 ; ebx = [G2][B2][R1][G1]  (dest dword 1)
        mov     cl, [esi+10]            ; ecx = [R3][G3][B3][R2]  (dest dword 2)
        mov     [edi+4-12], ebx
        add     esi, BYTE 16
        mov     [edi+8-12], ecx
        dec     ebp
        jnz     .quad_loop

        pop     ecx                     ; count saved before the block loop
        and     ecx, BYTE 3             ; 0..3 pixels left over
        jz      .restore

.tail_loop:                             ; leftover pixels, one at a time
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .tail_loop

.restore:
        pop     ebp
        jmp     _x86return
;-----------------------------------------------------------------------
; _ConvertX86p32_24BGR888 -- 32 bit RGB 888 to 24 bit BGR 888
;   For each of ECX source pixels (4 bytes each at [ESI]) writes the
;   three lowest-addressed bytes to [EDI] in reversed order (source byte
;   2, then 1, then 0) and drops the fourth byte.
;
;   Counts above 32 take a fast path: single pixels are copied until EDI
;   is dword aligned, then four source pixels are packed into three
;   dword stores per iteration, then up to three leftover pixels are
;   copied one at a time.
;
; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Modifies: EAX, EBX, ECX, EDX, ESI, EDI, flags
; Saves:    EBP (pushed/popped on the fast path only)
; Exit:     jumps to _x86return
;
; Register pictures below list bytes from most to least significant;
; the digit is the pixel index inside the current group of four.
;-----------------------------------------------------------------------
_ConvertX86p32_24BGR888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> fast path
        ja      .align_dest

.short_loop:                            ; one pixel per iteration
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al               ; source byte 2 goes out first
        mov     [edi+1], bl
        mov     [edi+2], dl             ; source byte 0 goes out last
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .short_loop
.short_done:
        jmp     _x86return

.align_dest:                            ; copy single pixels until EDI % 4 == 0
        mov     edx, edi
        and     edx, BYTE 3
        jz      .setup_quads
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jmp     SHORT .align_dest

.setup_quads:
        push    ebp                     ; EBP becomes the block counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks
        push    ecx                     ; keep the remaining count for the tail

.quad_loop:                             ; 16 source bytes -> 12 dest bytes
        mov     eax, [esi]              ; eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; ebx = [A1][R1][G1][B1]
        bswap   eax                     ; eax = [B0][G0][R0][A0]
        bswap   ebx                     ; ebx = [B1][G1][R1][A1]
        mov     al, [esi+6]             ; eax = [B0][G0][R0][R1]
        mov     bh, [esi+9]             ; ebx = [B1][G1][G2][A1]
        ror     eax, 8                  ; eax = [R1][B0][G0][R0]  (dest dword 0)
        mov     bl, [esi+10]            ; ebx = [B1][G1][G2][R2]
        ror     ebx, 16                 ; ebx = [G2][R2][B1][G1]  (dest dword 1)
        mov     [edi], eax
        mov     [edi+4], ebx
        mov     ecx, [esi+12]           ; ecx = [A3][R3][G3][B3]
        bswap   ecx                     ; ecx = [B3][G3][R3][A3]
        mov     cl, [esi+8]             ; ecx = [B3][G3][R3][B2]  (dest dword 2)
        add     esi, BYTE 16
        mov     [edi+8], ecx
        add     edi, BYTE 12
        dec     ebp
        jnz     .quad_loop

        pop     ecx                     ; count saved before the block loop
        and     ecx, BYTE 3             ; 0..3 pixels left over
        jz      .restore

.tail_loop:                             ; leftover pixels, one at a time
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .tail_loop

.restore:
        pop     ebp
        jmp     _x86return
;; 32 bit RGB 888 to 16 BIT RGB 565
_ConvertX86p32_16RGB565:
; check short
cmp ecx,BYTE 16
ja .L3
.L1 ; short loop
mov bl,[esi+0] ; blue
mov al,[esi+1] ; green
mov ah,[esi+2] ; red
shr ah,3
and al,11111100b
shl eax,3
shr bl,3
add al,bl
mov [edi+0],al
mov [edi+1],ah
add esi,BYTE 4
add edi,BYTE 2
dec ecx
jnz .L1
.L2: ; End of short loop
jmp _x86return
.L3 ; head
mov ebx,edi
and ebx,BYTE 11b
jz .L4
mov bl,[esi+0] ; blue
mov al,[esi+1] ; green
mov ah,[esi+2] ; red
shr ah,3
and al,11111100b
shl eax,3
shr bl,3
add al,bl
mov [edi+0],al
mov [edi+1],ah
add esi,BYTE 4
add edi,BYTE 2
dec ecx
.L4:
; save count
push ecx
; unroll twice
shr ecx,1
; point arrays to end
lea esi,[esi+ecx*8]
lea edi,[edi+ecx*4]
; negative counter
neg ecx
jmp SHORT .L6
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -