; huffyuv_a.asm
;
; Huffyuv v2.1.1, by Ben Rudiak-Gould.
; http://www.math.berkeley.edu/~benrg/huffyuv.html
;
; This file is copyright 2000 Ben Rudiak-Gould, and distributed under
; the terms of the GNU General Public License, v2 or later. See
; http://www.gnu.org/copyleft/gpl.html.
;
;
; This file makes heavy use of macros to define a bunch of almost-identical
; functions -- see huffyuv_a.h.
;
; Assembler setup: Pentium (.586) instruction set plus MMX, flat memory model.
.586
.mmx
.model flat
; alignment has to be 'page' so that I can use 'align 32' below
_TEXT64 segment page public use32 'CODE'
; Lookup tables defined outside this file.
;   encodeN_shift       - byte table, indexed by an 8-bit symbol; the value is
;                         used as a bit count by HUFF_COMPRESS.
;   encodeN_add_shifted - dword table, indexed by the same symbol (scaled by 4);
;                         the value supplies the bits that HUFF_COMPRESS shifts
;                         into the output accumulator.
;   decodeN / decodeN_shift - not referenced in the part of the file shown here;
;                         presumably used by the decompression routines.
EXTERN C encode1_shift:BYTE
EXTERN C encode1_add_shifted:DWORD
EXTERN C encode2_shift:BYTE
EXTERN C encode2_add_shifted:DWORD
EXTERN C encode3_shift:BYTE
EXTERN C encode3_add_shifted:DWORD
EXTERN C decode1:DWORD
EXTERN C decode1_shift:BYTE
EXTERN C decode2:DWORD
EXTERN C decode2_shift:BYTE
EXTERN C decode3:DWORD
EXTERN C decode3_shift:BYTE
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; HUFF_CODEC_PROC_START -- common entry sequence for the cdecl procedures
; generated by the macros in this file.
;
; Saves ebp, edi, esi and ebx (16 bytes, which is why every argument offset
; below carries "+16"), then loads the three stack arguments:
;   esi = 1st argument (src)
;   edi = 2nd argument (dst)
;   ebp = 3rd argument (src_end)
;   eax = the first dword at [src]
; The matching pops are emitted by HUFF_COMPRESS_END, in reverse push order.
HUFF_CODEC_PROC_START MACRO
push ebp
push edi
push esi
push ebx
mov esi,[esp+4+16]
mov edi,[esp+8+16]
mov ebp,[esp+12+16]
mov eax,[esi]
ENDM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; HUFF_COMPRESS -- emit the code for one input byte into the output bit stream.
;
; Parameters (all assembly-time):
;   channel   - table prefix: uses <channel>_shift and <channel>_add_shifted
;   index     - byte offset of the sample from esi (after 'increment' is
;               applied); also used to build the local label nostore_<index>,
;               so it must be unique within one procedure
;   back1     - if non-zero, subtract the byte 'back1' bytes earlier
;   back2     - if non-zero, also subtract the byte 'back2' bytes earlier and,
;               when back1 is non-zero too, add back the byte
;               'back1+back2' bytes earlier
;   increment - if non-zero, esi is advanced by this many bytes once the
;               sample has been fetched
;
; Register state expected and maintained across invocations:
;   esi = source pointer          edi = output pointer (dwords)
;   edx = bit accumulator         ch  = (number of bits held in edx) - 32
; Clobbers eax, ebx, cl and the flags.
HUFF_COMPRESS MACRO channel,index,back1,back2,increment
back = back1 or back2
;; With no prediction the symbol can be zero-extended straight into ebx;
;; otherwise it is built up in cl and zero-extended afterwards.
IF back
        mov     cl,[esi+&index+&increment]
ELSE
        movzx   ebx,byte ptr [esi+&index+&increment]
ENDIF
IF &back1
        sub     cl,[esi+&index+&increment-&back1]
ENDIF
IF &back2
        sub     cl,[esi+&index+&increment-&back2]
IF &back1
        add     cl,[esi+&index+&increment-&back1-&back2]
ENDIF
ENDIF
;; Use the '&increment' form like every other parameter reference in this
;; macro, so the text does not depend on the assembler folding identifier case.
IF &increment
        add     esi,&increment
ENDIF
IF back
        movzx   ebx,cl
ENDIF
        mov     cl,&channel&_shift[ebx]         ;; cl  = bit count for this symbol
        mov     eax,&channel&_add_shifted[ebx*4] ;; eax = bits to append (taken from the top of eax by shld)
        add     ch,cl
        jl      nostore_&index&                 ;; still fewer than 32 bits pending
        ;; The accumulator fills up: shift in only the bits that still fit,
        ;; store the completed dword, then fall through to the common shld.
        sub     cl,ch                           ;; cl = bits that still fit in edx
        sub     ch,32                           ;; ch = (bits left over) - 32
        shld    edx,eax,cl
        add     cl,ch                           ;; cl = original count - 32; same as the original count for shld, which only uses the low 5 bits
        mov     [edi],edx
        add     edi,4
nostore_&index&:
        shld    edx,eax,cl
ENDM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; HUFF_COMPRESS_END -- loop tail and epilogue for the compression procedures.
;
; Parameter:
;   loopname - label to jump back to while esi has not reached ebp
;
; After the loop, any bits still pending in edx are flushed as one final
; dword, and the updated output pointer is returned in eax. The pops undo the
; pushes made by HUFF_CODEC_PROC_START.
HUFF_COMPRESS_END MACRO loopname
;; The loop ends only on exact equality of esi and ebp.
cmp esi,ebp
jnz &loopname
;; ch = (pending bits) - 32, so ch = -32 means nothing is left to flush.
cmp ch,-32
jle noextra
;; -ch = number of unused bit positions; shifting left by that amount moves
;; the pending bits to the top of the dword before it is stored.
mov cl,ch
neg cl
shl edx,cl
mov [edi],edx
add edi,4
noextra:
;; return value: address just past the last dword written
mov eax,edi
pop ebx
pop esi
pop edi
pop ebp
retn
ENDM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; HUFF_COMPRESS_PROC_YUV -- generate one compression procedure for 4-byte
; YUY2/UYVY groups.
;
; Parameters (all assembly-time):
;   procname    - C name of the procedure (exported as _<procname>)
;   uyvy        - 0 or 1; when 1, the byte offsets of the encode1 samples and
;                 of the encode2/encode3 samples are swapped (1,0,3,2 instead
;                 of 0,1,2,3) and the first dword is byte-reordered
;   delta       - 0 or 1; when 1, each sample has an earlier sample subtracted
;                 from it (2 bytes back for encode1, 4 bytes back for
;                 encode2/encode3)
;   decorrelate - 0 or 1; when 1, the encode3 sample additionally has the byte
;                 2 bytes earlier subtracted (see HUFF_COMPRESS back2)
HUFF_COMPRESS_PROC_YUV MACRO procname,uyvy,delta,decorrelate
PUBLIC C _&procname
;;unsigned long* __cdecl procname(
;; [esp+ 4] unsigned char* src,
;; [esp+ 8] unsigned long* dst,
;; [esp+12] unsigned char* src_end);
;; Returns the address just past the last dword written to dst.
_&procname PROC
HUFF_CODEC_PROC_START
;; The first four source bytes are stored uncompressed. For uyvy the bytes
;; b0 b1 b2 b3 are rearranged to b1 b0 b3 b2 before being stored.
IF &uyvy
bswap eax
rol eax,16
ENDIF
mov [edi],eax
add edi,4
;; ch = -32: the bit accumulator starts out empty
mov ch,-32
;; Each iteration advances esi by 4 before it is compared with ebp, so the
;; loop stops once the group ending at src_end has been coded.
;; NOTE(review): the exit test is an equality check, so this appears to
;; require (src_end - src) to be a multiple of 4 -- confirm against callers.
sub ebp,4
align 32
loop0:
;; Only the first invocation carries the increment of 4; the remaining three
;; address their samples relative to the already advanced esi.
HUFF_COMPRESS encode1,%0+&uyvy,%2*&delta,0,4
HUFF_COMPRESS encode2,%1-&uyvy,%4*&delta,0,0
HUFF_COMPRESS encode1,%2+&uyvy,%2*&delta,0,0
HUFF_COMPRESS encode3,%3-&uyvy,%4*&delta,%2*&decorrelate,0
HUFF_COMPRESS_END loop0
_&procname ENDP
ENDM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Instantiate the four YUV compressors: (uyvy, delta) = (0,0), (0,1), (1,0),
; (1,1). None of them enables the 'decorrelate' option.
HUFF_COMPRESS_PROC_YUV asm_CompressYUY2,0,0,0
HUFF_COMPRESS_PROC_YUV asm_CompressYUY2Delta,0,1,0
HUFF_COMPRESS_PROC_YUV asm_CompressUYVY,1,0,0
HUFF_COMPRESS_PROC_YUV asm_CompressUYVYDelta,1,1,0
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; HUFF_COMPRESS_PROC_RGB -- generate one compression procedure for packed
; pixels of 3 (rgba=0) or 4 (rgba=1) bytes.
;
; Parameters (all assembly-time):
;   procname    - C name of the procedure (exported as _<procname>)
;   rgba        - 0 or 1; the pixel size is 3+rgba bytes
;   decorrelate - 0 or 1; when 1, byte 1 of the pixel is coded first and
;                 bytes 0 and 2 are coded as differences relative to byte 1
;
; Every sample has the same byte of the previous pixel subtracted from it
; (back1 = pixel size).
HUFF_COMPRESS_PROC_RGB MACRO procname,rgba,decorrelate
PUBLIC C _&procname
;;unsigned char* __cdecl procname(
;; [esp+ 4] unsigned char* src,
;; [esp+ 8] unsigned char* dst,
;; [esp+12] unsigned char* src_end);
;; Output is written to dst one dword at a time; the return value is the
;; address just past the last dword written.
_&procname PROC
HUFF_CODEC_PROC_START
;; The first pixel is stored uncompressed in one dword. For 3-byte pixels
;; the loaded dword is shifted so that the fourth source byte is dropped and
;; the lowest stored byte is zero.
IFE &rgba
shl eax,8
ENDIF
mov [edi],eax
add edi,4
;; ch = -32: the bit accumulator starts out empty
mov ch,-32
;; esi is advanced by one pixel before it is compared with ebp, so the loop
;; stops once the pixel ending at src_end has been coded.
;; NOTE(review): the exit test is an equality check, so this appears to
;; require (src_end - src) to be a multiple of the pixel size -- confirm.
sub ebp,3+&rgba
align 32
loop0:
IF &decorrelate
;; byte 1:  cur[1] - prev[1]
;; byte 0: (cur[0] - cur[1]) - (prev[0] - prev[1])     (back2 = -1)
;; byte 2: (cur[2] - cur[1]) - (prev[2] - prev[1])     (back2 = +1)
HUFF_COMPRESS encode2,1,%3+&rgba,0,%3+&rgba
HUFF_COMPRESS encode1,0,%3+&rgba,-1,0
HUFF_COMPRESS encode3,2,%3+&rgba,1,0
ELSE
;; each byte: cur[i] - prev[i]
HUFF_COMPRESS encode1,0,%3+&rgba,0,%3+&rgba
HUFF_COMPRESS encode2,1,%3+&rgba,0,0
HUFF_COMPRESS encode3,2,%3+&rgba,0,0
ENDIF
IF &rgba
;; the fourth byte of a 4-byte pixel reuses the encode3 tables
HUFF_COMPRESS encode3,3,4,0,0
ENDIF
HUFF_COMPRESS_END loop0
_&procname ENDP
ENDM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Instantiate the four RGB compressors: (rgba, decorrelate) = (0,0), (0,1),
; (1,0), (1,1).
HUFF_COMPRESS_PROC_RGB asm_CompressRGBDelta,0,0
HUFF_COMPRESS_PROC_RGB asm_CompressRGBDeltaDecorrelate,0,1
HUFF_COMPRESS_PROC_RGB asm_CompressRGBADelta,1,0
HUFF_COMPRESS_PROC_RGB asm_CompressRGBADeltaDecorrelate,1,1
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
PUBLIC C _mmx_RowDiff
;void __cdecl mmx_RowDiff(
; [esp+ 4] unsigned char* src,
; [esp+ 8] unsigned char* dst,
; [esp+12] unsigned char* src_end,
; [esp+16] int stride);
;
; Writes to dst the byte-wise difference between each source byte and the
; byte 'stride' bytes before it:
;   - the first 'stride' bytes are copied unchanged (skipped when src == dst);
;   - every later byte i gets dst[i] = src[i] - src[i-stride] (wrapping).
; The difference pass walks backwards from src_end towards src+stride, so the
; bytes it reads at lower addresses have not been overwritten yet when
; src == dst.
;
; NOTE(review): the first-row copy moves at least 16 bytes, and the 32-byte
; difference loop runs at least once, without checking how much data there is.
; The routine therefore appears to assume at least two rows and rows of more
; than a few dozen bytes -- confirm against callers.
; NOTE(review): for in-place use the 32-byte loop stores part of a block
; before it has loaded all of the row above; this looks safe only when
; stride >= 32 -- confirm.
_mmx_RowDiff PROC
push ebp
push edi
push esi
push ebx
; four saved registers = 16 bytes, hence the +16 on every argument offset
; (ebp is saved and restored but not otherwise used here)
mov esi,[esp+4+16]
mov edi,[esp+8+16]
mov ecx,[esp+16+16]
; ecx = src + stride = end of the first row
add ecx,esi
; in-place operation: the first row is already where it belongs
cmp esi,edi
je diff
; make sure we're 8-byte aligned
; (copy single bytes until the destination pointer is a multiple of 8)
loop0:
test edi,7
jz endloop0
mov al,[esi]
inc esi
mov [edi],al
inc edi
jmp loop0
endloop0:
; copy the (rest of the) first row
; 16 bytes per iteration; may run up to 15 bytes past the end of the row
loop1:
movq mm0,[esi]
movq mm1,[esi+8]
add esi,16
movq [edi],mm0
movq [edi+8],mm1
add edi,16
cmp esi,ecx
jb loop1
; diff the remaining rows
diff:
; esi = src_end, edi = dst + (src_end - src), ecx = src + stride (lower
; bound of the region to process), ebx = -stride (offset of the row above)
mov esi,[esp+12+16]
mov ecx,[esp+4+16]
mov edi,[esp+8+16]
mov ebx,[esp+16+16]
add edi,esi
sub edi,ecx
add ecx,ebx
neg ebx
; align again (sigh...)
; (process single bytes backwards until the destination pointer is a
; multiple of 8)
loop2:
test edi,7
jz endloop2
mov al,[esi-1]
sub al,[esi+ebx-1]
dec esi
mov [edi-1],al
dec edi
jmp loop2
endloop2:
; step both pointers back to the start of the last 32-byte block
mov edx,32
sub esi,edx
sub edi,edx
align 32
; main loop: 32 bytes per iteration, moving towards lower addresses.
; The cmp below sets the flags for the jae at the bottom; the movq and lea
; instructions in between leave the flags untouched.
loop3:
movq mm3,[esi+24]
movq mm2,[esi+16]
movq mm6,[esi+ebx+16]
psubb mm3,[esi+ebx+24] ; 2
psubb mm2,mm6
movq mm1,[esi+8]
movq [edi+24],mm3 ; 2
movq mm5,[esi+ebx+8]
movq mm0,[esi]
movq [edi+16],mm2 ; 2
psubb mm1,mm5
psubb mm0,[esi+ebx] ; 2
sub esi,edx
movq [edi+8],mm1 ; 2
cmp esi,ecx
movq [edi],mm0 ; 2
lea edi,[edi-32]
jae loop3
; and more alignment
; (undo the last step so esi/edi point at the start of the last block that
; was processed, then finish the bytes between ecx and that point one at a
; time)
add esi,edx
add edi,edx
loop4:
cmp esi,ecx
jbe endloop4
mov al,[esi-1]
sub al,[esi+ebx-1]
dec esi
mov [edi-1],al
dec edi
jmp loop4
endloop4:
; leave MMX state before returning to the caller
emms
pop ebx
pop esi
pop edi
pop ebp
retn
_mmx_RowDiff ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
PUBLIC C _mmx_RowAccum
;void __cdecl mmx_RowAccum(
; [esp+ 4] unsigned char* buf,
; [esp+ 8] unsigned char* buf_end,
; [esp+12] int stride);
;
; In place, adds to every byte in [buf+stride, buf_end) the byte 'stride'
; bytes before it (wrapping byte arithmetic), walking upwards so that each
; byte is added to an already accumulated predecessor.
;
; Registers: esi = current byte, ebx = -stride, ecx = end bound,
;            al/eax = scratch, mm0-mm5 = scratch.
;
; The alignment loop and the 32-byte MMX loop are both guarded against
; running past buf_end, so short buffers are handled by the byte loops alone.
_mmx_RowAccum PROC
        push    ebp
        push    esi
        push    ebx
        ; three saved registers = 12 bytes, hence the +12 on argument offsets
        mov     esi,[esp+4+12]          ; buf
        mov     ebx,[esp+12+12]         ; stride
        mov     ecx,[esp+8+12]          ; buf_end
        add     esi,ebx                 ; esi = first byte of the second row
        neg     ebx                     ; [esi+ebx] = byte one row above
        ; byte-at-a-time until esi is 8-byte aligned (or the data runs out)
loop0:
        cmp     esi,ecx
        jae     endloop2                ; nothing (left) to accumulate
        test    esi,7
        jz      endloop0
        mov     al,[esi+ebx]
        add     [esi],al
        inc     esi
        jmp     loop0
endloop0:
        ; only enter the MMX loop if a whole 32-byte block is available
        mov     eax,ecx
        sub     eax,esi                 ; eax = bytes remaining (> 0 here)
        cmp     eax,32
        jb      loop2
        sub     ecx,32                  ; ecx = last address a full block may start at
        align 32
        ; main loop: 32 bytes per iteration
loop1:
        movq    mm0,[esi]
        movq    mm1,[esi+8]
        movq    mm5,[esi+ebx+24]
        paddb   mm0,[esi+ebx]
        movq    mm2,[esi+16]
        movq    mm4,[esi+ebx+16]
        paddb   mm1,[esi+ebx+8]
        movq    mm3,[esi+24]
        paddb   mm2,mm4
        movq    [esi],mm0
        paddb   mm3,mm5
        movq    [esi+8],mm1
        movq    [esi+16],mm2
        movq    [esi+24],mm3
        add     esi,32
        cmp     esi,ecx
        jbe     loop1
        ; cleanup end in case of misalignment
        add     ecx,32                  ; ecx = buf_end again
loop2:
        cmp     esi,ecx
        jae     endloop2
        mov     al,[esi+ebx]
        add     [esi],al
        inc     esi
        jmp     loop2
endloop2:
        emms                            ; leave MMX state before returning
        pop     ebx
        pop     esi
        pop     ebp
        retn
_mmx_RowAccum ENDP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; YUV_SHIFT -- build, in mmb, the "previous sample of the same channel" for
; each byte of a packed YUV qword.
;
; Parameters:
;   mmb  - MMX register holding the preceding 8 bytes on entry; receives the
;          result
;   mma  - MMX register holding the current 8 bytes; not modified
;   uyvy - 0 or 1 (assembly-time); selects the two shift counts:
;          8 and 16 bits for uyvy=0, 16 and 8 bits for uyvy=1
;
; Uses three rounds of byte interleaving to regroup the 16 input bytes, shifts
; the two halves by different amounts, then interleaves once more.
YUV_SHIFT MACRO mmb,mma,uyvy ; clobbers mm4,5
; mma:mmb = ABCDEFGHIJKLMNOP (VYUYVYUY...) - backwards from mem order
; we want mmb = EDGFIHKJ (prev pixel of same channel)
movq mm4,mmb
punpcklbw mmb,mma ; mm4:mmb = AIBJCKDLEMFNGOHP
punpckhbw mm4,mma
movq mm5,mmb
punpcklbw mmb,mm4 ; mm5:mmb = AEIMBFJNCGKODHLP
punpckhbw mm5,mm4
movq mm4,mmb
punpcklbw mmb,mm5 ; mm4:mmb = ACEGIKMOBDFHJLNP
punpckhbw mm4,mm5
psllq mmb,8+8*&uyvy ; mm4:mmb = EGIKMO__DFHJLNP_
psllq mm4,16-8*&uyvy
punpckhbw mmb,mm4 ; mmb = EDGFIHKJ (for YUY2; different for UYVY)
ENDM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
MEDIAN_PREDICT_PROC MACRO procname,uyvy
PUBLIC C _&procname
;void __cdecl mmx_MedianPredict(
; [esp+ 4] unsigned char* src,
; [esp+ 8] unsigned char* dst,
; [esp+12] unsigned char* src_end,
; [esp+16] int stride);
_&procname PROC
push ebp
mov ebp,esp
push edi
push esi
push ebx
; do the first row
mov esi,[ebp+4+4]
mov edi,[ebp+8+4]
mov ebx,[ebp+16+4]
lea ecx,[ebx+esi+8]
neg ebx
pxor mm2,mm2
movq mm3,[esi] ; for use in next loop
loop0:
movq mm0,[esi]
YUV_SHIFT mm2,mm0,uyvy
add esi,8
movq mm1,mm0
psubb mm1,mm2
movq [edi],mm1
movq mm2,mm0
add edi,8
cmp esi,ecx
jb loop0
mov ecx,[ebp+8+4] ; recopy first group of four, just for consistency with other compression methods
movd [ecx],mm3
; do the remaining rows
mov ecx,[ebp+12+4]
; mm2,3 are already initialized from previous loop
align 32
; pixel arrangement:
; mm3 mm1
; mm2 mm0
loop1:
; mm2,3 <- appropriate left and above-left pixels
movq mm0,[esi]
movq mm1,[esi+ebx]
YUV_SHIFT mm2,mm0,uyvy ; note: clobbers mm4,5
add esi,8
YUV_SHIFT mm3,mm1,uyvy
; mm4 <- median of mm1,mm2,(mm1+mm2-mm3)
movq mm4,mm2 ; (mm2,mm4) <- (min(mm1,mm2),max(mm1,mm2))
movq mm5,mm2 ; mm5 <- mm1+mm2-mm3
psubusb mm4,mm1
paddb mm5,mm1
psubb mm2,mm4
psubb mm5,mm3
paddb mm4,mm1
psubusb mm2,mm5 ; mm2 = max(mm2,mm5)
paddb mm2,mm5
movq mm5,mm4 ; mm4 = min(mm2,mm4)
psubusb mm5,mm2
psubb mm4,mm5 ; now mm4 = median
; write out the result and loop
movq mm2,mm0
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -