📄 frame_mmx.asm
字号:
;-------------------------------------------------------------------
; YUV 4:2:2 -> BGR set-up: builds the per-call constants and pointer
; slots used by the yuv422_to_bgr_* loops that follow.
; This excerpt starts mid-procedure: the code that loads edx / ebp and
; reserves the esp scratch area is not visible here.
;
; Values read through edx (names taken from the existing comments):
;   [edx+ 0] width            [edx+ 4] loaded into edx (see note below)
;   [edx+ 8] in_step          [edx+12] out_step
;   [edx+16] c_offset (added to the U/V pointers)
;   [edx+20] y_offset         [edx+24] y_gain
;   [edx+28] bu   [edx+32] gu   [edx+36] gv   [edx+40] rv
;
; Scratch slots written here:
;   [esp+  8] y_offset in all 4 words
;   [esp+ 16] (y_gain+4)>>3 in both dwords
;   [esp+ 24] 4096 in both dwords (rounding term for the >>13 in the loops)
;   [esp+ 32] 128 in all 4 words (subtracted from U and V in the loops)
;   [esp+ 40] (bu+4)>>3 in both dwords
;   [esp+ 48] words, low to high: gu', gv', gu', gv'  (x' = (x+4)>>3)
;   [esp+ 56] (rv+4)>>3 in both dwords
;   [esp+ 96] top Y pointer        [esp+100] bottom Y pointer + in_step
;   [esp+104] top U/V pointers + c_offset
;   [esp+112] bottom U/V pointers + in_step + c_offset
;   [esp+120] out pointer          [esp+124] width/8
;   [esp+128] (width%8)*3          [esp+132] in_step*2
;   [esp+136] out_step
; ([esp+64..95] is used as scratch by the loops, not written here.)
;
; Registers on fall-through into the loop: esi = Y pointer, eax = U
; pointer, ebx = V pointer, edi = out pointer, ecx = width/8.
;-------------------------------------------------------------------
pxor mm0, mm0 ; all 0
pxor mm1, mm1 ; all 0
pcmpeqd mm2, mm2 ; all -1
movd mm3, [edx+20] ; y_offset
movd mm4, [edx+24] ; y_gain
movd mm5, [edx+28] ; bu
movd mm6, [edx+32] ; gu
movd mm7, [edx+36] ; gv
psubd mm0, mm2 ; all 1 (each dword: 0 - (-1))
psubw mm1, mm2 ; all 1 (each word: 0 - (-1))
punpcklwd mm3, mm3 ; xx_xx_yo_yo
movq mm2, mm0 ; copy of the dword 1s, becomes half_13 below
punpckldq mm3, mm3 ; yo_yo_yo_yo
pslld mm0, 2 ; half_3 (4 = rounding term for the >>3 below)
psllw mm1, 7 ; c_offset (128 in every word)
pslld mm2, 12 ; half_13 (4096 = rounding term for the >>13 in the loops)
punpckldq mm4, mm4 ; yg_yg
punpckldq mm5, mm5 ; bu_bu
punpckldq mm6, mm7 ; gv_gu
movd mm7, [edx+40] ; rv
paddd mm4, mm0 ; y_gain + half_3
paddd mm5, mm0 ; bu + half_3
paddd mm6, mm0 ; gv, gu + half_3
punpckldq mm7, mm7 ; rv_rv
psrad mm4, 3 ; (y_gain + 4) >> 3
psrad mm5, 3 ; (bu + 4) >> 3
paddd mm7, mm0 ; rv + half_3
psrad mm6, 3 ; (gv + 4) >> 3, (gu + 4) >> 3
psrad mm7, 3 ; (rv + 4) >> 3
packssdw mm6, mm6 ; gv_gu_gv_gu
movq [esp+8], mm3 ; y_offset words
movq [esp+16], mm4 ; scaled y_gain dwords
movq [esp+24], mm2 ; half_13 dwords
movq [esp+32], mm1 ; 128 words
movq [esp+40], mm5 ; scaled bu dwords
movq [esp+48], mm6 ; scaled gv_gu_gv_gu words
movq [esp+56], mm7 ; scaled rv dwords
mov eax, [edx] ; width (this copy becomes the remainder byte count)
mov ecx, [edx] ; width (this copy becomes the 8-pixel group count)
movd mm0, [edx+8] ; in_step
mov ebx, [edx+12] ; out_step (ebx is only a carrier until the store below)
movd mm1, [edx+16] ; c_offset added to the U/V pointers (not the 128 bias)
mov edx, [edx+4] ; edx stops addressing the parameter block here; NOTE(review): presumably the line count - not used in this excerpt, confirm
mov esi, [ebp+28+4] ; structure the top-line Y/U/V pointers are read from
mov edi, [ebp+28+8] ; structure the bottom-line Y/U/V pointers are read from
and eax, 7 ; width % 8
shr ecx, 3 ; width / 8
mov [esp+136], ebx ; out_step
mov ebx, eax ; ebx = width % 8
shl eax, 1 ; eax = (width % 8) * 2
movd mm2, [esi+16] ; top y
movd mm3, [esi+20] ; top u
movd mm4, [esi+24] ; top v
movd mm5, [edi+16] ; bottom y
movd mm6, [edi+20] ; bottom u
movd mm7, [edi+24] ; bottom v
add eax, ebx ; eax = (width % 8) * 3 = leftover output bytes per line
punpckldq mm0, mm0 ; in_step in both dwords
punpckldq mm1, mm1 ; c_offset in both dwords
punpckldq mm3, mm4 ; top v_u pointer pair
punpckldq mm6, mm7 ; bottom v_u pointer pair
paddd mm5, mm0 ; bottom y + in_step
paddd mm6, mm1 ; bottom uv + c_offset
paddd mm3, mm1 ; top uv + c_offset
paddd mm6, mm0 ; bottom uv + in_step + c_offset
punpckldq mm2, mm5 ; low dword = top y, high dword = bottom y + in_step
pslld mm0, 1 ; in_step * 2
mov edi, [ebp+28+12] ; out
movq [esp+ 96], mm2 ; top y & bottom y
movq [esp+104], mm3 ; top uv
movq [esp+112], mm6 ; bottom uv
mov [esp+120], edi ; out pointer for the current line
mov [esp+124], ecx ; width/8
mov [esp+128], eax ; width%8*3
movd [esp+132], mm0 ; in_step*2
mov esi, [esp+ 96] ; esi = Y pointer of the first line
mov eax, [esp+104] ; eax = U pointer of the first line
mov ebx, [esp+108] ; ebx = V pointer of the first line
;-------------------------------------------------------------------
; Vertical loop: one pass per output line.
; Expected on entry: esi = Y pointer, eax = U pointer, ebx = V pointer,
; edi = output pointer, ecx = number of full 8-pixel groups (width/8).
;
; The horizontal loop below is bottom-tested (dec ecx / jnz), so it must
; not be entered with ecx == 0: dec would wrap to 0FFFFFFFFh and the loop
; would run ~2^32 times, overrunning the source and destination buffers.
; Lines narrower than 8 pixels therefore skip straight past the loop.
yuv422_to_bgr_next_line:
test ecx, ecx ; any full 8-pixel group on this line?
jz yuv422_to_bgr_no_full_group ; none (width < 8): skip the 8-pixel loop
;-------------------------------------------------------------------
; Horizontal loop: each iteration converts 8 pixels
; (reads 8 Y, 4 U, 4 V bytes; writes 24 output bytes).
yuv422_to_bgr_next_8_pixel:
;-------------------------------------------------------------------
; Conversion core
;
; Per pixel, with the constants prepared in [esp+8]..[esp+56]:
;   luma = (Y - y_offset) * y_gain' + 4096
;   B = (luma + (U-128) * bu') >> 13
;   G = (luma + (U-128) * gu' + (V-128) * gv') >> 13
;   R = (luma + (V-128) * rv') >> 13
; Each U/V sample is shared by two horizontally adjacent pixels.
; Results are saturated by packssdw / packuswb.
pxor mm7, mm7 ; mm7 = 0, used to zero-extend bytes and words
movd mm0, [esi] ; Y[0:3]
movd mm2, [esi+4] ; Y[4:7]
movd mm4, [eax] ; U[0:3]
movd mm5, [ebx] ; V[0:3]
lea esi, [esi+8] ; advance Y pointer by 8 samples
lea eax, [eax+4] ; advance U pointer by 4 samples
lea ebx, [ebx+4] ; advance V pointer by 4 samples
punpcklbw mm0, mm7 ; Y[0:3] bytes -> words
punpcklbw mm2, mm7 ; Y[4:7] bytes -> words
psubw mm0, [esp+8] ; Y - y_offset
psubw mm2, [esp+8]
punpcklbw mm4, mm7 ; U bytes -> words
punpcklbw mm5, mm7 ; V bytes -> words
movq mm1, mm0
movq mm3, mm2
punpcklwd mm0, mm7 ; Y[0:1] words -> dwords (high word 0 for pmaddwd)
punpcklwd mm2, mm7 ; Y[4:5]
punpckhwd mm1, mm7 ; Y[2:3]
punpckhwd mm3, mm7 ; Y[6:7]
pmaddwd mm0, [esp+16] ; * y_gain'
pmaddwd mm1, [esp+16]
pmaddwd mm2, [esp+16]
pmaddwd mm3, [esp+16]
paddd mm0, [esp+24] ; + 4096 (rounding for the >>13)
paddd mm1, [esp+24]
paddd mm2, [esp+24]
paddd mm3, [esp+24]
psubw mm4, [esp+32] ; U - 128
psubw mm5, [esp+32] ; V - 128
movq [esp+64], mm0 ; luma terms for pixels 0,1
movq [esp+72], mm1 ; luma terms for pixels 2,3
movq [esp+80], mm2 ; luma terms for pixels 4,5
movq [esp+88], mm3 ; luma terms for pixels 6,7
movq mm0, mm4
movq mm1, mm4
movq mm2, mm4
movq mm3, mm5
punpcklwd mm0, mm7 ; U[0:1] -> dwords (for B)
punpckhwd mm1, mm7 ; U[2:3] -> dwords (for B)
punpcklwd mm2, mm5 ; V1_U1_V0_U0 (for G)
punpcklwd mm3, mm7 ; V[0:1] -> dwords (for R)
punpckhwd mm4, mm5 ; V3_U3_V2_U2 (for G)
punpckhwd mm5, mm7 ; V[2:3] -> dwords (for R)
pmaddwd mm0, [esp+40] ; U * bu'
pmaddwd mm1, [esp+40]
pmaddwd mm2, [esp+48] ; U * gu' + V * gv'
pmaddwd mm3, [esp+56] ; V * rv'
pmaddwd mm4, [esp+48]
pmaddwd mm5, [esp+56]
; --- blue: duplicate each chroma term for its two pixels, add luma ---
movq mm6, mm0
movq mm7, mm1
punpckldq mm0, mm0
punpckldq mm1, mm1
paddd mm0, [esp+64]
paddd mm1, [esp+80]
punpckhdq mm6, mm6
punpckhdq mm7, mm7
paddd mm6, [esp+72]
paddd mm7, [esp+88]
psrad mm0, 13
psrad mm1, 13
psrad mm6, 13
psrad mm7, 13
packssdw mm0, mm6 ; B[0:3] words
packssdw mm1, mm7 ; B[4:7] words
; --- green ---
movq mm6, mm2
movq mm7, mm4
punpckldq mm2, mm2
punpckldq mm4, mm4
paddd mm2, [esp+64]
paddd mm4, [esp+80]
punpckhdq mm6, mm6
punpckhdq mm7, mm7
paddd mm6, [esp+72]
paddd mm7, [esp+88]
psrad mm2, 13
psrad mm4, 13
psrad mm6, 13
psrad mm7, 13
packssdw mm2, mm6 ; G[0:3] words
packssdw mm4, mm7 ; G[4:7] words
; --- red ---
movq mm6, mm3
movq mm7, mm5
punpckldq mm3, mm3
punpckldq mm5, mm5
paddd mm3, [esp+64]
paddd mm5, [esp+80]
punpckhdq mm6, mm6
punpckhdq mm7, mm7
paddd mm6, [esp+72]
paddd mm7, [esp+88]
psrad mm3, 13
psrad mm5, 13
psrad mm6, 13
psrad mm7, 13
packssdw mm3, mm6 ; R[0:3] words
packssdw mm5, mm7 ; R[4:7] words
;
; Register contents at this point
;
; mm0 - B[0:3]
; mm1 - B[4:7]
; mm2 - G[0:3]
; mm3 - R[0:3]
; mm4 - G[4:7]
; mm5 - R[4:7]
;
; From here, the goal is to produce these three quadwords
; (written high byte ... low byte):
;
; G2_B2_R1_G1_B1_R0_G0_B0
; B5_R4_G4_B4_R3_G3_B3_R2
; R7_G7_B7_R6_G6_B6_R5_G5
;
; i.e. 8 packed pixels stored as B, G, R bytes in memory order.
;
packuswb mm0, mm1 ; B7B6B5B4B3B2B1B0
packuswb mm2, mm4 ; G7G6G5G4G3G2G1G0
packuswb mm3, mm5 ; R7R6R5R4R3R2R1R0
movq mm1, mm0
movq mm4, mm0
movq mm5, mm2
movq mm6, mm3
psrlq mm1, 8 ; 00B7B6B5B4B3B2B1
punpcklbw mm0, mm2 ; G3B3G2B2G1B1G0B0 x3x0
punpcklbw mm2, mm3 ; R3G3R2G2R1G1R0G0 5x2x
punpcklbw mm3, mm1 ; B4R3B3R2B2R1B1R0 x4x1
punpckhbw mm4, mm5 ; G7B7G6B6G5B5G4B4 x9x6
punpckhbw mm5, mm6 ; R7G7R6G6R5G5R4G4 Bx8x
punpckhbw mm6, mm1 ; 00R7B7R6B6R5B5R4 xAx7
movq mm1, mm0
movq mm7, mm4
psrlq mm2, 16 ; 0000R3G3R2G2R1G1 x5x2
psrlq mm5, 16 ; 0000R7G7R6G6R5G5 xBx8
punpcklwd mm0, mm3 ; B2R1G1B1B1R0G0B0 xx10
punpcklwd mm4, mm6 ; B6R5G5B5B5R4G4B4 xx76
psrlq mm1, 32 ; 00000000G3B3G2B2 xxx3
psrlq mm7, 32 ; 00000000G7B7G6B6 xxx9
punpckhwd mm3, mm2 ; 0000B4R3R3G3B3R2 xx54
punpckhwd mm6, mm5 ; 000000R7R7G7B7R6 xxBA
punpcklwd mm2, mm1 ; G3B3R2G2G2B2R1G1 xx32
punpcklwd mm5, mm7 ; G7B7R6G6G6B6R5G5 xx98
movd [edi], mm0 ; output bytes  0.. 3
movd [edi+4], mm2 ; output bytes  4.. 7
movd [edi+8], mm3 ; output bytes  8..11
movd [edi+12], mm4 ; output bytes 12..15
movd [edi+16], mm5 ; output bytes 16..19
movd [edi+20], mm6 ; output bytes 20..23
lea edi, [edi+24] ; advance output pointer by 8 pixels * 3 bytes
;-------------------------------------------------------------------
; Horizontal loop end check
dec ecx ; one 8-pixel group done
jnz yuv422_to_bgr_next_8_pixel
; Target of the ecx == 0 guard at yuv422_to_bgr_next_line; execution
; also falls through to here when the loop finishes normally.
yuv422_to_bgr_no_full_group:
;-------------------------------------------------------------------
; Remainder handling: the width % 8 pixels left at the end of a line.
; ecx is loaded with the byte count kept in [esp+128] ((width%8)*3);
; if it is zero there is nothing left and the line is finished.
; Otherwise a full group of 8 pixels is converted exactly as in the
; 8-pixel loop, but the 24 result bytes are staged in [esp+64..87]
; and only the first ecx bytes are copied to the output.
mov ecx, [esp+128]
test ecx, ecx
jz yuv422_to_bgr_line_end
; Remainder conversion core
; NOTE(review): the loads below still read 8 Y bytes and 4 U / 4 V
; bytes even when fewer pixels remain - this assumes the bytes just
; past the end of the line are readable; confirm against the caller.
; The input pointers are not advanced here.
pxor mm7, mm7 ; mm7 = 0, used to zero-extend bytes and words
movd mm0, [esi] ; Y[0:3]
movd mm2, [esi+4] ; Y[4:7]
movd mm4, [eax] ; U[0:3]
movd mm5, [ebx] ; V[0:3]
punpcklbw mm0, mm7 ; Y[0:3] bytes -> words
punpcklbw mm2, mm7 ; Y[4:7] bytes -> words
psubw mm0, [esp+8] ; Y - y_offset
psubw mm2, [esp+8]
punpcklbw mm4, mm7 ; U bytes -> words
punpcklbw mm5, mm7 ; V bytes -> words
movq mm1, mm0
movq mm3, mm2
punpcklwd mm0, mm7 ; Y[0:1] words -> dwords
punpcklwd mm2, mm7 ; Y[4:5]
punpckhwd mm1, mm7 ; Y[2:3]
punpckhwd mm3, mm7 ; Y[6:7]
pmaddwd mm0, [esp+16] ; * scaled y_gain
pmaddwd mm1, [esp+16]
pmaddwd mm2, [esp+16]
pmaddwd mm3, [esp+16]
paddd mm0, [esp+24] ; + rounding term for the >>13
paddd mm1, [esp+24]
paddd mm2, [esp+24]
paddd mm3, [esp+24]
psubw mm4, [esp+32] ; U - 128
psubw mm5, [esp+32] ; V - 128
movq [esp+64], mm0 ; luma terms for pixels 0,1
movq [esp+72], mm1 ; luma terms for pixels 2,3
movq [esp+80], mm2 ; luma terms for pixels 4,5
movq [esp+88], mm3 ; luma terms for pixels 6,7
movq mm0, mm4
movq mm1, mm4
movq mm2, mm4
movq mm3, mm5
punpcklwd mm0, mm7 ; U[0:1] -> dwords (for B)
punpckhwd mm1, mm7 ; U[2:3] -> dwords (for B)
punpcklwd mm2, mm5 ; V1_U1_V0_U0 (for G)
punpcklwd mm3, mm7 ; V[0:1] -> dwords (for R)
punpckhwd mm4, mm5 ; V3_U3_V2_U2 (for G)
punpckhwd mm5, mm7 ; V[2:3] -> dwords (for R)
pmaddwd mm0, [esp+40] ; U * scaled bu
pmaddwd mm1, [esp+40]
pmaddwd mm2, [esp+48] ; U * scaled gu + V * scaled gv
pmaddwd mm3, [esp+56] ; V * scaled rv
pmaddwd mm4, [esp+48]
pmaddwd mm5, [esp+56]
; --- blue: duplicate each chroma term for its two pixels, add luma ---
movq mm6, mm0
movq mm7, mm1
punpckldq mm0, mm0
punpckldq mm1, mm1
paddd mm0, [esp+64]
paddd mm1, [esp+80]
punpckhdq mm6, mm6
punpckhdq mm7, mm7
paddd mm6, [esp+72]
paddd mm7, [esp+88]
psrad mm0, 13
psrad mm1, 13
psrad mm6, 13
psrad mm7, 13
packssdw mm0, mm6 ; B[0:3] words
packssdw mm1, mm7 ; B[4:7] words
; --- green ---
movq mm6, mm2
movq mm7, mm4
punpckldq mm2, mm2
punpckldq mm4, mm4
paddd mm2, [esp+64]
paddd mm4, [esp+80]
punpckhdq mm6, mm6
punpckhdq mm7, mm7
paddd mm6, [esp+72]
paddd mm7, [esp+88]
psrad mm2, 13
psrad mm4, 13
psrad mm6, 13
psrad mm7, 13
packssdw mm2, mm6 ; G[0:3] words
packssdw mm4, mm7 ; G[4:7] words
; --- red ---
movq mm6, mm3
movq mm7, mm5
punpckldq mm3, mm3
punpckldq mm5, mm5
paddd mm3, [esp+64]
paddd mm5, [esp+80]
punpckhdq mm6, mm6
punpckhdq mm7, mm7
paddd mm6, [esp+72]
paddd mm7, [esp+88]
psrad mm3, 13
psrad mm5, 13
psrad mm6, 13
psrad mm7, 13
packssdw mm3, mm6 ; R[0:3] words
packssdw mm5, mm7 ; R[4:7] words
; --- pack to bytes and interleave into B,G,R byte order ---
packuswb mm0, mm1 ; B7B6B5B4B3B2B1B0
packuswb mm2, mm4 ; G7G6G5G4G3G2G1G0
packuswb mm3, mm5 ; R7R6R5R4R3R2R1R0
movq mm1, mm0
movq mm4, mm0
movq mm5, mm2
movq mm6, mm3
psrlq mm1, 8 ; 00B7B6B5B4B3B2B1
punpcklbw mm0, mm2 ; G3B3G2B2G1B1G0B0 x3x0
punpcklbw mm2, mm3 ; R3G3R2G2R1G1R0G0 5x2x
punpcklbw mm3, mm1 ; B4R3B3R2B2R1B1R0 x4x1
punpckhbw mm4, mm5 ; G7B7G6B6G5B5G4B4 x9x6
punpckhbw mm5, mm6 ; R7G7R6G6R5G5R4G4 Bx8x
punpckhbw mm6, mm1 ; 00R7B7R6B6R5B5R4 xAx7
movq mm1, mm0
movq mm7, mm4
psrlq mm2, 16 ; 0000R3G3R2G2R1G1 x5x2
psrlq mm5, 16 ; 0000R7G7R6G6R5G5 xBx8
punpcklwd mm0, mm3 ; B2R1G1B1B1R0G0B0 xx10
punpcklwd mm4, mm6 ; B6R5G5B5B5R4G4B4 xx76
psrlq mm1, 32 ; 00000000G3B3G2B2 xxx3
psrlq mm7, 32 ; 00000000G7B7G6B6 xxx9
punpckhwd mm3, mm2 ; 0000B4R3R3G3B3R2 xx54
punpckhwd mm6, mm5 ; 000000R7R7G7B7R6 xxBA
punpcklwd mm2, mm1 ; G3B3R2G2G2B2R1G1 xx32
punpcklwd mm5, mm7 ; G7B7R6G6G6B6R5G5 xx98
punpckldq mm0, mm2 ; 3210 (output bytes  0.. 7)
punpckldq mm3, mm4 ; 7654 (output bytes  8..15)
punpckldq mm5, mm6 ; BA98 (output bytes 16..23)
; The luma scratch area is no longer needed; reuse it as a 24-byte
; staging buffer for the converted pixels.
movq [esp+64], mm0
movq [esp+72], mm3
movq [esp+80], mm5
lea esi, [esp+64] ; esi now points at the staging buffer (the Y pointer in esi is discarded)
rep movsb ; copy ecx = (width%8)*3 bytes to [edi]; NOTE(review): relies on the direction flag being clear
;-------------------------------------------------------------------
; Vertical loop end check
; The visible part rotates the per-line pointer slots: the "next" line
; becomes the current one, and the line just finished, advanced by
; in_step*2, becomes the new "next". The output pointer moves on by
; out_step. NOTE(review): the line-count test, the reload of
; eax / ebx / ecx and the branch back to the line loop are not in
; this excerpt - presumably they follow; confirm in the full file.
yuv422_to_bgr_line_end:
movd mm0, [esp+132] ; in_step*2
movd mm1, [esp+136] ; out_step
movq mm2, [esp+96] ; y, current & next
movq mm3, [esp+104] ; current uv
movq mm4, [esp+112] ; next uv
movd mm5, [esp+120] ; out
paddd mm2, mm0 ; current y + in_step * 2
punpckldq mm0, mm0 ; in_step*2 in both dwords
paddd mm5, mm1 ; out + out_step
paddd mm3, mm0 ; current u and v + in_step * 2
movd [esp+100], mm2 ; advanced current y goes into the "next" slot
psrlq mm2, 32 ; bring the old "next" y down to the low dword
movq [esp+104], mm4 ; old next uv becomes current uv
movd esi, mm2 ; esi = Y pointer for the upcoming line
movq [esp+112], mm3 ; advanced uv goes into the "next" slot
movd edi, mm5 ; edi = output pointer for the upcoming line
mov [esp+96], esi ; store the new current y
mov [esp+120], edi ; store the new out pointer
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -