/* ccvt.s */
/* (the label "Font size:" that followed was web code-viewer residue, not source) */
jnz 1b
8: decl Height # yes; decrement line counter
jnz 0b
9: pop %edi
pop %esi
pop %ebx
leave
ret
/*
 * From YUYV to BGRa.
 *
 * ccvt_yuyv_bgr32 validates its arguments through test_param_2 (carry set
 * means "give up"), then processes the image one line at a time.  Each
 * inner iteration chains three helpers: do_four_yuyv, limit_pixels and
 * push_bgr32.  The inner loop ends when %ecx is zero; %ecx is loaded from
 * Width here and is presumably counted down inside the helpers -- TODO
 * confirm, they are defined outside this block.
 *
 * Height is decremented in place and doubles as the line counter.
 * %ebx, %esi and %edi are saved and restored around the body.
 */
ENTRY(ccvt_yuyv_bgr32)
        push    %ebp                    # open-coded form of "enter $72, $0"
        mov     %esp, %ebp
        sub     $72, %esp               # 72 bytes of frame below %ebp
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_2
        jc      9f                      # parameter check failed
0:      mov     Width, %ecx             # start of a line: reload the width
1:      call    do_four_yuyv
        call    limit_pixels
        call    push_bgr32
        test    %ecx, %ecx              # anything left on this line?
        jnz     1b
8:      decl    Height                  # line done; count it off
        jnz     0b
9:      pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp              # open-coded form of "leave"
        pop     %ebp
        ret
/*
 * Planar to RGBa.
 *
 * ccvt_420p_rgb32 checks its arguments with test_param_31 (carry set means
 * bail out), copies SrcU/SrcV into the working pointers Uptr/Vptr and then
 * converts line by line with do_four_yuvp -> limit_pixels -> push_rgb32
 * until %ecx reaches zero.
 *
 * After each line, if the remaining line count in Height is even, Width/2
 * is subtracted from both Uptr and Vptr.  Presumably the helpers advance
 * those pointers by Width/2 per line, so this makes two consecutive lines
 * share one chroma row -- TODO confirm against do_four_yuvp.
 *
 * NOTE(review): the Width * Height product left in %edx:%eax by mull is
 * not read anywhere in this block (%eax is reloaded on the next line).
 * It is kept so that register contents stay exactly as before.
 */
ENTRY(ccvt_420p_rgb32)
        push    %ebp                    # open-coded form of "enter $72, $0"
        mov     %esp, %ebp
        sub     $72, %esp
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_31
        jc      9f                      # parameter check failed
        mov     Width, %eax
        mull    Height                  # %edx:%eax = Width * Height (unused here)
        mov     SrcU, %eax              # working copies of the chroma pointers
        mov     %eax, Uptr
        mov     SrcV, %eax
        mov     %eax, Vptr
0:      mov     Width, %ecx             # start of a line: reload the width
1:      call    do_four_yuvp
        call    limit_pixels
        call    push_rgb32
        test    %ecx, %ecx              # anything left on this line?
        jnz     1b
        testl   $1, Height              # remaining line count odd?
        jnz     8f                      # odd: leave Uptr/Vptr where they are
        mov     Width, %eax             # even: step both back by Width / 2
        shr     $1, %eax
        sub     %eax, Uptr
        sub     %eax, Vptr
8:      decl    Height                  # line done; count it off
        jnz     0b
9:      pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp              # open-coded form of "leave"
        pop     %ebp
        ret
/*
 * Planar to RGB.
 *
 * ccvt_420p_rgb24 has the same shape as the other planar converters in
 * this file; only the store helper differs (push_rgb24).  Arguments are
 * checked by test_param_31 (carry set means bail out), SrcU/SrcV are
 * copied into Uptr/Vptr, and every line is run through
 * do_four_yuvp -> limit_pixels -> push_rgb24 until %ecx is zero.
 *
 * When the remaining line count in Height is even, Uptr and Vptr are both
 * moved back by Width/2 after the line.  Presumably this lets the next
 * line reuse the same chroma row -- TODO confirm against do_four_yuvp.
 *
 * NOTE(review): the mull result is never read in this block; it is kept
 * only so %eax/%edx hold what they held before.
 */
ENTRY(ccvt_420p_rgb24)
        push    %ebp                    # open-coded form of "enter $72, $0"
        mov     %esp, %ebp
        sub     $72, %esp
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_31
        jc      9f                      # parameter check failed
        mov     Width, %eax
        mull    Height                  # %edx:%eax = Width * Height (unused here)
        mov     SrcU, %eax              # working copies of the chroma pointers
        mov     %eax, Uptr
        mov     SrcV, %eax
        mov     %eax, Vptr
0:      mov     Width, %ecx             # start of a line: reload the width
1:      call    do_four_yuvp
        call    limit_pixels
        call    push_rgb24
        test    %ecx, %ecx              # anything left on this line?
        jnz     1b
        testl   $1, Height              # remaining line count odd?
        jnz     8f                      # odd: leave Uptr/Vptr where they are
        mov     Width, %eax             # even: step both back by Width / 2
        shr     $1, %eax
        sub     %eax, Uptr
        sub     %eax, Vptr
8:      decl    Height                  # line done; count it off
        jnz     0b
9:      pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp              # open-coded form of "leave"
        pop     %ebp
        ret
/*
 * Planar to BGRa.  (Yes, the set of conversion routines keeps growing;
 * it would be nice if things were simpler.)
 *
 * ccvt_420p_bgr32 follows the same pattern as the other planar
 * converters: test_param_31 validates the arguments (carry set means bail
 * out), SrcU/SrcV are copied into Uptr/Vptr, and each line is pushed
 * through do_four_yuvp -> limit_pixels -> push_bgr32 until %ecx is zero.
 *
 * When the remaining line count in Height is even, Uptr and Vptr are both
 * moved back by Width/2 after the line.  Presumably this lets the next
 * line reuse the same chroma row -- TODO confirm against do_four_yuvp.
 *
 * NOTE(review): the mull result is never read in this block; it is kept
 * only so %eax/%edx hold what they held before.
 */
ENTRY(ccvt_420p_bgr32)
        push    %ebp                    # open-coded form of "enter $72, $0"
        mov     %esp, %ebp
        sub     $72, %esp
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_31
        jc      9f                      # parameter check failed
        mov     Width, %eax
        mull    Height                  # %edx:%eax = Width * Height (unused here)
        mov     SrcU, %eax              # working copies of the chroma pointers
        mov     %eax, Uptr
        mov     SrcV, %eax
        mov     %eax, Vptr
0:      mov     Width, %ecx             # start of a line: reload the width
1:      call    do_four_yuvp
        call    limit_pixels
        call    push_bgr32
        test    %ecx, %ecx              # anything left on this line?
        jnz     1b
        testl   $1, Height              # remaining line count odd?
        jnz     8f                      # odd: leave Uptr/Vptr where they are
        mov     Width, %eax             # even: step both back by Width / 2
        shr     $1, %eax
        sub     %eax, Uptr
        sub     %eax, Vptr
8:      decl    Height                  # line done; count it off
        jnz     0b
9:      pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp              # open-coded form of "leave"
        pop     %ebp
        ret
/*
 * Go from RGB (red byte first) to 4:2:0 planar.
 *
 * U and V are decimated by 2 in both directions.  The conversion is a
 * fixed-point matrix multiply, with every coefficient scaled by 256:
 *
 *      (Y )   (  77   150    29)   (R)
 *      (Cb) = ( -43   -85   128) * (G)
 *      (Cr)   ( 128  -107   -21)   (B)
 *
 * Frame slots used here (all relative to %ebp):
 *      -76  bytes per source line (3 * Width)
 *      -80  line counter for the current pass
 *      -84  per-line counter of 2x2 blocks (Width / 2)
 *      -88  factor applied to the 1st byte of a pixel (red)
 *      -92  factor applied to the 2nd byte of a pixel (green)
 *      -96  factor applied to the 3rd byte of a pixel (blue)
 * rgb_multiply reads the three factor slots and accumulates into %ebx.
 *
 * Three passes are made:
 *   1. Y for every pixel: weighted sum >> 8.  No clamping is needed
 *      because 77 + 150 + 29 = 256.
 *   2. U, one output byte per 2x2 pixel block.
 *   3. V, same code as pass 2 with different factors.
 * For chroma the four weighted sums are added up, 0x20000 is added and the
 * total is shifted right by 10 (divide by 4 * 256); 0x20000 >> 10 = 128,
 * i.e. the usual chroma offset.
 *
 * The chroma code runs twice; the red-factor slot doubles as the "already
 * did V" marker (it is 128 only during the V pass).
 *
 * test_param_13 is defined elsewhere; a set carry flag on return makes
 * this routine exit at once.  The Y pass relies on %esi/%edi as they are
 * after that call -- presumably source and Y destination, TODO confirm.
 */
ENTRY(ccvt_rgb24_420p)
        push    %ebp                    # open-coded form of "enter $96, $0"
        mov     %esp, %ebp
        sub     $96, %esp               # 96 bytes of frame below %ebp
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_13
        jc      9f                      # parameter check failed

        mov     Width, %eax
        lea     (%eax,%eax,2), %eax     # 3 * Width = source bytes per line
        mov     %eax, -76(%ebp)
        mov     Height, %eax
        mov     %eax, -80(%ebp)         # Y pass visits every line

        /* Pass 1: luma. */
        movl    $77, -88(%ebp)          # 0.299 * 256
        movl    $150, -92(%ebp)         # 0.587 * 256
        movl    $29, -96(%ebp)          # 0.114 * 256
0:      mov     Width, %ecx             # pixels in this line
1:      xor     %ebx, %ebx              # clear the accumulator
        call    rgb_multiply            # %ebx = weighted sum of one pixel
        shr     $8, %ebx                # back from 8.8 fixed point
        mov     %bl, %al
        stosb                           # emit one Y byte
        dec     %ecx
        jnz     1b                      # more pixels on this line
        decl    -80(%ebp)
        jnz     0b                      # more lines

        /* Pass 2: U.  The code from label 7 on is shared with pass 3. */
        movl    $-43, -88(%ebp)         # -0.1687 * 256
        movl    $-85, -92(%ebp)         # -0.3313 * 256
        movl    $128, -96(%ebp)         #  0.5    * 256
        mov     DstU, %edi              # output goes to the U plane
7:      mov     Src4, %esi              # start again at the top of the source
        mov     Height, %eax
        shr     $1, %eax
        mov     %eax, -80(%ebp)         # Height / 2 rows of 2x2 blocks
2:      mov     Width, %eax
        shr     $1, %eax
        mov     %eax, -84(%ebp)         # Width / 2 blocks per row
3:      xor     %ebx, %ebx              # clear the accumulator
        mov     $4, %ecx                # four pixels per block
4:      call    rgb_multiply
        dec     %ecx
        jz      5f                      # all four pixels summed
        cmp     $2, %ecx                # two done? go to the line below
        jne     4b
        sub     $6, %esi                # back to the first pixel of the block
        add     -76(%ebp), %esi         # ...and one source line down
        jmp     4b
5:      sub     -76(%ebp), %esi         # up one line again: next block, top row
        add     $0x20000, %ebx          # + 128 after the shift below
        shr     $10, %ebx               # divide by 4 pixels * 256
        mov     %bl, %al
        stosb                           # emit one chroma byte
        decl    -84(%ebp)
        jnz     3b                      # more blocks in this row
        add     -76(%ebp), %esi         # skip the second line of the pair
        decl    -80(%ebp)
        jnz     2b                      # more rows

        cmpl    $128, -88(%ebp)         # red factor 128 means V was just done
        je      9f

        /* Pass 3: V, through the same code. */
        movl    $128, -88(%ebp)         #  0.5    * 256
        movl    $-107, -92(%ebp)        # -0.4187 * 256
        movl    $-21, -96(%ebp)         # -0.0813 * 256
        mov     DstV, %edi              # output goes to the V plane
        jmp     7b

9:      pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp              # open-coded form of "leave"
        pop     %ebp
        ret
/*
 * Go from BGR (blue byte first) to 4:2:0 planar.
 *
 * Same algorithm as ccvt_rgb24_420p, with the per-byte factors swapped
 * because the first byte of every pixel is blue instead of red.  The
 * coefficients (scaled by 256) are:
 *
 *      Y  =   77 R + 150 G +  29 B
 *      Cb =  -43 R -  85 G + 128 B
 *      Cr =  128 R - 107 G -  21 B
 *
 * Frame slots used here (all relative to %ebp):
 *      -76  bytes per source line (3 * Width)
 *      -80  line counter for the current pass
 *      -84  per-line counter of 2x2 blocks (Width / 2)
 *      -88  factor applied to the 1st byte of a pixel (blue in this routine)
 *      -92  factor applied to the 2nd byte of a pixel (green)
 *      -96  factor applied to the 3rd byte of a pixel (red in this routine)
 * rgb_multiply names these slots red/green/blue after the RGB byte order;
 * what matters is the position of the byte inside the pixel.
 *
 * Pass 1 writes one Y byte per pixel.  Passes 2 and 3 (shared code at
 * label 7) write one U resp. V byte per 2x2 pixel block: four weighted
 * sums are accumulated, 0x20000 is added (128 after the shift) and the
 * total is shifted right by 10 (divide by 4 * 256).
 *
 * The first-byte factor slot doubles as the "V pass done" marker: it is
 * -21 only during the V pass.
 *
 * test_param_13 is defined elsewhere; a set carry flag on return makes
 * this routine exit at once.  The Y pass relies on %esi/%edi as they are
 * after that call -- presumably source and Y destination, TODO confirm.
 *
 * Fix: the U-pass blue factor was 123, which disagrees with the 0.5
 * coefficient (128 / 256) used by the matrix and by ccvt_rgb24_420p.
 * It is 128 now.
 */
ENTRY(ccvt_bgr24_420p)
        enter   $96, $0                 # 72 + 24 bytes of frame, no nesting
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_13
        jc      9f                      # parameter check failed

        mov     Width, %eax
        shl     %eax
        add     Width, %eax             # 3 * Width = source bytes per line
        mov     %eax, -76(%ebp)
        mov     Height, %eax
        mov     %eax, -80(%ebp)         # Y pass visits every line

        /* Pass 1: luma. */
        movl    $29, -88(%ebp)          # blue:  0.114 * 256
        movl    $150, -92(%ebp)         # green: 0.587 * 256
        movl    $77, -96(%ebp)          # red:   0.299 * 256
0:      mov     Width, %ecx             # pixels in this line
1:      xor     %ebx, %ebx              # clear the accumulator
        call    rgb_multiply            # %ebx = weighted sum of one pixel
        shr     $8, %ebx                # divide by 256; 29 + 150 + 77 = 256, so no clamp
        mov     %bl, %al
        stosb                           # emit one Y byte
        dec     %ecx
        jnz     1b                      # more pixels on this line
        decl    -80(%ebp)
        jnz     0b                      # more lines

        /* Pass 2: U.  The code from label 7 on is shared with pass 3. */
        movl    $128, -88(%ebp)         # blue:   0.5    * 256 (was 123)
        movl    $-85, -92(%ebp)         # green: -0.3313 * 256
        movl    $-43, -96(%ebp)         # red:   -0.1687 * 256
        mov     DstU, %edi              # output goes to the U plane
7:      mov     Src4, %esi              # start again at the top of the source
        mov     Height, %eax
        shr     %eax
        mov     %eax, -80(%ebp)         # Height / 2 rows of 2x2 blocks
2:      mov     Width, %eax
        shr     %eax
        mov     %eax, -84(%ebp)         # Width / 2 blocks per row
3:      xor     %ebx, %ebx              # clear the accumulator
        mov     $4, %ecx                # four pixels per block
4:      call    rgb_multiply
        dec     %ecx
        jz      5f                      # all four pixels summed
        cmp     $2, %ecx                # two done? go to the line below
        jne     4b
        sub     $6, %esi                # back to the first pixel of the block
        add     -76(%ebp), %esi         # ...and one source line down
        jmp     4b
5:      sub     -76(%ebp), %esi         # up one line again: next block, top row
        add     $0x20000, %ebx          # + 128 after the shift below
        shr     $10, %ebx               # divide by 4 pixels * 256
        mov     %bl, %al
        stosb                           # emit one chroma byte
        decl    -84(%ebp)
        jnz     3b                      # more blocks in this row
        add     -76(%ebp), %esi         # skip the second line of the pair
        decl    -80(%ebp)
        jnz     2b                      # more rows

        cmpl    $-21, -88(%ebp)         # first-byte factor -21 means V was just done
        je      9f

        /* Pass 3: V, through the same code. */
        movl    $-21, -88(%ebp)         # blue:  -0.0813 * 256
        movl    $-107, -92(%ebp)        # green: -0.4187 * 256
        movl    $128, -96(%ebp)         # red:    0.5    * 256
        mov     DstV, %edi              # output goes to the V plane
        jmp     7b

9:      pop     %edi
        pop     %esi
        pop     %ebx
        leave
        ret
/* RGB-to-YUV helper functions */

/*
 * rgb_multiply: add one weighted 3-byte pixel to the running sum in %ebx.
 *
 * In:    %esi        -> three consecutive source bytes
 *        -88(%ebp)   factor for the 1st byte ("red" for RGB input)
 *        -92(%ebp)   factor for the 2nd byte ("green")
 *        -96(%ebp)   factor for the 3rd byte ("blue")
 *        The factor slots live in the frame of the calling routine.
 * Out:   %ebx        previous value + b0*f0 + b1*f1 + b2*f2 (low 32 bits)
 *        %esi        stepped three times by lodsb (forward when DF is clear)
 * Clobb: %eax, %edx, flags
 *
 * mul is the unsigned multiply, but only the low 32 bits of each product
 * are used and those are the same for signed and unsigned operands, so
 * negative factors work as intended.
 */
rgb_multiply:
        lodsb                           # 1st byte of the pixel
        movzbl  %al, %eax               # widen to 32 bits
        mull    -88(%ebp)               # times its factor
        add     %eax, %ebx

        lodsb                           # 2nd byte of the pixel
        movzbl  %al, %eax
        mull    -92(%ebp)
        add     %eax, %ebx

        lodsb                           # 3rd byte of the pixel
        movzbl  %al, %eax
        mull    -96(%ebp)
        add     %eax, %ebx              # %ebx now holds the updated sum
        ret
/**************************************************************************/
/*
 * Go from 'interlaced' (YYYY UU / YYYY VV) format to planar.
 *
 * Source layout as consumed by this code: groups of 6 bytes, 4 luma bytes
 * followed by 2 chroma bytes.  The chroma bytes are copied to the U plane
 * from lines 0, 2, 4, ... and to the V plane from lines 1, 3, 5, ...
 *
 * Frame slot: -76(%ebp) holds Width / 4, the number of 6-byte groups per
 * line.  %ebx holds Width * 1.5, the size of one source line in bytes.
 *
 * Three sweeps over the source:
 *   Y: copy 4 bytes, skip 2, for every group of every line.
 *   U: start 4 bytes in, copy 2 bytes, skip 4, and skip every other line.
 *   V: the same, starting one line further down.
 *
 * test_param_13 is defined elsewhere; a set carry flag on return makes
 * this routine exit at once.  The Y sweep relies on %esi/%edi as they are
 * after that call -- presumably source and Y destination, TODO confirm.
 */
ENTRY(ccvt_420i_420p)
        push    %ebp                    # open-coded form of "enter $76, $0"
        mov     %esp, %ebp
        sub     $76, %esp               # 76 bytes of frame below %ebp
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_13
        jc      9f                      # parameter check failed

        mov     Width, %eax
        shr     $2, %eax                # groups per line = Width / 4
        mov     %eax, -76(%ebp)

        /* Y sweep */
        mov     Height, %edx            # lines to go
0:      mov     -76(%ebp), %ecx         # groups to go on this line
1:      lodsl                           # fetch 4 luma bytes
        stosl                           # store them
        add     $2, %esi                # step over the 2 chroma bytes
        dec     %ecx
        jnz     1b
        dec     %edx
        jnz     0b

        /* U sweep */
        mov     Src4, %esi              # back to the start of the source
        add     $4, %esi                # first chroma pair of line 0
        mov     DstU, %edi
        mov     Height, %edx
        shr     $1, %edx                # Height / 2 chroma lines
        mov     Width, %ebx
        lea     (%ebx,%ebx,2), %ebx     # 3 * Width
        shr     $1, %ebx                # Width * 1.5 = bytes per source line
2:      mov     -76(%ebp), %ecx         # groups to go on this line
3:      lodsw                           # fetch 2 chroma bytes
        stosw
        add     $4, %esi                # step over the next 4 luma bytes
        dec     %ecx
        jnz     3b
        add     %ebx, %esi              # skip the odd line in between
        dec     %edx
        jnz     2b

        /* V sweep: same walk, starting on line 1 (%ebx is reused as is) */
        mov     Src4, %esi
        lea     4(%esi,%ebx), %esi      # first chroma pair of line 1
        mov     DstV, %edi
        mov     Height, %edx
        shr     $1, %edx                # Height / 2 chroma lines
4:      mov     -76(%ebp), %ecx         # groups to go on this line
5:      lodsw                           # fetch 2 chroma bytes
        stosw
        add     $4, %esi                # step over the next 4 luma bytes
        dec     %ecx
        jnz     5b
        add     %ebx, %esi              # skip the even line in between
        dec     %edx
        jnz     4b

        /* Done. */
9:      pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp              # open-coded form of "leave"
        pop     %ebp
        ret
/*
 * Go from 4:2:0 interlaced to 'normal' YUYV.
 *
 * The source is read as 6-byte groups:
 *      YYYY UU         (one kind of line)
 *      YYYY VV         (the other kind)
 * and every group becomes 8 output bytes:
 *      Y0 U0 Y1 V0  Y2 U1 Y3 V1
 * so the output is larger than the input.  The chroma pair that a line
 * does not carry itself is fetched from the neighbouring line, one line
 * length (Width * 1.5 bytes) away.
 *
 * Which kind of line is being processed is decided by bit 0 of Height,
 * which is counted down in place: bit clear -> U here, V one line below;
 * bit set -> V here, U one line above.
 *
 * Frame slots (relative to %ebp):
 *      -76  Width / 4   = groups per line
 *      -80  Width * 1.5 = bytes per source line
 *
 * test_param_2 is defined elsewhere; a set carry flag on return makes
 * this routine exit at once.  The loop relies on %esi/%edi as they are
 * after that call -- presumably source and destination, TODO confirm.
 */
ENTRY(ccvt_420i_yuyv)
        push    %ebp                    # open-coded form of "enter $80, $0"
        mov     %esp, %ebp
        sub     $80, %esp               # 80 bytes of frame below %ebp
        push    %ebx
        push    %esi
        push    %edi
        call    test_param_2
        jc      9f                      # parameter check failed

        mov     Width, %ecx
        shr     $2, %ecx                # groups per line = Width / 4
        mov     %ecx, -76(%ebp)
        mov     Width, %ebx
        lea     (%ebx,%ebx,2), %ebx     # 3 * Width
        shr     $1, %ebx                # Width * 1.5 = bytes per source line
        mov     %ebx, -80(%ebp)

0:      mov     -76(%ebp), %ecx         # groups to go on this line
1:      lodsl                           # %eax = Y3Y2Y1Y0; %esi now at the chroma pair
        mov     -80(%ebp), %ebx         # distance to the neighbouring line
        testl   $1, Height
        jnz     2f

        /* Bit 0 clear: this line carries U, the line below carries V. */
        movw    (%ebx,%esi), %dx        # V pair from one line down
        shl     $16, %edx               # ...into the high word
        movw    (%esi), %dx             # U pair from this line
        jmp     3f

2:      /* Bit 0 set: this line carries V, the line above carries U. */
        neg     %ebx                    # look one line up instead
        movw    (%esi), %dx             # V pair from this line
        shl     $16, %edx               # ...into the high word
        movw    (%ebx,%esi), %dx        # U pair from one line up

3:      add     $2, %esi                # step over the chroma pair
        /* Here: %eax = Y3Y2Y1Y0, %edx = V1V0U1U0, %ebx is scratch. */
        push    %eax                    # keep all four Y bytes for later

        /* First output dword: V0 Y1 U0 Y0 */
        movzbl  %al, %ebx               # ______Y0
        and     $0xFF00, %eax           # ____Y1__
        shl     $8, %eax                # __Y1____
        or      %ebx, %eax              # __Y1__Y0
        mov     %edx, %ebx              # V1V0U1U0
        shl     $8, %ebx                # V0U1U0__
        and     $0xff00ff00, %ebx       # V0__U0__
        or      %ebx, %eax              # V0Y1U0Y0
        stosl

        /* Second output dword: V1 Y3 U1 Y2 */
        pop     %eax                    # Y3Y2Y1Y0
        shr     $8, %eax                # __Y3Y2Y1
        shr     $8, %ax                 # __Y3__Y2 (16-bit shift, low word only)
        and     $0xff00ff00, %edx       # V1__U1__
        or      %edx, %eax              # V1Y3U1Y2
        stosl

        dec     %ecx
        jnz     1b                      # more groups on this line
        decl    Height                  # line done; also flips the line kind
        jnz     0b

9:      pop     %edi
        pop     %esi
        pop     %ebx
        mov     %ebp, %esp              # open-coded form of "leave"
        pop     %ebp
        ret
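/*
 * The lines below are residue from the web code viewer this file was
 * copied from; they are not part of the assembly source.
 *
 * Keyboard shortcuts
 *   Copy code:            Ctrl + C
 *   Search code:          Ctrl + F
 *   Full-screen mode:     F11
 *   Toggle theme:         Ctrl + Shift + D
 *   Show shortcuts:       ?
 *   Increase font size:   Ctrl + =
 *   Decrease font size:   Ctrl + -
 */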