📄 ccvt.s
字号:
/* Assembly code for various format conversion */
/* The ccvt_* functions always start with width and height, so these
parameters are in 8(%ebp) and 12 (%ebp). The other parameters can be
2 to 4 pointers, and one of these combinations:
*src, *dst
*srcy, *srcu, *srcv, *dst
*src, *dsty, *dstu, *dstv
*/
#define __ASSEMBLY__
#include <linux/linkage.h>
/* Stack-argument offsets, relative to the frame pointer %ebp that each
   ccvt_* entry point establishes with `enter'.  Width and Height are
   always the first two arguments; the pointer arguments that follow
   depend on the conversion, so the same offsets get several names.
*/
#define Width 8(%ebp)
#define Height 12(%ebp)
/* 2 pointer parameters: 1 in (src), 1 out (dst) */
#define Src2 16(%ebp)
#define Dst2 20(%ebp)
/* 4 pointer parameters: 3 in (separate Y, U and V sources), 1 out */
#define SrcY 16(%ebp)
#define SrcU 20(%ebp)
#define SrcV 24(%ebp)
#define Dst4 28(%ebp)
/* 4 pointer parameters: 1 in, 3 out (separate Y, U and V destinations) */
#define Src4 16(%ebp)
#define DstY 20(%ebp)
#define DstU 24(%ebp)
#define DstV 28(%ebp)
/* This buffer space used to be statically allocated, but that is going
   to give problems with multiple cams (though I have yet to see it).
   Therefore, we reserve at least 72 bytes on the stack with `enter'.

   Layout of the locals, relative to %ebp:
     PixelBuffer  -64 .. -1   4 pixels, 4 longs each (64 bytes)
     Uptr         -68         running U pointer (4 bytes)
     Vptr         -72         running V pointer (4 bytes)
   Y is always addressed through %esi/%edi, so it needs no slot here.
*/
#define PixelBuffer -64(%ebp)
#define Uptr -68(%ebp)
#define Vptr -72(%ebp)
.data
#PixelBuffer: # 4 RGBa or YUV pixels (64 bytes)
#.long 0, 0, 0, 0
#.long 0, 0, 0, 0
#.long 0, 0, 0, 0
#.long 0, 0, 0, 0
#
#Uptr: .long 0 # Y is always in %esi/%edi (8 bytes)
#Vptr: .long 0
.text
/* This function will load the src and destination pointers, including
Uptr/Vptr when necessary, and test the width/height parameters.
- %esi will be set to Src or SrcY
- %edi will be set to Dst or DstY
the carry flag will be set if any of these tests fail.
It assumes %ebp has been set.
*/
/* test_param_2: argument check for the (src, dst) conversions.
   Loads Src2 into %esi and Dst2 into %edi, rejects a NULL in either
   one, then continues with the shared width/height checks.
   Out: carry clear on success, carry set on failure.
   Assumes %ebp has been set up by the caller.
*/
test_param_2:
	mov	Src2, %esi
	mov	Dst2, %edi
	test	%esi, %esi		# src == NULL?
	jz	param_fail
	test	%edi, %edi		# dst == NULL?
	jz	param_fail
	jmp	test_width_height
/* test_param_31: argument check for the (srcy, srcu, srcv, dst)
   conversions.  Every pointer is rejected when NULL.  On the way the
   V and U sources are stored in Vptr and Uptr; on success %esi holds
   SrcY and %edi holds Dst4.  Finishes in the shared width/height
   checks.
   Out: carry clear on success, carry set on failure.
*/
test_param_31:
	mov	Dst4, %edi		# output pointer
	test	%edi, %edi
	jz	param_fail

	mov	SrcV, %esi		# V plane -> Vptr
	test	%esi, %esi
	jz	param_fail
	mov	%esi, Vptr

	mov	SrcU, %esi		# U plane -> Uptr
	test	%esi, %esi
	jz	param_fail
	mov	%esi, Uptr

	mov	SrcY, %esi		# Y plane stays in %esi
	test	%esi, %esi
	jz	param_fail
	jmp	test_width_height
/* test_param_13: argument check for the (src, dsty, dstu, dstv)
   conversions.  Every pointer is rejected when NULL.  On the way the
   V and U destinations are stored in Vptr and Uptr; on success %esi
   holds Src4 and %edi holds DstY.  Finishes in the shared width/height
   checks.
   Out: carry clear on success, carry set on failure.
*/
test_param_13:
	mov	Src4, %esi		# input pointer
	test	%esi, %esi
	jz	param_fail

	mov	DstV, %edi		# V plane -> Vptr
	test	%edi, %edi
	jz	param_fail
	mov	%edi, Vptr

	mov	DstU, %edi		# U plane -> Uptr
	test	%edi, %edi
	jz	param_fail
	mov	%edi, Uptr

	mov	DstY, %edi		# Y plane stays in %edi
	test	%edi, %edi
	jz	param_fail
	jmp	test_width_height
/* test_width_height: validate the Width and Height stack arguments.
   Shared tail of the test_param_* routines.

   Accepted:  Width  > 0 and a multiple of 4
              Height > 0 and even
   The comparisons are signed, so a negative value (or an unsigned one
   with the top bit set) is rejected instead of being taken for a huge
   positive size; the conversion loops count these values down to zero
   and would otherwise run far past the buffers.

   Out: carry clear on success (param_ok), carry set on failure
        (param_fail).  No registers are modified.
*/
test_width_height:
	cmpl	$0, Width
	jle	param_fail		# zero or negative width
	testl	$3, Width		# multiple of 4?
	jnz	param_fail		# Nope...
	cmpl	$0, Height		# explicit size: memory operand
	jle	param_fail		# zero or negative height
	testl	$1, Height		# Odd no. of lines?
	jnz	param_fail		# Aye
	/* fall through */
/* exit points */
param_ok:
	clc				# Success: clear carry
	ret
param_fail:
	stc				# Fail: set carry
	ret
/* expand_4_y: fill PixelBuffer with 4 grey scale pixels.

   In:       %eax = four Y samples (Y3Y2Y1Y0, Y0 in the low byte)
             %ecx = pixel counter; must be a multiple of 4 on entry,
                    because the loop stops when its two low bits are
                    zero again
   Out:      for each of the 4 pixels the R, G and B longs are set to
             Y << 8 and the alpha long is set to 0
   Modifies: %ecx (-4)
   Destroys: %eax, %edx, %edi

   The alpha long is cleared explicitly.  PixelBuffer lives on the
   stack, so skipping it would leave whatever bytes happened to be
   there, and limit_pixels / push_rgb32 would copy them to the output.
*/
expand_4_y:
	mov	%eax, %edx		# Keep in edx (we need eax)
	lea	PixelBuffer, %edi
0:					# This code is executed 4 times
	movzbl	%dl, %eax		# move, zero extending byte-to-long
	shl	$8, %eax		# 8 digit precision
	stosl				# Expand into PixelBuffer: R
	stosl				# G
	stosl				# B
	movl	$0, (%edi)		# alpha = 0 (keeps eax intact)
	add	$4, %edi
	shr	$8, %edx		# next Y
	dec	%ecx
	test	$3, %ecx
	jnz	0b
	ret				# from expand_4_y
/* expand_4_uv: add the colour contribution of two U/V pairs to the
   grey values already in PixelBuffer.

   In:       %ebx = U1U0V1V0 (V0 in the low byte)
   Out:      PixelBuffer updated; pair 0 is applied to pixels 0 and 1,
             pair 1 to pixels 2 and 3
   Destroys: %edi, %ebx, %eax, %edx

   Each sample is re-centred by subtracting 128 and sign-extended, then
   multiplied by a fixed-point factor:
       R += 359 * V
       G -= 183 * V + 88 * U
       B += 454 * U
   Only the low 32 bits of each product are used.
*/
expand_4_uv:
	lea	PixelBuffer, %edi	# reset pointer

	/* V0 -> pixels 0 and 1 */
	sub	$128, %bl
	movsbl	%bl, %edx
	imul	$359, %edx, %eax	# Vr
	add	%eax, 0x00(%edi)
	add	%eax, 0x10(%edi)
	imul	$183, %edx, %eax	# Vg
	sub	%eax, 0x04(%edi)
	sub	%eax, 0x14(%edi)

	/* V1 -> pixels 2 and 3 */
	sub	$128, %bh
	movsbl	%bh, %edx
	imul	$359, %edx, %eax	# Vr
	add	%eax, 0x20(%edi)
	add	%eax, 0x30(%edi)
	imul	$183, %edx, %eax	# Vg
	sub	%eax, 0x24(%edi)
	sub	%eax, 0x34(%edi)

	bswap	%ebx			# U0 now in bh, U1 in bl

	/* U0 -> pixels 0 and 1 */
	sub	$128, %bh
	movsbl	%bh, %edx
	imul	$88, %edx, %eax		# Ug
	sub	%eax, 0x04(%edi)
	sub	%eax, 0x14(%edi)
	imul	$454, %edx, %eax	# Ub
	add	%eax, 0x08(%edi)
	add	%eax, 0x18(%edi)

	/* U1 -> pixels 2 and 3 */
	sub	$128, %bl
	movsbl	%bl, %edx
	imul	$88, %edx, %eax		# Ug
	sub	%eax, 0x24(%edi)
	sub	%eax, 0x34(%edi)
	imul	$454, %edx, %eax	# Ub
	add	%eax, 0x28(%edi)
	add	%eax, 0x38(%edi)
	ret				# expand_4_uv
/* do_four_yuvi: expand 4 pixels of 420i data into PixelBuffer.

   In:       %esi = source, positioned at 4 Y bytes followed by 2
                    chroma bytes
             %ecx = pixel counter (see expand_4_y)
   Out:      PixelBuffer holds the 4 unclamped pixels; %esi is advanced
             by 6; %ecx is decreased by 4
   Keeps:    %edi
   Destroys: %eax, %ebx, %edx

   The chroma bytes that follow the Y bytes are U on lines where the
   Height counter is even and V where it is odd.  The other component
   is fetched from the neighbouring line, 1.5 * Width bytes away
   (forward for V on an even line, backward for U on an odd line).
*/
do_four_yuvi:
	push	%edi
	lodsl				# 4 Y bytes at a time
	call	expand_4_y

	/* edx = distance between two lines = 1.5 * width */
	mov	Width, %edx
	lea	(%edx,%edx,2), %edx	# 3 * width
	shr	$1, %edx		# 1.5 * width

	testl	$1, Height
	jz	.Lyuvi_even_line

	/* odd line: V follows the Y bytes, U sits one line back */
	neg	%edx
	movzwl	(%esi,%edx), %ebx	# U1U0
	lodsw				# V1V0
	jmp	.Lyuvi_have_uv

.Lyuvi_even_line:
	/* even line: U follows the Y bytes, V sits one line ahead */
	lodsw				# U1U0
	movzwl	%ax, %ebx
	movw	-2(%esi,%edx), %ax	# V1V0 (esi already moved by 2)

.Lyuvi_have_uv:
	shl	$16, %ebx		# U1U0____
	mov	%ax, %bx		# U1U0V1V0
	call	expand_4_uv
	pop	%edi
	ret				# from do_four_yuvi
/* do_four_yuvp: expand 4 pixels of planar data into PixelBuffer.

   In:       %esi = Y source; Uptr and Vptr = running U and V sources
             %ecx = pixel counter (see expand_4_y)
   Out:      PixelBuffer holds the 4 unclamped pixels; %esi is advanced
             by 4, Uptr and Vptr by 2 each; %ecx is decreased by 4
   Keeps:    %edi
   Destroys: %eax, %ebx, %edx
*/
do_four_yuvp:
	push	%edi
	/* The first part is the same as for interlaced (4 bytes Y) */
	lodsl
	call	expand_4_y

	/* gather two U bytes into the upper half of ebx ... */
	mov	Uptr, %edx
	movzwl	(%edx), %ebx		# U1U0
	add	$2, %edx
	mov	%edx, Uptr
	shl	$16, %ebx		# U1U0____

	/* ... and two V bytes into the lower half */
	mov	Vptr, %edx
	movw	(%edx), %bx		# U1U0V1V0
	add	$2, %edx
	mov	%edx, Vptr

	call	expand_4_uv
	pop	%edi
	ret
/* do_four_yuyv: expand 4 pixels of yuyv interlaced data into
   PixelBuffer.

   Reads 8 bytes from %esi with two lodsl instructions and regroups
   them into the two register images the expand routines take:
       %eax = y3y2y1y0   for expand_4_y
       %ebx = u1u0v1v0   for expand_4_uv
   The register pictures in the comments below are written with the
   most significant byte first; `__' marks a zeroed byte.

   In:    %esi = source, %ecx = pixel counter (see expand_4_y)
   Keeps: %edi (saved and restored around the expand calls)
*/
do_four_yuyv:
push %edi
lodsl # first 4 bytes: v0y1u0y0
mov %eax, %ebx
bswap %ebx # y0u0y1v0
mov %bh, %ah # v0y1y1y0 (y1 copied next to y0)
and $0x00ff00ff, %ebx # __u0__v0
push %ax # park y1y0 on the stack
lodsl # second 4 bytes: v1y3u1y2 # mix register instructions
mov %eax, %edx # so CPU pipeline doesnt stall
rol $16, %eax # u1y2v1y3
mov %dl, %dh # v1y3y2y2
and $0xff00ff00, %eax # u1__v1__
mov $0, %dl # v1y3y2__
or %eax, %ebx # u1u0v1v0, complete chroma word
shl $8, %edx # y3y2____
pop %dx # y3y2y1y0, parked y1y0 fills the low half
mov %edx, %eax
call expand_4_y
call expand_4_uv
pop %edi
ret
/* limit_pixels: clamp all 16 longs in PixelBuffer to 0 .. 0xff00.
   Negative values become 0, values above 0xff00 become 0xff00, and
   everything else is left as it is.

   Keeps:    %esi, %edi, %ecx
   Destroys: %eax
*/
limit_pixels:
	push	%edi
	push	%ecx
	lea	PixelBuffer, %edi
	mov	$16, %ecx		# 4 pixels * 4 longs
1:	mov	(%edi), %eax
	test	%eax, %eax
	jns	2f
	xor	%eax, %eax		# below zero: clamp to 0
	jmp	3f
2:	cmp	$0xff00, %eax
	jle	4f			# already in range
	mov	$0xff00, %eax		# too large: clamp to 0xff00
3:	mov	%eax, (%edi)
4:	add	$4, %edi
	dec	%ecx
	jnz	1b
	pop	%ecx
	pop	%edi
	ret				# from limit_pixels
/* Copy RGB values from PixelBuffer into the destination buffer. */
/* push_rgb24: write 4 pixels (12 bytes), red byte first.

   Every PixelBuffer long holds its 8-bit result in bits 8..15, which
   is the second byte of the long in memory, so that byte is copied
   directly.  The alpha long of each pixel is skipped.

   In:       %edi = destination
   Out:      %edi advanced by 12
   Keeps:    %ecx, %esi
   Destroys: %eax
*/
push_rgb24:
	push	%esi
	push	%ecx
	lea	PixelBuffer, %esi
	mov	$4, %ecx		# 4 pixels
1:	movb	1(%esi), %al		# Red
	movb	%al, (%edi)
	movb	5(%esi), %al		# Green
	movb	%al, 1(%edi)
	movb	9(%esi), %al		# Blue
	movb	%al, 2(%edi)
	add	$16, %esi		# next pixel in PixelBuffer
	add	$3, %edi
	dec	%ecx
	jnz	1b
	pop	%ecx
	pop	%esi
	ret
/* push_bgr24: write 4 pixels (12 bytes), blue byte first.

   Same as push_rgb24 except that red and blue trade places in the
   output.  The 8-bit result of every PixelBuffer long is its second
   byte in memory (bits 8..15); the alpha long is skipped.

   In:       %edi = destination
   Out:      %edi advanced by 12
   Keeps:    %ecx, %esi
   Destroys: %eax
*/
push_bgr24:
	push	%esi
	push	%ecx
	lea	PixelBuffer, %esi
	mov	$4, %ecx		# 4 pixels
1:	movb	1(%esi), %al		# Red
	movb	%al, 2(%edi)
	movb	5(%esi), %al		# Green
	movb	%al, 1(%edi)
	movb	9(%esi), %al		# Blue
	movb	%al, (%edi)
	add	$16, %esi		# next pixel in PixelBuffer
	add	$3, %edi
	dec	%ecx
	jnz	1b
	pop	%ecx
	pop	%esi
	ret
/* push_rgb32: the simplest format, 4 pixels of 4 bytes each (RGBa).

   All 16 longs of PixelBuffer, alpha included, are reduced to 8 bits
   (bits 8..15 of the long) and written out in buffer order.

   In:       %edi = destination
   Out:      %edi advanced by 16
   Keeps:    %ecx, %esi
   Destroys: %eax
*/
push_rgb32:
	push	%esi
	push	%ecx
	lea	PixelBuffer, %esi
	mov	$16, %ecx		# 4 pixels * 4 components
1:	movb	1(%esi), %al		# 8 bit precision
	movb	%al, (%edi)
	add	$4, %esi
	inc	%edi
	dec	%ecx
	jnz	1b
	pop	%ecx
	pop	%esi
	ret
/* Gosh. Would you believe it. They even made this format... (Qt 2.*) */
/* push_bgr32: write 4 pixels of 4 bytes each, blue byte first.

   Blue, green and red go to bytes 0, 1 and 2 of each output pixel.
   NOTE(review): byte 3 (alpha) of each output pixel is stepped over
   and never written, unlike push_rgb32 which does emit the alpha
   value.  Confirm whether that is intended.

   In:       %edi = destination
   Out:      %edi advanced by 16
   Keeps:    %ecx, %esi
   Destroys: %eax
*/
push_bgr32:
	push	%esi
	push	%ecx
	lea	PixelBuffer, %esi
	mov	$4, %ecx		# 4 pixels
1:	movb	1(%esi), %al		# red, 8 bit precision
	movb	%al, 2(%edi)
	movb	5(%esi), %al		# green
	movb	%al, 1(%edi)
	movb	9(%esi), %al		# blue
	movb	%al, (%edi)
	add	$16, %esi		# next pixel in PixelBuffer
	add	$4, %edi		# alpha byte is left alone
	dec	%ecx
	jnz	1b
	pop	%ecx
	pop	%esi
	ret
/*************************************/
/* Functions to go from YUV interlaced formats to RGB */
/* ccvt_420i_rgb24: go from interlaced to RGB, red first.

   Stack arguments: Width, Height, Src2, Dst2.
   Nothing is converted when test_param_2 reports a bad argument.
   The Height argument slot doubles as the line counter and has been
   counted down to zero after a successful conversion.
   %ebx, %esi and %edi are saved and restored.
*/
ENTRY(ccvt_420i_rgb24)
	enter	$72, $0			# 72 bytes of locals, nesting level 0
	push	%ebx
	push	%esi
	push	%edi
	call	test_param_2
	jc	.Lccvt_420i_rgb24_out
.Lccvt_420i_rgb24_line:
	mov	Width, %ecx		# pixels left on this line
.Lccvt_420i_rgb24_quad:
	call	do_four_yuvi
	call	limit_pixels
	call	push_rgb24
	test	%ecx, %ecx		# end of line?
	jnz	.Lccvt_420i_rgb24_quad
	subl	$1, Height		# yes; decrement line counter
	jnz	.Lccvt_420i_rgb24_line
.Lccvt_420i_rgb24_out:
	pop	%edi
	pop	%esi
	pop	%ebx
	leave
	ret
/* ccvt_420i_bgr24: go from interlaced to BGR, blue first.

   Stack arguments: Width, Height, Src2, Dst2.
   Nothing is converted when test_param_2 reports a bad argument.
   The Height argument slot doubles as the line counter and has been
   counted down to zero after a successful conversion.
   %ebx, %esi and %edi are saved and restored.
*/
ENTRY(ccvt_420i_bgr24)
	enter	$72, $0			# 72 bytes of locals, nesting level 0
	push	%ebx
	push	%esi
	push	%edi
	call	test_param_2
	jc	.Lccvt_420i_bgr24_out
.Lccvt_420i_bgr24_line:
	mov	Width, %ecx		# pixels left on this line
.Lccvt_420i_bgr24_quad:
	call	do_four_yuvi
	call	limit_pixels
	call	push_bgr24
	test	%ecx, %ecx		# end of line?
	jnz	.Lccvt_420i_bgr24_quad
	subl	$1, Height		# yes; decrement line counter
	jnz	.Lccvt_420i_bgr24_line
.Lccvt_420i_bgr24_out:
	pop	%edi
	pop	%esi
	pop	%ebx
	leave
	ret
/* ccvt_420i_rgb32: from interlaced to RGBa.

   Stack arguments: Width, Height, Src2, Dst2.
   Nothing is converted when test_param_2 reports a bad argument.
   The Height argument slot doubles as the line counter and has been
   counted down to zero after a successful conversion.
   %ebx, %esi and %edi are saved and restored.
*/
ENTRY(ccvt_420i_rgb32)
	enter	$72, $0			# 72 bytes of locals, nesting level 0
	push	%ebx
	push	%esi
	push	%edi
	call	test_param_2
	jc	.Lccvt_420i_rgb32_out
.Lccvt_420i_rgb32_line:
	mov	Width, %ecx		# pixels left on this line
.Lccvt_420i_rgb32_quad:
	call	do_four_yuvi
	call	limit_pixels
	call	push_rgb32
	test	%ecx, %ecx		# end of line?
	jnz	.Lccvt_420i_rgb32_quad
	subl	$1, Height		# yes; decrement line counter
	jnz	.Lccvt_420i_rgb32_line
.Lccvt_420i_rgb32_out:
	pop	%edi
	pop	%esi
	pop	%ebx
	leave
	ret
/* ccvt_420i_bgr32: guess what? Go from interlaced to BGRa.

   Stack arguments: Width, Height, Src2, Dst2.
   Nothing is converted when test_param_2 reports a bad argument.
   The Height argument slot doubles as the line counter and has been
   counted down to zero after a successful conversion.
   %ebx, %esi and %edi are saved and restored.
*/
ENTRY(ccvt_420i_bgr32)
	enter	$72, $0			# 72 bytes of locals, nesting level 0
	push	%ebx
	push	%esi
	push	%edi
	call	test_param_2
	jc	.Lccvt_420i_bgr32_out
.Lccvt_420i_bgr32_line:
	mov	Width, %ecx		# pixels left on this line
.Lccvt_420i_bgr32_quad:
	call	do_four_yuvi
	call	limit_pixels
	call	push_bgr32
	test	%ecx, %ecx		# end of line?
	jnz	.Lccvt_420i_bgr32_quad
	subl	$1, Height		# yes; decrement line counter
	jnz	.Lccvt_420i_bgr32_line
.Lccvt_420i_bgr32_out:
	pop	%edi
	pop	%esi
	pop	%ebx
	leave
	ret
/* From YUYV to RGBa */
ENTRY(ccvt_yuyv_rgb32)
enter $72, $0 # no extra space, no stackframes
push %ebx
push %esi
push %edi
call test_param_2
jc 9f
0: mov Width, %ecx # width
1: call do_four_yuyv
call limit_pixels
call push_rgb32
cmp $0, %ecx # end of line?
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -