📄 rgbconv.asm
字号:
pshufd xmm6, xmm2, 01010101b ;// CR1 CR1 CR1 CR1
mulps xmm5, TBL_MultCB ;// 1.772*CB1 -0.34414*CB1 0 0
mulps xmm6, xmm7 ;// 0 -0.71414*CR1 1.402*CR1 0
addps xmm4, xmm5 ;// Y1+1.772*CB1 Y1-0.34414*CB1 Y1 Y1
addps xmm4, xmm6 ;// B1 G1 R1 Y1
cvtps2dq xmm3, xmm3 ;// convert floats to ints
cvtps2dq xmm4, xmm4 ;// convert floats to ints
packssdw xmm3, xmm4 ;// convert ints to words
pshufd xmm4, xmm0, 010101010b ;// Y2 Y2 Y2 Y2
pshufd xmm5, xmm1, 010101010b ;// CB2 CB2 CB2 CB2
pshufd xmm6, xmm2, 010101010b ;// CR2 CR2 CR2 CR2
mulps xmm5, TBL_MultCB ;// 1.772*CB2 -0.34414*CB2 0 0
mulps xmm6, xmm7 ;// 0 -0.71414*CR2 1.402*CR2 0
addps xmm4, xmm5 ;// Y2+1.772*CB2 Y2-0.34414*CB2 Y2 Y2
addps xmm4, xmm6 ;// B2 G2 R2 Y2
pshufd xmm0, xmm0, 011111111b ;// Y3 Y3 Y3 Y3
pshufd xmm1, xmm1, 011111111b ;// CB3 CB3 CB3 CB3
pshufd xmm2, xmm2, 011111111b ;// CR3 CR3 CR3 CR3
mulps xmm1, TBL_MultCB ;// 1.772*CB3 -0.34414*CB3 0 0
mulps xmm2, xmm7 ;// 0 -0.71414*CR3 1.402*CR3 0
addps xmm0, xmm1 ;// Y3+1.772*CB3 Y3-0.34414*CB3 Y3 Y3
addps xmm0, xmm2 ;// B3 G3 R3 Y3
cvtps2dq xmm4, xmm4 ;// convert floats to ints
cvtps2dq xmm0, xmm0 ;// convert floats to ints
packssdw xmm4, xmm0 ;// convert ints to words (Y3 R3 G3 B3 Y2 R2 G2 B2)
packuswb xmm3, xmm4 ;// convert words to bytes (B3 G3 R3 Y3 ... B0 G0 R0 Y0)
movdqa [DST], xmm3 ;// write 4 pixels
movaps xmm6, TBL_MultCB ;// TBL_MultCB
movaps xmm0, [Y][4*4] ;// Y3 Y2 Y1 Y0
movaps xmm1, [CB][4*4] ;// CB3 CB2 CB1 CB0
movaps xmm2, [CR][4*4] ;// CR3 CR2 CR1 CR0
addps xmm0, [Ctx].FrameInfo.S ;// Y+S
pshufd xmm3, xmm0, 0 ;// Y0 Y0 Y0 Y0
pshufd xmm4, xmm1, 0 ;// CB0 CB0 CB0 CB0
pshufd xmm5, xmm2, 0 ;// CR0 CR0 CR0 CR0
mulps xmm4, xmm6 ;// 1.772*CB0 -0.34414*CB0 0 0
mulps xmm5, xmm7 ;// 0 -0.71414*CR0 1.402*CR0 0
addps xmm3, xmm4 ;// Y0+1.772*CB0 Y0-0.34414*CB0 Y0 Y0
addps xmm3, xmm5 ;// B0 G0 R0 Y0
pshufd xmm4, xmm0, 01010101b ;// Y1 Y1 Y1 Y1
pshufd xmm5, xmm1, 01010101b ;// CB1 CB1 CB1 CB1
pshufd xmm6, xmm2, 01010101b ;// CR1 CR1 CR1 CR1
mulps xmm5, TBL_MultCB ;// 1.772*CB1 -0.34414*CB1 0 0
mulps xmm6, xmm7 ;// 0 -0.71414*CR1 1.402*CR1 0
addps xmm4, xmm5 ;// Y1+1.772*CB1 Y1-0.34414*CB1 Y1 Y1
addps xmm4, xmm6 ;// B1 G1 R1 Y1
cvtps2dq xmm3, xmm3 ;// convert floats to ints
cvtps2dq xmm4, xmm4 ;// convert floats to ints
packssdw xmm3, xmm4 ;// convert ints to words
pshufd xmm4, xmm0, 010101010b ;// Y2 Y2 Y2 Y2
pshufd xmm5, xmm1, 010101010b ;// CB2 CB2 CB2 CB2
pshufd xmm6, xmm2, 010101010b ;// CR2 CR2 CR2 CR2
mulps xmm5, TBL_MultCB ;// 1.772*CB2 -0.34414*CB2 0 0
mulps xmm6, xmm7 ;// 0 -0.71414*CR2 1.402*CR2 0
addps xmm4, xmm5 ;// Y2+1.772*CB2 Y2-0.34414*CB2 Y2 Y2
addps xmm4, xmm6 ;// B2 G2 R2 Y2
shufps xmm0, xmm0, 011111111b ;// Y3 Y3 Y3 Y3
shufps xmm1, xmm1, 011111111b ;// CB3 CB3 CB3 CB3
shufps xmm2, xmm2, 011111111b ;// CR3 CR3 CR3 CR3
mulps xmm1, TBL_MultCB ;// 1.772*CB3 -0.34414*CB3 0 0
mulps xmm2, xmm7 ;// 0 -0.71414*CR3 1.402*CR3 0
addps xmm0, xmm1 ;// Y3+1.772*CB3 Y3-0.34414*CB3 Y3 Y3
addps xmm0, xmm2 ;// B3 G3 R3 Y3
cvtps2dq xmm4, xmm4 ;// convert floats to ints
cvtps2dq xmm0, xmm0 ;// convert floats to ints
packssdw xmm4, xmm0 ;// convert ints to words (Y3 R3 G3 B3 Y2 R2 G2 B2)
packuswb xmm3, xmm4 ;// convert words to bytes (B3 G3 R3 Y3 ... B0 G0 R0 Y0)
movdqa [DST][4*4], xmm3 ;// write 4 pixels
;// next 8 columns
add DST, [PTBL][I+12]
add I, 16
jnz ILoop
;// HmaxCount--
;// if (HmaxCount == 0) {
;// HmaxCount += NbHmaxInRow
;// pRGB += DeltaRGB
;// }
dec [Ctx].FrameInfo.HmaxCount
jz EndOfRow
AEndOfRow:
mov [Ctx].FrameInfo.cRGB, DST
pop ebx
pop edi
pop esi
PROFILE_OUT "RGB_YCbCrConv SSE2"
ret
EndOfRow:
add DST, [Ctx].FrameInfo.DeltaRGB
mov eax, [Ctx].FrameInfo.NbHmaxRow
mov [Ctx].FrameInfo.HmaxCount, eax
jmp AEndOfRow
Y TEXTEQU <>
CB TEXTEQU <>
CR TEXTEQU <>
DST TEXTEQU <>
I TEXTEQU <>
PTBL TEXTEQU <>
RGB_YCbCrConv_SSE2 ENDP
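;//=========================================================================
;// Convert YCbCr to RGB (SSE)
;// SSE-only path: the float->int conversion and the word/byte packing go
;// through MMX registers (cvtps2pi / packssdw / packuswb on mm1-mm4), and
;// pixels are stored two at a time with movq.
;// NOTE(review): because mm registers are written here, confirm that EMMS
;// is executed before any x87 code runs after this routine.
;//=========================================================================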
RGB_YCbCrConv_SSE PROC
Y TEXTEQU <edi>
CB TEXTEQU <ebx>
CR TEXTEQU <ecx>
DST TEXTEQU <edx>
I TEXTEQU <eax>
PTBL TEXTEQU <DPTR esi>
PROFILE_IN
push esi
push edi
push ebx
;// DST = pRGB;
;// Y = &SampleY0;
;// CB = &SampleCB;
;// CR = &SampleCR;
;// PTBL = &PointerTable[0]
;// for (i = 0; i < 8*Vmax*Hmax; i++) {
;// ConvertRow(DST,Y,CB,CR)
;// Y = PTBL[4*i]
;// CB = PTBL[4*i+1]
;// CR = PTBL[4*i+2]
;// DST += PTBL[4*i+3]
;// }
mov DST, [Ctx].FrameInfo.cRGB
mov I, [Ctx].FrameInfo.HmaxVmax64
mov PTBL, [Ctx].FrameInfo.PointerTable
shr I, 1
neg I
movaps xmm7, TBL_MultCR
ILoop:
mov Y, [PTBL][I]
mov CB, [PTBL][I+4]
mov CR, [PTBL][I+8]
;// convert a row
;// R = (Y+S) + 1.402 Cr
;// G = (Y+S) - 0.34414 Cb - 0.71414 Cr
;// B = (Y+S) + 1.772 Cb
;// S = 128 for 8-bit precision, 2048 for 12-bit precision
movaps xmm6, TBL_MultCB ;// TBL_MultCB
movaps xmm0, [Y] ;// Y3 Y2 Y1 Y0
movaps xmm1, [CB] ;// CB3 CB2 CB1 CB0
movaps xmm2, [CR] ;// CR3 CR2 CR1 CR0
addps xmm0, [Ctx].FrameInfo.S ;// Y+S
movaps xmm3, xmm0 ;// Y3 Y2 Y1 Y0
movaps xmm4, xmm1 ;// CB3 CB2 CB1 CB0
movaps xmm5, xmm2 ;// CR3 CR2 CR1 CR0
shufps xmm3, xmm3, 0 ;// Y0 Y0 Y0 Y0
shufps xmm4, xmm4, 0 ;// CB0 CB0 CB0 CB0
shufps xmm5, xmm5, 0 ;// CR0 CR0 CR0 CR0
mulps xmm4, xmm6 ;// 1.772*CB0 -0.34414*CB0 0 0
mulps xmm5, xmm7 ;// 0 -0.71414*CR0 1.402*CR0 0
addps xmm3, xmm4 ;// Y0+1.772*CB0 Y0-0.34414*CB0 Y0 Y0
addps xmm3, xmm5 ;// B0 G0 R0 Y0
movaps xmm4, xmm0 ;// Y3 Y2 Y1 Y0
movaps xmm5, xmm1 ;// CB3 CB2 CB1 CB0
movaps xmm6, xmm2 ;// CR3 CR2 CR1 CR0
shufps xmm4, xmm4, 01010101b ;// Y1 Y1 Y1 Y1
shufps xmm5, xmm5, 01010101b ;// CB1 CB1 CB1 CB1
shufps xmm6, xmm6, 01010101b ;// CR1 CR1 CR1 CR1
mulps xmm5, TBL_MultCB ;// 1.772*CB1 -0.34414*CB1 0 0
mulps xmm6, xmm7 ;// 0 -0.71414*CR1 1.402*CR1 0
addps xmm4, xmm5 ;// Y1+1.772*CB1 Y1-0.34414*CB1 Y1 Y1
addps xmm4, xmm6 ;// B1 G1 R1 Y1
cvtps2pi mm1, xmm3 ;// R0 Y0
cvtps2pi mm2, xmm4 ;// R1 Y1
movhlps xmm3, xmm3 ;// B0 G0 B0 G0
movhlps xmm4, xmm4 ;// B1 G1 B1 G1
cvtps2pi mm3, xmm3 ;// B0 G0
cvtps2pi mm4, xmm4 ;// B1 G1
packssdw mm1, mm3 ;// B0 G0 R0 Y0
packssdw mm2, mm4 ;// B1 G1 R1 Y1
packuswb mm1, mm2 ;// B1 G1 R1 Y1 B0 G0 R0 Y0
movq [DST][0], mm1 ;// write 2 pixels
movaps xmm4, xmm0 ;// Y3 Y2 Y1 Y0
movaps xmm5, xmm1 ;// CB3 CB2 CB1 CB0
movaps xmm6, xmm2 ;// CR3 CR2 CR1 CR0
shufps xmm4, xmm4, 010101010b ;// Y2 Y2 Y2 Y2
shufps xmm5, xmm5, 010101010b ;// CB2 CB2 CB2 CB2
shufps xmm6, xmm6, 010101010b ;// CR2 CR2 CR2 CR2
mulps xmm5, TBL_MultCB ;// 1.772*CB2 -0.34414*CB2 0 0
mulps xmm6, xmm7 ;// 0 -0.71414*CR2 1.402*CR2 0
addps xmm4, xmm5 ;// Y2+1.772*CB2 Y2-0.34414*CB2 Y2 Y2
addps xmm4, xmm6 ;// B2 G2 R2 Y2
shufps xmm0, xmm0, 011111111b ;// Y3 Y3 Y3 Y3
shufps xmm1, xmm1, 011111111b ;// CB3 CB3 CB3 CB3
shufps xmm2, xmm2, 011111111b ;// CR3 CR3 CR3 CR3
mulps xmm1, TBL_MultCB ;// 1.772*CB3 -0.34414*CB3 0 0
mulps xmm2, xmm7 ;// 0 -0.71414*CR3 1.402*CR3 0
addps xmm0, xmm1 ;// Y3+1.772*CB3 Y3-0.34414*CB3 Y3 Y3
addps xmm0, xmm2 ;// B3 G3 R3 Y3
cvtps2pi mm1, xmm4 ;// R2 Y2
cvtps2pi mm2, xmm0 ;// R3 Y3
movhlps xmm4, xmm4 ;// B2 G2 B2 G2
movhlps xmm0, xmm0 ;// B3 G3 B3 G3
cvtps2pi mm3, xmm4 ;// B2 G2
cvtps2pi mm4, xmm0 ;// B3 G3
packssdw mm1, mm3 ;// B2 G2 R2 Y2
packssdw mm2, mm4 ;// B3 G3 R3 Y3
packuswb mm1, mm2 ;// B3 G3 R3 Y3 B2 G2 R2 Y2
movq [DST][8], mm1 ;// write 2 pixels
movaps xmm6, TBL_MultCB ;// TBL_MultCB
movaps xmm0, [Y][4*4] ;// Y3 Y2 Y1 Y0
movaps xmm1, [CB][4*4] ;// CB3 CB2 CB1 CB0
movaps xmm2, [CR][4*4] ;// CR3 CR2 CR1 CR0
addps xmm0, [Ctx].FrameInfo.S ;// Y+S
movaps xmm3, xmm0 ;// Y3 Y2 Y1 Y0
movaps xmm4, xmm1 ;// CB3 CB2 CB1 CB0
movaps xmm5, xmm2 ;// CR3 CR2 CR1 CR0
shufps xmm3, xmm3, 0 ;// Y0 Y0 Y0 Y0
shufps xmm4, xmm4, 0 ;// CB0 CB0 CB0 CB0
shufps xmm5, xmm5, 0 ;// CR0 CR0 CR0 CR0
mulps xmm4, xmm6 ;// 1.772*CB0 -0.34414*CB0 0 0
mulps xmm5, xmm7 ;// 0 -0.71414*CR0 1.402*CR0 0
addps xmm3, xmm4 ;// Y0+1.772*CB0 Y0-0.34414*CB0 Y0 Y0
addps xmm3, xmm5 ;// B0 G0 R0 Y0
movaps xmm4, xmm0 ;// Y3 Y2 Y1 Y0
movaps xmm5, xmm1 ;// CB3 CB2 CB1 CB0
movaps xmm6, xmm2 ;// CR3 CR2 CR1 CR0
shufps xmm4, xmm4, 01010101b ;// Y1 Y1 Y1 Y1
shufps xmm5, xmm5, 01010101b ;// CB1 CB1 CB1 CB1
shufps xmm6, xmm6, 01010101b ;// CR1 CR1 CR1 CR1
mulps xmm5, TBL_MultCB ;// 1.772*CB1 -0.34414*CB1 0 0
mulps xmm6, xmm7 ;// 0 -0.71414*CR1 1.402*CR1 0
addps xmm4, xmm5 ;// Y1+1.772*CB1 Y1-0.34414*CB1 Y1 Y1
addps xmm4, xmm6 ;// B1 G1 R1 Y1
cvtps2pi mm1, xmm3 ;// R0 Y0
cvtps2pi mm2, xmm4 ;// R1 Y1
movhlps xmm3, xmm3 ;// B0 G0 B0 G0
movhlps xmm4, xmm4 ;// B1 G1 B1 G1
cvtps2pi mm3, xmm3 ;// B0 G0
cvtps2pi mm4, xmm4 ;// B1 G1
packssdw mm1, mm3 ;// B0 G0 R0 Y0
packssdw mm2, mm4 ;// B1 G1 R1 Y1
packuswb mm1, mm2 ;// B1 G1 R1 Y1 B0 G0 R0 Y0
movq [DST][16], mm1 ;// write 2 pixels
movaps xmm4, xmm0 ;// Y3 Y2 Y1 Y0
movaps xmm5, xmm1 ;// CB3 CB2 CB1 CB0
movaps xmm6, xmm2 ;// CR3 CR2 CR1 CR0
shufps xmm4, xmm4, 010101010b ;// Y2 Y2 Y2 Y2
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -