;// File: rgbconv.asm
;// (web code-viewer residue: "Font size:" control label)
shufps xmm5, xmm5, 010101010b ;// CB2 CB2 CB2 CB2
shufps xmm6, xmm6, 010101010b ;// CR2 CR2 CR2 CR2
mulps xmm5, TBL_MultCB ;// 1.772*CB2 -0.34414*CB2 0 0
mulps xmm6, xmm7 ;// 0 -0.71414*CR2 1.402*CR2 0
addps xmm4, xmm5 ;// Y2+1.772*CB2 Y2-0.34414*CB2 Y2 Y2
addps xmm4, xmm6 ;// B2 G2 R2 Y2
shufps xmm0, xmm0, 011111111b ;// Y3 Y3 Y3 Y3
shufps xmm1, xmm1, 011111111b ;// CB3 CB3 CB3 CB3
shufps xmm2, xmm2, 011111111b ;// CR3 CR3 CR3 CR3
mulps xmm1, TBL_MultCB ;// 1.772*CB3 -0.34414*CB3 0 0
mulps xmm2, xmm7 ;// 0 -0.71414*CR3 1.402*CR3 0
addps xmm0, xmm1 ;// Y3+1.772*CB3 Y3-0.34414*CB3 Y3 Y3
addps xmm0, xmm2 ;// B3 G3 R3 Y3
cvtps2pi mm1, xmm4 ;// R2 Y2
cvtps2pi mm2, xmm0 ;// R3 Y3
movhlps xmm4, xmm4 ;// B2 G2 B2 G2
movhlps xmm0, xmm0 ;// B3 G3 B3 G3
cvtps2pi mm3, xmm4 ;// B2 G2
cvtps2pi mm4, xmm0 ;// B3 G3
packssdw mm1, mm3 ;// B2 G2 R2 Y2
packssdw mm2, mm4 ;// B3 G3 R3 Y3
packuswb mm1, mm2 ;// B3 G3 R3 Y3 B2 G2 R2 Y2
movq [DST][24], mm1 ;// write 2 pixels
;// next 8 columns
add DST, [PTBL][I+12]
add I, 16
jnz ILoop
;// HmaxCount--
;// if (HmaxCount == 0) {
;// HmaxCount += NbHmaxInRow
;// pRGB += DeltaRGB
;// }
dec [Ctx].FrameInfo.HmaxCount
jz EndOfRow
AEndOfRow:
mov [Ctx].FrameInfo.cRGB, DST
pop ebx
pop edi
pop esi
PROFILE_OUT "RGB_YCbCrConv SSE"
ret
EndOfRow:
add DST, [Ctx].FrameInfo.DeltaRGB
mov eax, [Ctx].FrameInfo.NbHmaxRow
mov [Ctx].FrameInfo.HmaxCount, eax
jmp AEndOfRow
Y TEXTEQU <>
CB TEXTEQU <>
CR TEXTEQU <>
DST TEXTEQU <>
I TEXTEQU <>
PTBL TEXTEQU <>
RGB_YCbCrConv_SSE ENDP
;//=========================================================================
;// Convert Grayscale to RGB (SSE2)
;//=========================================================================
;//-------------------------------------------------------------------------
;// RGB_GrayConv_SSE2
;//
;// Grayscale -> packed-byte conversion using SSE2 integer instructions.
;// Each loop iteration reads 8 float samples, adds the 4-float vector at
;// FrameInfo.S, converts to int32, saturates to 0..255 and writes every
;// sample as 4 identical bytes (8 samples -> 32 output bytes).
;//
;// In:    Ctx (defined outside this procedure) with the FrameInfo fields
;//        cRGB, HmaxVmax64, PointerTable, S, HmaxCount, NbHmaxRow, DeltaRGB.
;// Out:   FrameInfo.cRGB and FrameInfo.HmaxCount are updated.
;// Clobb: eax, ecx, edx, xmm0-xmm4, xmm7, flags.  esi is saved/restored.
;// Align: movaps/movdqa are used, so FrameInfo.S, every sample row and
;//        every destination address must be 16-byte aligned.
;// NOTE(review): PROFILE_IN / PROFILE_OUT are macros defined elsewhere;
;//        any registers they touch are not visible from this block.
;//-------------------------------------------------------------------------
RGB_GrayConv_SSE2 PROC
;// Register aliases, cleared again just before ENDP.
Y TEXTEQU <eax>
DST TEXTEQU <ecx>
I TEXTEQU <edx>
PTBL TEXTEQU <DPTR esi>
PROFILE_IN
push esi
;// Pseudo-code of the loop below (table entries are 8 bytes: a sample-row
;// pointer at +0 and a destination increment at +4):
;//   DST  = cRGB
;//   PTBL = PointerTable
;//   for (I = -(HmaxVmax64 >> 2); I != 0; I += 8) {
;//     Y = *(PTBL + I)
;//     ConvertRow(DST, Y)
;//     DST += *(PTBL + I + 4)
;//   }
mov DST, [Ctx].FrameInfo.cRGB
mov I, [Ctx].FrameInfo.HmaxVmax64
mov PTBL, [Ctx].FrameInfo.PointerTable
shr I, 2
movaps xmm7, [Ctx].FrameInfo.S ;// xmm7 = 4 floats added to every sample
neg I ;// I counts up from a negative byte offset to zero
;// NOTE(review): I is negative, so [PTBL][I] addresses memory below PTBL;
;// presumably PointerTable points just past the last entry -- confirm.
ILoop:
mov Y, [PTBL][I] ;// Y = pointer to the 8 float samples of this row
;// load 8 samples and add S
movaps xmm0, [Y] ;// Y3 Y2 Y1 Y0
movaps xmm1, [Y][4*4] ;// Y7 Y6 Y5 Y4
addps xmm0, xmm7 ;// Y+S
addps xmm1, xmm7 ;// Y+S
;// float -> int32, rounded per the current MXCSR rounding mode
cvtps2dq xmm0, xmm0 ;// Y3 Y2 Y1 Y0
cvtps2dq xmm1, xmm1 ;// Y7 Y6 Y5 Y4
;// broadcast each of samples 0..3 across a whole register (dwords)
pshufd xmm2, xmm0, 011111111b ;// Y3 Y3 Y3 Y3
pshufd xmm3, xmm0, 010101010b ;// Y2 Y2 Y2 Y2
pshufd xmm4, xmm0, 001010101b ;// Y1 Y1 Y1 Y1
pshufd xmm0, xmm0, 000000000b ;// Y0 Y0 Y0 Y0
;// dwords -> words with signed saturation
packssdw xmm3, xmm2 ;// Y3 Y3 Y3 Y3 Y2 Y2 Y2 Y2
packssdw xmm0, xmm4 ;// Y1 Y1 Y1 Y1 Y0 Y0 Y0 Y0
;// xmm2 is free again: start broadcasting samples 4..7, interleaved with
;// the final pack of samples 0..3 (xmm3 is consumed before it is reused)
pshufd xmm2, xmm1, 011111111b ;// Y7 Y7 Y7 Y7
;// words -> bytes with unsigned saturation (clamps to 0..255)
packuswb xmm0, xmm3 ;// Y3 Y3 Y3 Y3 ... Y0 Y0 Y0 Y0
pshufd xmm3, xmm1, 010101010b ;// Y6 Y6 Y6 Y6
pshufd xmm4, xmm1, 001010101b ;// Y5 Y5 Y5 Y5
pshufd xmm1, xmm1, 000000000b ;// Y4 Y4 Y4 Y4
packssdw xmm3, xmm2 ;// Y7 Y7 Y7 Y7 Y6 Y6 Y6 Y6
packssdw xmm1, xmm4 ;// Y5 Y5 Y5 Y5 Y4 Y4 Y4 Y4
packuswb xmm1, xmm3 ;// Y7 Y7 Y7 Y7 ... Y4 Y4 Y4 Y4
;// aligned stores: 2 x 16 bytes
movdqa [DST], xmm0 ;// write 4 pixels (samples 0..3, 4 bytes each)
movdqa [DST][4*4], xmm1 ;// write 4 pixels (samples 4..7, 4 bytes each)
;// advance to the next row of 8 samples
add DST, [PTBL][I+4] ;// DST += destination increment stored in the table
add I, 8 ;// next 8-byte table entry; ZF set when I reaches 0
jnz ILoop
;// End-of-row bookkeeping:
;//   HmaxCount--
;//   if (HmaxCount == 0) {
;//     pRGB += DeltaRGB
;//     HmaxCount = NbHmaxRow
;//   }
dec [Ctx].FrameInfo.HmaxCount
jz EndOfRow
AEndOfRow:
mov [Ctx].FrameInfo.cRGB, DST ;// store the updated output pointer
pop esi
PROFILE_OUT "RGB_GrayConv SSE2"
ret
;// Out-of-line path, taken only when HmaxCount has just reached zero.
EndOfRow:
add DST, [Ctx].FrameInfo.DeltaRGB
mov eax, [Ctx].FrameInfo.NbHmaxRow ;// eax (alias Y) is no longer needed here
mov [Ctx].FrameInfo.HmaxCount, eax
jmp AEndOfRow
;// Clear the text aliases so they do not leak into later code.
;// CB and CR are not defined by this procedure; clearing them is harmless.
Y TEXTEQU <>
CB TEXTEQU <>
CR TEXTEQU <>
DST TEXTEQU <>
I TEXTEQU <>
PTBL TEXTEQU <>
RGB_GrayConv_SSE2 ENDP
;//=========================================================================
;// Convert Grayscale to RGB (SSE)
;//=========================================================================
;//-------------------------------------------------------------------------
;// RGB_GrayConv_SSE
;//
;// Grayscale -> packed-byte conversion for CPUs with SSE but without SSE2:
;// the float math runs in XMM registers, the integer shuffles/packs run in
;// MMX registers.  Each loop iteration reads 8 float samples, adds the
;// 4-float vector at FrameInfo.S, converts to int32, saturates to 0..255
;// and writes every sample as 4 identical bytes (8 samples -> 32 bytes).
;//
;// In:    Ctx (defined outside this procedure) with the FrameInfo fields
;//        cRGB, HmaxVmax64, PointerTable, S, HmaxCount, NbHmaxRow, DeltaRGB.
;// Out:   FrameInfo.cRGB and FrameInfo.HmaxCount are updated.
;// Clobb: eax, ecx, edx, xmm0, xmm1, xmm7, mm1-mm7, flags.
;//        esi is saved/restored.
;// Align: movaps is used, so FrameInfo.S and every sample row must be
;//        16-byte aligned.  The movq stores have no alignment requirement.
;// NOTE(review): MMX registers are written and no EMMS is executed before
;//        ret, so the x87 tag word is left in MMX state.  Presumably the
;//        caller issues EMMS once after the whole conversion -- confirm.
;// NOTE(review): PROFILE_IN / PROFILE_OUT are macros defined elsewhere;
;//        any registers they touch are not visible from this block.
;//-------------------------------------------------------------------------
RGB_GrayConv_SSE PROC
;// Register aliases, cleared again just before ENDP.
Y TEXTEQU <eax>
DST TEXTEQU <ecx>
I TEXTEQU <edx>
PTBL TEXTEQU <DPTR esi>
    PROFILE_IN
    push esi
    ;// Pseudo-code of the loop below (table entries are 8 bytes: a
    ;// sample-row pointer at +0 and a destination increment at +4):
    ;//   DST  = cRGB
    ;//   PTBL = PointerTable
    ;//   for (I = -(HmaxVmax64 >> 2); I != 0; I += 8) {
    ;//     Y = *(PTBL + I)
    ;//     ConvertRow(DST, Y)
    ;//     DST += *(PTBL + I + 4)
    ;//   }
    mov DST, [Ctx].FrameInfo.cRGB
    mov I, [Ctx].FrameInfo.HmaxVmax64
    mov PTBL, [Ctx].FrameInfo.PointerTable
    shr I, 2
    movaps xmm7, [Ctx].FrameInfo.S ;// xmm7 = 4 floats added to every sample
    neg I ;// I counts up from a negative byte offset to zero
    ;// NOTE(review): I is negative, so [PTBL][I] addresses memory below
    ;// PTBL; presumably PointerTable points just past the last entry --
    ;// confirm.
ILoop:
    mov Y, [PTBL][I] ;// Y = pointer to the 8 float samples of this row
    ;// load 8 samples and add S
    movaps xmm0, [Y] ;// Y3 Y2 Y1 Y0
    movaps xmm1, [Y][4*4] ;// Y7 Y6 Y5 Y4
    addps xmm0, xmm7 ;// Y+S
    addps xmm1, xmm7 ;// Y+S
    ;// float -> int32, two lanes at a time (cvtps2pi converts only the low
    ;// two floats, so the high pair is moved down with movhlps first);
    ;// rounding follows the current MXCSR rounding mode
    cvtps2pi mm1, xmm0 ;// Y1 Y0
    cvtps2pi mm2, xmm1 ;// Y5 Y4
    movhlps xmm0, xmm0 ;// Y3 Y2 Y3 Y2
    movhlps xmm1, xmm1 ;// Y7 Y6 Y7 Y6
    cvtps2pi mm3, xmm0 ;// Y3 Y2
    cvtps2pi mm4, xmm1 ;// Y7 Y6
    ;// dwords -> words with signed saturation
    packssdw mm1, mm3 ;// Y3 Y2 Y1 Y0
    packssdw mm2, mm4 ;// Y7 Y6 Y5 Y4
    ;// broadcast each of samples 0..3 across a whole register (words)
    pshufw mm5, mm1, 011111111b ;// Y3 Y3 Y3 Y3
    pshufw mm6, mm1, 010101010b ;// Y2 Y2 Y2 Y2
    pshufw mm7, mm1, 001010101b ;// Y1 Y1 Y1 Y1
    pshufw mm1, mm1, 000000000b ;// Y0 Y0 Y0 Y0
    ;// words -> bytes with unsigned saturation (clamps to 0..255)
    packuswb mm6, mm5 ;// Y3 Y3 Y3 Y3 Y2 Y2 Y2 Y2
    packuswb mm1, mm7 ;// Y1 Y1 Y1 Y1 Y0 Y0 Y0 Y0
    ;// same again for samples 4..7 (mm3/mm4/mm5 are free to be reused)
    pshufw mm3, mm2, 011111111b ;// Y7 Y7 Y7 Y7
    pshufw mm4, mm2, 010101010b ;// Y6 Y6 Y6 Y6
    pshufw mm5, mm2, 001010101b ;// Y5 Y5 Y5 Y5
    pshufw mm2, mm2, 000000000b ;// Y4 Y4 Y4 Y4
    packuswb mm4, mm3 ;// Y7 Y7 Y7 Y7 Y6 Y6 Y6 Y6
    packuswb mm2, mm5 ;// Y5 Y5 Y5 Y5 Y4 Y4 Y4 Y4
    ;// 4 x 8-byte stores = 32 output bytes
    movq [DST], mm1 ;// write 2 pixels (samples 0,1)
    movq [DST][8], mm6 ;// write 2 pixels (samples 2,3)
    movq [DST][16], mm2 ;// write 2 pixels (samples 4,5)
    movq [DST][24], mm4 ;// write 2 pixels (samples 6,7)
    ;// advance to the next row of 8 samples
    add DST, [PTBL][I+4] ;// DST += destination increment stored in the table
    add I, 8 ;// next 8-byte table entry; ZF set when I reaches 0
    jnz ILoop
    ;// End-of-row bookkeeping:
    ;//   HmaxCount--
    ;//   if (HmaxCount == 0) {
    ;//     pRGB += DeltaRGB
    ;//     HmaxCount = NbHmaxRow
    ;//   }
    dec [Ctx].FrameInfo.HmaxCount
    jz EndOfRow
AEndOfRow:
    mov [Ctx].FrameInfo.cRGB, DST ;// store the updated output pointer
    pop esi
    ;// Profile label names this (SSE) variant; it previously reused the
    ;// SSE2 routine's label, which merged the two in profiling output.
    PROFILE_OUT "RGB_GrayConv SSE"
    ret
    ;// Out-of-line path, taken only when HmaxCount has just reached zero.
EndOfRow:
    add DST, [Ctx].FrameInfo.DeltaRGB
    mov eax, [Ctx].FrameInfo.NbHmaxRow ;// eax (alias Y) is no longer needed here
    mov [Ctx].FrameInfo.HmaxCount, eax
    jmp AEndOfRow
;// Clear the text aliases so they do not leak into later code.
;// CB and CR are not defined by this procedure; clearing them is harmless.
Y TEXTEQU <>
CB TEXTEQU <>
CR TEXTEQU <>
DST TEXTEQU <>
I TEXTEQU <>
PTBL TEXTEQU <>
RGB_GrayConv_SSE ENDP
END
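;// ---- web code-viewer residue (not part of the source) ----
;// Keyboard shortcuts
;// Copy code:            Ctrl + C
;// Search code:          Ctrl + F
;// Full-screen mode:     F11
;// Toggle theme:         Ctrl + Shift + D
;// Show shortcuts:       ?
;// Increase font size:   Ctrl + =
;// Decrease font size:   Ctrl + -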