📄 video_yuv_mmx.s
字号:
/*****************************************************************************
 * video_yuv_mmx.S: YUV transformation, optimized for MMX processors
 *****************************************************************************
 * Copyright (C) 1999, 2000 VideoLAN
 *
 * Authors:
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
 *****************************************************************************/

/*****************************************************************************
 * Following functions are defined:
 *   vout_YUV420_16_MMX
 * This function performs YUV12-to-RGB16 color conversion for H26x.
 * It handles any format in which there are three fields, the low
 * order field being B and fully contained in the low order byte, the
 * second field being G and being somewhere in bits 4 through 11,
 * and the high order field being R and fully contained in the high
 * order byte.
 *
 * The YUV12 input is planar, 8 bits per pel. The Y plane may have
 * a pitch of up to 768. It may have a width less than or equal
 * to the pitch. It must be DWORD aligned, and preferably QWORD
 * aligned. Pitch and Width must be a multiple of four. For best
 * performance, Pitch should not be 4 more than a multiple of 32.
 * Height may be any amount, but must be a multiple of two. The U
 * and V planes may have a different pitch than the Y plane, subject
 * to the same limitations.
 *
 * NOTE(review): the symbol exported by the code below is
 * ConvertYUV420RGB16MMX; the name vout_YUV420_16_MMX is not defined in
 * this part of the file -- confirm which name the header should carry.
 *****************************************************************************/

//.include iammx.inc
//.include locals.inc

.data
    .align 16

/*
 * Jump table indexed by CCType (0..3). The entry order matches the CCType
 * numbering documented in the prototype comment further down:
 * 0 = RGB565, 1 = RGB555, 2 = RGB664, 3 = RGB655.
 */
RGB_formats:
    .long RGB565
    .long RGB555
    .long RGB664
    .long RGB655

/*
 * 64-bit constants, each stored as two .long values or one .quad.
 * None of them except fourbitu/fivebitu/sixbitu is referenced by the setup
 * code in this section; judging by their names they are presumably the
 * fixed-point YUV->RGB coefficients and offsets used by the conversion
 * loop -- TODO confirm against the loop body.
 */
Minusg: .long 0x00800080, 0x00800080
Yadd: .long 0x10101010, 0x10101010
VtR: .long 0x00660066, 0x00660066
VtG: .long 0x00340034, 0x00340034
UtG: .long 0x00190019, 0x00190019
UtB: .long 0x00810081, 0x00810081
Ymul: .long 0x004a004a, 0x004a004a
UVtG: .long 0x00340019, 0x00340019
VtRUtB: .long 0x01990205, 0x01990205

/*
 * Per-byte patterns copied into the R/G/B "UpperLimit" stack slots by the
 * format-specific setup blocks below.
 */
fourbitu: .quad 0xf0f0f0f0f0f0f0f0
fivebitu: .quad 0xe0e0e0e0e0e0e0e0
sixbitu: .quad 0xc0c0c0c0c0c0c0c0

.text

/*
 * Stack layout after the prologue (all offsets are relative to %esp):
 *   [0, LocalFrameSize)                          local variables
 *   [LocalFrameSize, +RegisterStorageSize)       the four pushed registers
 *   LocalFrameSize + RegisterStorageSize         presumably the return address
 *   LocalFrameSize + RegisterStorageSize + 4 ... the arguments
 */
#define LocalFrameSize 156
#define RegisterStorageSize 16

//#define DOUBLE /* doubles the number of columns */

/* Arguments: */
#define YPlane LocalFrameSize + RegisterStorageSize + 4
#define UPlane LocalFrameSize + RegisterStorageSize + 8
#define VPlane LocalFrameSize + RegisterStorageSize + 12
#define FrameWidth LocalFrameSize + RegisterStorageSize + 16
#define FrameHeight LocalFrameSize + RegisterStorageSize + 20
#define YPitch LocalFrameSize + RegisterStorageSize + 24
#define ChromaPitch LocalFrameSize + RegisterStorageSize + 28
#define AspectAdjustmentCount LocalFrameSize + RegisterStorageSize + 32
#define ColorConvertedFrame LocalFrameSize + RegisterStorageSize + 36
#define DCIOffset LocalFrameSize + RegisterStorageSize + 40
#define CCOffsetToLine0 LocalFrameSize + RegisterStorageSize + 44
#define CCOPitch LocalFrameSize + RegisterStorageSize + 48
#define CCType LocalFrameSize + RegisterStorageSize + 52
#define EndOfArgList LocalFrameSize + RegisterStorageSize + 56

/* Locals (on local stack frame) */
#define CCOCursor 0
#define CCOSkipDistance 4
#define ChromaLineLen 8
#define YCursor 12
#define DistanceFromVToU 16
#define EndOfChromaLine 20
#define AspectCount 24
#define AspectBaseCount 28
#define tmpYCursorEven 32
#define tmpYCursorOdd 36
#define tmpCCOPitch 40
#define temp_mmx 44
/* The slots below are 8 bytes wide each (offsets are 8 apart). */
#define RLeftShift 92
#define GLeftShift 100
#define RRightShift 108
#define GRightShift 116
#define BRightShift 124
#define RUpperLimit 132
#define GUpperLimit 140
#define BUpperLimit 148

/*
 * extern void C ConvertYUV420RGB16MMX (
 *                                     U8* YPlane,
 *                                     U8* UPlane,
 *                                     U8* VPlane,
 *                                     UN  FrameWidth,
 *                                     UN  FrameHeight,
 *                                     UN  YPitch,
 *                                     UN  VPitch,
 *                                     UN  AspectAdjustmentCount,
 *                                     U8* ColorConvertedFrame,
 *                                     U32 DCIOffset,
 *                                     U32 CCOffsetToLine0,
 *                                     IN  CCOPitch,
 *                                     IN  CCType)
 *
 * The local variables are on the stack,
 * The tables are in the one and only data segment.
 *
 * CCOffsetToLine0 is relative to ColorConvertedFrame.
 * CCType used by RGB color convertors to determine the exact conversion type.
 *   RGB565 = 0
 *   RGB555 = 1
 *   RGB664 = 2
 *   RGB655 = 3
 *
 * NOTE(review): the 7th argument is called VPitch here but the code
 * addresses that stack slot as ChromaPitch.
 *
 * Early exits visible in this section (both jump to `finish`, a label
 * defined outside this section):
 *   - CCType >= 4 (unsigned compare)
 *   - AspectAdjustmentCount == 1
 */
.globl ConvertYUV420RGB16MMX
ConvertYUV420RGB16MMX:
    /* Save four registers (4 * 4 bytes = RegisterStorageSize). */
    pushl %esi
    pushl %edi
    pushl %ebp
    pushl %ebx
    /* Reserve the local frame. */
    subl $LocalFrameSize,%esp

    /* Dispatch on CCType through the RGB_formats jump table. */
    movl CCType(%esp),%eax
    cmpl $4,%eax
    jae finish
    jmp *RGB_formats(,%eax,4)

/*
 * Each format block below fills the five 8-byte shift slots (low dword =
 * count held in %ebx, high dword = 0 from %eax) and the three 8-byte
 * UpperLimit slots, then continues at RGBEND. The slots are presumably
 * consumed as MMX shift counts and byte masks by the conversion loop --
 * TODO confirm.
 */
RGB555:
    xorl %eax,%eax                  /* eax = 0: high dword of every shift slot */
    movl $2,%ebx /* 10-8 for byte shift */
    movl %ebx,RLeftShift(%esp)
    movl %eax,RLeftShift+4(%esp)
    movl $5,%ebx
    movl %ebx,GLeftShift(%esp)
    movl %eax,GLeftShift+4(%esp)
    movl $9,%ebx                    /* R, G and B right shifts are all 9 */
    movl %ebx,RRightShift(%esp)
    movl %eax,RRightShift+4(%esp)
    movl %ebx,GRightShift(%esp)
    movl %eax,GRightShift+4(%esp)
    movl %ebx,BRightShift(%esp)
    movl %eax,BRightShift+4(%esp)
    movq fivebitu,%mm0              /* same limit pattern for R, G and B */
    movq %mm0,RUpperLimit(%esp)
    movq %mm0,GUpperLimit(%esp)
    movq %mm0,BUpperLimit(%esp)
    jmp RGBEND

RGB664:
    xorl %eax,%eax                  /* eax = 0: high dword of every shift slot */
    movl $2,%ebx /* 8-6 */
    movl %ebx,RLeftShift(%esp)
    movl %eax,RLeftShift+4(%esp)
    movl $4,%ebx
    movl %ebx,GLeftShift(%esp)
    movl %eax,GLeftShift+4(%esp)
    movl $8,%ebx                    /* R and G right shifts are 8 */
    movl %ebx,RRightShift(%esp)
    movl %eax,RRightShift+4(%esp)
    movl %ebx,GRightShift(%esp)
    movl %eax,GRightShift+4(%esp)
    movl $10,%ebx                   /* B right shift is 10 */
    movl %ebx,BRightShift(%esp)
    movl %eax,BRightShift+4(%esp)
    movq sixbitu,%mm0               /* R and G limits */
    movq %mm0,RUpperLimit(%esp)
    movq %mm0,GUpperLimit(%esp)
    movq fourbitu,%mm0              /* B limit */
    movq %mm0,BUpperLimit(%esp)
    jmp RGBEND

RGB655:
    xorl %eax,%eax                  /* eax = 0: high dword of every shift slot */
    movl $2,%ebx /* 8-6 */
    movl %ebx,RLeftShift(%esp)
    movl %eax,RLeftShift+4(%esp)
    movl $5,%ebx
    movl %ebx,GLeftShift(%esp)
    movl %eax,GLeftShift+4(%esp)
    movl $8,%ebx                    /* R right shift is 8 */
    movl %ebx,RRightShift(%esp)
    movl %eax,RRightShift+4(%esp)
    movl $9,%ebx                    /* G and B right shifts are 9 */
    movl %ebx,GRightShift(%esp)
    movl %eax,GRightShift+4(%esp)
    movl %ebx,BRightShift(%esp)
    movl %eax,BRightShift+4(%esp)
    movq sixbitu,%mm0               /* R limit */
    movq %mm0,RUpperLimit(%esp)
    movq fivebitu,%mm0              /* G and B limits */
    movq %mm0,GUpperLimit(%esp)
    movq %mm0,BUpperLimit(%esp)
    jmp RGBEND

RGB565:
    xorl %eax,%eax                  /* eax = 0: high dword of every shift slot */
    movl $3,%ebx /* 8-5 */
    movl %ebx,RLeftShift(%esp)
    movl %eax,RLeftShift+4(%esp)
    movl $5,%ebx
    movl %ebx,GLeftShift(%esp)
    movl %eax,GLeftShift+4(%esp)
    movl $9,%ebx                    /* R and B right shifts are 9 */
    movl %ebx,RRightShift(%esp)
    movl %eax,RRightShift+4(%esp)
    movl %ebx,BRightShift(%esp)
    movl %eax,BRightShift+4(%esp)
    movl $8,%ebx                    /* G right shift is 8 */
    movl %ebx,GRightShift(%esp)
    movl %eax,GRightShift+4(%esp)
    movq fivebitu,%mm0              /* R and B limits */
    movq %mm0,RUpperLimit(%esp)
    movq %mm0,BUpperLimit(%esp)
    movq sixbitu,%mm0               /* G limit */
    movq %mm0,GUpperLimit(%esp)
    /* No jump needed: RGB565 falls through into RGBEND. */
//  jmp RGBEND

/*
 * Common setup shared by all four formats: derive the cursors and
 * distances that are stored in the local frame.
 */
RGBEND:
    /* DistanceFromVToU = UPlane - VPlane */
    movl VPlane(%esp),%ebx
    movl UPlane(%esp),%ecx
    subl %ebx,%ecx
    movl %ecx,DistanceFromVToU(%esp)

    /* CCOCursor = ColorConvertedFrame + DCIOffset + CCOffsetToLine0 */
    movl ColorConvertedFrame(%esp),%eax
    addl DCIOffset(%esp),%eax
    addl CCOffsetToLine0(%esp),%eax
    movl %eax,CCOCursor(%esp)

    movl YPitch(%esp),%ecx          /* ecx = YPitch (not used again in this section) */
    movl FrameWidth(%esp),%ebx
    movl CCOPitch(%esp),%eax
    subl %ebx,%eax /* CCOPitch-FrameWidth */
    subl %ebx,%eax /* CCOPitch-2*FrameWidth */
    sarl %ebx /* FrameWidth/2 */
    movl YPlane(%esp),%esi /* Fetch cursor over luma plane. */
    movl %ebx,ChromaLineLen(%esp) /* FrameWidth/2 */
    movl %eax,CCOSkipDistance(%esp) /* CCOPitch-2*FrameWidth (two subl above) */
    movl %esi,YCursor(%esp)

    movl AspectAdjustmentCount(%esp),%edx
    movl VPlane(%esp),%esi          /* esi = VPlane from here on */
    cmpl $1,%edx
    je finish                       /* AspectAdjustmentCount == 1: leave without converting */
    movl %edx,AspectCount(%esp)
    movl %edx,AspectBaseCount(%esp)
    xorl %eax,%eax

    /* EndOfChromaLine starts out equal to ChromaLineLen. */
    movl ChromaLineLen(%esp),%edi
    movl %edi,EndOfChromaLine(%esp)
    movl CCOCursor(%esp),%edi       /* edi = output cursor */
    movl DistanceFromVToU(%esp),%edx

    /*
     * tmpYCursorEven = YCursor + FrameWidth
     * tmpYCursorOdd  = YCursor + FrameWidth + YPitch
     */
    movl YCursor(%esp),%ebp /* Fetch Y cursor. */
    movl FrameWidth(%esp),%ebx
    addl %ebx,%ebp
    movl %ebp,tmpYCursorEven(%esp)
    movl YPitch(%esp),%eax
    addl %eax,%ebp
    movl %ebp,tmpYCursorOdd(%esp)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -