📄 mc-a.asm
字号:
;*****************************************************************************
;* mc.asm: h264 encoder library
;*****************************************************************************
;* Copyright (C) 2003 x264 project
;* $Id: mc.asm,v 1.3 2004/06/18 01:59:58 chenm001 Exp $
;*
;* Authors: Min Chen <chenm001.163.com> (converted to nasm)
;*          Laurent Aimar <fenrir@via.ecp.fr> (init algorithm)
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
;*****************************************************************************

;*****************************************************************************
;*                                                                           *
;*  Revision history:                                                        *
;*                                                                           *
;*  2004.05.17  portab mc_copy_w4/8/16 (CM)                                  *
;*                                                                           *
;*****************************************************************************

; Syntax: NASM (Intel operand order).  Target: 32-bit x86.
; Every routine in this file reads its arguments from the stack.
; NOTE(review): none of the MMX routines visible here executes emms;
; presumably the callers are responsible for that - confirm.

BITS 32

;=============================================================================
; Macros and other preprocessor constants
;=============================================================================

; cglobal, SECTION_RODATA, picpush, picpop, picgetgot, picesp and GOT_ebx are
; not defined in this file; they are expected to come from this include.
%include "i386inc.asm"

;=============================================================================
; Constants
;=============================================================================

SECTION_RODATA

ALIGN 16
; Each constant is one 64-bit MMX operand holding four identical 16-bit words.
pw_4:   times 4 dw  4
pw_8:   times 4 dw  8
pw_32:  times 4 dw 32
pw_64:  times 4 dw 64

;=============================================================================
; Code
;=============================================================================

SECTION .text

cglobal x264_pixel_avg_w4_mmxext
cglobal x264_pixel_avg_w8_mmxext
cglobal x264_pixel_avg_w16_mmxext
cglobal x264_pixel_avg_w16_sse2

cglobal x264_pixel_avg_weight_4x4_mmxext
cglobal x264_pixel_avg_weight_w8_mmxext
cglobal x264_pixel_avg_weight_w16_mmxext

cglobal x264_mc_copy_w4_mmx
cglobal x264_mc_copy_w8_mmx
cglobal x264_mc_copy_w16_mmx
cglobal x264_mc_copy_w16_sse2

cglobal x264_mc_chroma_mmxext

;=============================================================================
; pixel avg
;=============================================================================
;
; The four x264_pixel_avg_* routines below share one layout.  After the four
; pushes the first stack argument sits at [esp+20], and the registers are:
;
;   edi = dst      esi = i_dst_stride
;   ebx = src1     eax = i_src1_stride
;   ecx = src2     edx = i_src2_stride
;   ebp = i_height (rows still to process)
;
; pavgb produces the rounded-up average of unsigned bytes: (a + b + 1) >> 1.
; Pointers are advanced with lea rather than add because lea leaves EFLAGS
; untouched, so the ZF produced by "dec ebp" is still valid at the jne.

ALIGN 16
;-----------------------------------------------------------------------------
; void x264_pixel_avg_w4_mmxext( uint8_t *dst,  int i_dst_stride,
;                                uint8_t *src1, int i_src1_stride,
;                                uint8_t *src2, int i_src2_stride,
;                                int i_height );
;
; dst[y][0..3] = avg( src1[y][0..3], src2[y][0..3] ) for every row.
; Two rows are handled per iteration and the loop only ends when the counter
; reaches exactly zero, so i_height must be a positive even number.
; Note: pavgb with an MMX memory operand reads 8 bytes from src2 although
; only the low 4 result bytes are stored.
;-----------------------------------------------------------------------------
x264_pixel_avg_w4_mmxext:
    push    ebp
    push    ebx
    push    esi
    push    edi

    mov     edi, [esp+20]       ; dst
    mov     ebx, [esp+28]       ; src1
    mov     ecx, [esp+36]       ; src2
    mov     esi, [esp+24]       ; i_dst_stride
    mov     eax, [esp+32]       ; i_src1_stride
    mov     edx, [esp+40]       ; i_src2_stride
    mov     ebp, [esp+44]       ; i_height

ALIGN 4
.height_loop:
    movd    mm0, [ebx]          ; row y   of src1
    pavgb   mm0, [ecx]          ; ... averaged with row y of src2
    movd    mm1, [ebx+eax]      ; row y+1 of src1
    pavgb   mm1, [ecx+edx]      ; ... averaged with row y+1 of src2
    movd    [edi], mm0
    movd    [edi+esi], mm1
    dec     ebp
    dec     ebp                 ; two rows done; ZF set when none remain
    lea     ebx, [ebx+eax*2]    ; advance all pointers by two rows
    lea     ecx, [ecx+edx*2]
    lea     edi, [edi+esi*2]
    jne     .height_loop

    pop     edi
    pop     esi
    pop     ebx
    pop     ebp
    ret


ALIGN 16
;-----------------------------------------------------------------------------
; void x264_pixel_avg_w8_mmxext( uint8_t *dst,  int i_dst_stride,
;                                uint8_t *src1, int i_src1_stride,
;                                uint8_t *src2, int i_src2_stride,
;                                int i_height );
;
; dst[y][0..7] = avg( src1[y][0..7], src2[y][0..7] ), one row per iteration.
; i_height must be at least 1 (the counter is tested after the first row).
;-----------------------------------------------------------------------------
x264_pixel_avg_w8_mmxext:
    push    ebp
    push    ebx
    push    esi
    push    edi

    mov     edi, [esp+20]       ; dst
    mov     ebx, [esp+28]       ; src1
    mov     ecx, [esp+36]       ; src2
    mov     esi, [esp+24]       ; i_dst_stride
    mov     eax, [esp+32]       ; i_src1_stride
    mov     edx, [esp+40]       ; i_src2_stride
    mov     ebp, [esp+44]       ; i_height

ALIGN 4
.height_loop:
    movq    mm0, [ebx]
    pavgb   mm0, [ecx]
    movq    [edi], mm0
    dec     ebp                 ; ZF set when no rows remain
    lea     ebx, [ebx+eax]      ; next row (lea keeps ZF intact)
    lea     ecx, [ecx+edx]
    lea     edi, [edi+esi]
    jne     .height_loop

    pop     edi
    pop     esi
    pop     ebx
    pop     ebp
    ret


ALIGN 16
;-----------------------------------------------------------------------------
; void x264_pixel_avg_w16_mmxext( uint8_t *dst,  int i_dst_stride,
;                                 uint8_t *src1, int i_src1_stride,
;                                 uint8_t *src2, int i_src2_stride,
;                                 int i_height );
;
; dst[y][0..15] = avg( src1[y][0..15], src2[y][0..15] ), one row per
; iteration, processed as two 8-byte MMX halves.
; i_height must be at least 1 (the counter is tested after the first row).
;-----------------------------------------------------------------------------
x264_pixel_avg_w16_mmxext:
    push    ebp
    push    ebx
    push    esi
    push    edi

    mov     edi, [esp+20]       ; dst
    mov     ebx, [esp+28]       ; src1
    mov     ecx, [esp+36]       ; src2
    mov     esi, [esp+24]       ; i_dst_stride
    mov     eax, [esp+32]       ; i_src1_stride
    mov     edx, [esp+40]       ; i_src2_stride
    mov     ebp, [esp+44]       ; i_height

ALIGN 4
.height_loop:
    movq    mm0, [ebx  ]        ; left  8 pixels of src1
    movq    mm1, [ebx+8]        ; right 8 pixels of src1
    pavgb   mm0, [ecx  ]
    pavgb   mm1, [ecx+8]
    movq    [edi  ], mm0
    movq    [edi+8], mm1
    dec     ebp                 ; ZF set when no rows remain
    lea     ebx, [ebx+eax]      ; next row (lea keeps ZF intact)
    lea     ecx, [ecx+edx]
    lea     edi, [edi+esi]
    jne     .height_loop

    pop     edi
    pop     esi
    pop     ebx
    pop     ebp
    ret


ALIGN 16
;-----------------------------------------------------------------------------
; void x264_pixel_avg_w16_sse2( uint8_t *dst,  int i_dst_stride,
;                               uint8_t *src1, int i_src1_stride,
;                               uint8_t *src2, int i_src2_stride,
;                               int i_height );
;
; dst[y][0..15] = avg( src1[y][0..15], src2[y][0..15] ), one row per
; iteration in a single XMM register.
; src1 and dst are accessed with movdqu (no alignment requirement); src2 is
; used directly as the memory operand of pavgb.
; NOTE(review): a legacy-SSE memory operand must be 16-byte aligned, so this
; presumably relies on src2 being aligned - confirm against callers.
; i_height must be at least 1 (the counter is tested after the first row).
;-----------------------------------------------------------------------------
x264_pixel_avg_w16_sse2:
    push    ebp
    push    ebx
    push    esi
    push    edi

    mov     edi, [esp+20]       ; dst
    mov     ebx, [esp+28]       ; src1
    mov     ecx, [esp+36]       ; src2
    mov     esi, [esp+24]       ; i_dst_stride
    mov     eax, [esp+32]       ; i_src1_stride
    mov     edx, [esp+40]       ; i_src2_stride
    mov     ebp, [esp+44]       ; i_height

ALIGN 4
.height_loop:
    movdqu  xmm0, [ebx]
    pavgb   xmm0, [ecx]
    movdqu  [edi], xmm0
    dec     ebp                 ; ZF set when no rows remain
    lea     ebx, [ebx+eax]      ; next row (lea keeps ZF intact)
    lea     ecx, [ecx+edx]
    lea     edi, [edi+esi]
    jne     .height_loop

    pop     edi
    pop     esi
    pop     ebx
    pop     ebp
    ret


;=============================================================================
; weighted prediction
;=============================================================================
; implicit bipred only:
; assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64

;-----------------------------------------------------------------------------
; BIWEIGHT_4P_MMX dst_mem, src_mem
;
; Blends four pixels in place:
;   %1[i] = clip( ( %1[i]*mm4 + %2[i]*mm5 + mm6 ) >> 6 )      for i = 0..3
;
; Expects the register state set up by BIWEIGHT_START_MMX:
;   mm4 = weight_dst (x4 words), mm5 = 64 - weight_dst (x4 words),
;   mm6 = 32 (rounding),         mm7 = 0
; Clobbers mm0 and mm1.
;-----------------------------------------------------------------------------
%macro BIWEIGHT_4P_MMX 2
    movd      mm0, %1
    movd      mm1, %2
    punpcklbw mm0, mm7          ; widen bytes to words (mm7 = 0)
    punpcklbw mm1, mm7
    pmullw    mm0, mm4
    pmullw    mm1, mm5
    paddw     mm0, mm1
    paddw     mm0, mm6          ; + rounding
    psraw     mm0, 6
    pmaxsw    mm0, mm7          ; clamp negative results to 0
    packuswb  mm0, mm0          ; saturate to 0..255 and narrow to bytes
    movd      %1, mm0
%endmacro

;-----------------------------------------------------------------------------
; BIWEIGHT_START_MMX
;
; Common prologue of the weighted-average routines.  Saves edi and esi (plus
; ebx through picpush), then loads:
;   edi = dst, esi = i_dst, edx = src, ecx = i_src
;   mm4 = weight_dst replicated into all four words
;   mm5 = 64 - weight_dst, mm6 = 32, mm7 = 0
; Argument offsets are relative to picesp.
; NOTE(review): picesp presumably compensates for the optional picpush of ebx;
; its definition is not visible here - confirm in i386inc.asm.
;-----------------------------------------------------------------------------
%macro BIWEIGHT_START_MMX 0
    push    edi
    push    esi
    picpush ebx
    picgetgot ebx

    mov     edi, [picesp+12]            ; dst
    mov     esi, [picesp+16]            ; i_dst
    mov     edx, [picesp+20]            ; src
    mov     ecx, [picesp+24]            ; i_src

    pshufw  mm4, [picesp+28], 0         ; weight_dst (low word broadcast)
    movq    mm5, [pw_64 GOT_ebx]
    psubw   mm5, mm4                    ; weight_src
    movq    mm6, [pw_32 GOT_ebx]        ; rounding
    pxor    mm7, mm7
%endmacro

;-----------------------------------------------------------------------------
; BIWEIGHT_END_MMX
;
; Epilogue matching BIWEIGHT_START_MMX: restores the saved registers in
; reverse order and returns.
;-----------------------------------------------------------------------------
%macro BIWEIGHT_END_MMX 0
    picpop  ebx
    pop     esi
    pop     edi
    ret
%endmacro

ALIGN 16
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_avg_weight_w16_mmxext( uint8_t *, int, uint8_t *, int, int, int )
;
; Arguments in order, as loaded by BIWEIGHT_START_MMX and below:
;   dst, i_dst, src, i_src, weight_dst, i_height
;-----------------------------------------------------------------------------
x264_pixel_avg_weight_w16_mmxext:
    BIWEIGHT_START_MMX
    mov     eax, [picesp+32]            ; i_height
    ALIGN 4
    .height_loop:
    ; NOTE(review): the visible listing ends here; the rest of this routine
    ; lies beyond the shown source and is intentionally not reconstructed.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -