⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mc-a.asm

📁 DM642 H.264 codec DM642 H.264 codec DM642 H.264 codec DM642 H.264 codec
💻 ASM
📖 第 1 页 / 共 2 页
字号:
;*****************************************************************************
;* mc.asm: h264 encoder library
;*****************************************************************************
;* Copyright (C) 2003 x264 project
;* $Id: mc.asm,v 1.3 2004/06/18 01:59:58 chenm001 Exp $
;*
;* Authors: Min Chen <chenm001.163.com> (converted to nasm)
;*          Laurent Aimar <fenrir@via.ecp.fr> (init algorithm)
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
;*****************************************************************************

;*****************************************************************************
;*                                                                           *
;*  Revision history:                                                        *
;*                                                                           *
;*  2004.05.17 portab mc_copy_w4/8/16 (CM)                                   *
;*                                                                           *
;*****************************************************************************

BITS 64

;=============================================================================
; Macros and other preprocessor constants
;=============================================================================

; Supplies cglobal, GLOBAL and the parmNq / parmNd argument-slot names used
; throughout this file (the include itself is not part of this listing).
%include "amd64inc.asm"

;=============================================================================
; Constants
;=============================================================================

SECTION .rodata

; Four-lane 16-bit constants, sized for one 64-bit MMX register each.
ALIGN 16
pw_4:  times 4 dw  4
pw_8:  times 4 dw  8
pw_32: times 4 dw 32
pw_64: times 4 dw 64

;=============================================================================
; Code
;=============================================================================

SECTION .text

cglobal x264_pixel_avg_w4_mmxext
cglobal x264_pixel_avg_w8_mmxext
cglobal x264_pixel_avg_w16_mmxext
cglobal x264_pixel_avg_w16_sse2

cglobal x264_pixel_avg_weight_4x4_mmxext
cglobal x264_pixel_avg_weight_w8_mmxext
cglobal x264_pixel_avg_weight_w16_mmxext

cglobal x264_mc_copy_w4_mmxext
cglobal x264_mc_copy_w8_mmxext
cglobal x264_mc_copy_w16_mmxext
cglobal x264_mc_copy_w16_sse2

cglobal x264_mc_chroma_sse

;=============================================================================
; pixel avg
;=============================================================================

ALIGN 16
;-----------------------------------------------------------------------------
; void x264_pixel_avg_w4_mmxext( uint8_t *dst,  int i_dst_stride,
;                                uint8_t *src1, int i_src1_stride,
;                                uint8_t *src2, int i_src2_stride,
;                                int i_height );
;
; Stores the pavgb (rounded byte-wise) average of src1 and src2 into dst,
; 4 bytes per row, two rows per loop iteration.
;
; In:    parm1 = dst, parm2 = i_dst_stride, parm3 = src1,
;        parm4 = i_src1_stride, parm5 = src2, parm6 = i_src2_stride,
;        parm7 = i_height
; Note:  i_height must be a non-zero multiple of 2: the counter drops by 2
;        per pass and the loop only exits when it reaches exactly zero.
; Clobb: rax, r10, r11, parm1q-parm4q, mm0-mm3, flags.
;        No emms is issued here; MMX state is left live on return.
;-----------------------------------------------------------------------------
x264_pixel_avg_w4_mmxext:
    mov         r10, parm5q         ; src2
    movsxd      r11, parm6d         ; i_src2_stride
    movsxd      rax, parm7d         ; i_height
    ; The strides are declared int, so the upper halves of their 64-bit
    ; registers are not guaranteed by the ABI. Widen them before they are
    ; used as address components (negative strides need real sign extension).
    movsxd      parm2q, parm2d      ; i_dst_stride
    movsxd      parm4q, parm4d      ; i_src1_stride

ALIGN 4
.height_loop:
    movd        mm0, [parm3q]               ; src1, row 0
    movd        mm2, [r10]                  ; src2, row 0
    movd        mm1, [parm3q+parm4q]        ; src1, row 1
    movd        mm3, [r10+r11]              ; src2, row 1
    ; Average register-to-register: a pavgb memory operand would fetch
    ; 8 bytes, i.e. 4 bytes past the end of each 4-byte src2 row.
    pavgb       mm0, mm2
    pavgb       mm1, mm3
    movd        [parm1q], mm0
    movd        [parm1q+parm2q], mm1
    lea         parm3q, [parm3q+parm4q*2]   ; src1 += 2 rows
    lea         r10, [r10+r11*2]            ; src2 += 2 rows
    lea         parm1q, [parm1q+parm2q*2]   ; dst  += 2 rows
    sub         rax, 2                      ; two rows done
    jne         .height_loop
    ret
              ALIGN 16
;-----------------------------------------------------------------------------
; void x264_pixel_avg_w8_mmxext( uint8_t *dst,  int i_dst_stride,
;                                uint8_t *src1, int i_src1_stride,
;                                uint8_t *src2, int i_src2_stride,
;                                int i_height );
;
; Stores the pavgb (rounded byte-wise) average of src1 and src2 into dst,
; 8 bytes per row, one row per loop iteration.
;
; Note:  i_height must be non-zero (the loop body runs before the test).
; Clobb: rax, r10, r11, parm1q-parm4q, mm0, flags. No emms is issued here.
;-----------------------------------------------------------------------------
x264_pixel_avg_w8_mmxext:
    mov         r10, parm5q         ; src2
    movsxd      r11, parm6d         ; i_src2_stride
    movsxd      rax, parm7d         ; i_height
    ; int strides: upper register halves are not guaranteed by the ABI,
    ; so widen them before using them in addresses.
    movsxd      parm2q, parm2d      ; i_dst_stride
    movsxd      parm4q, parm4d      ; i_src1_stride

ALIGN 4
.height_loop:
    movq        mm0, [parm3q]
    pavgb       mm0, [r10]
    movq        [parm1q], mm0
    lea         parm3q, [parm3q+parm4q]     ; next src1 row
    lea         r10, [r10+r11]              ; next src2 row
    lea         parm1q, [parm1q+parm2q]     ; next dst row
    dec         rax
    jne         .height_loop
    ret

ALIGN 16
;-----------------------------------------------------------------------------
; void x264_pixel_avg_w16_mmxext( uint8_t *dst,  int i_dst_stride,
;                                 uint8_t *src1, int i_src1_stride,
;                                 uint8_t *src2, int i_src2_stride,
;                                 int i_height );
;
; Same operation as the w8 routine, 16 bytes per row handled as two
; 8-byte MMX halves.
;
; Note:  i_height must be non-zero (the loop body runs before the test).
; Clobb: rax, r10, r11, parm1q-parm4q, mm0, mm1, flags. No emms is issued.
;-----------------------------------------------------------------------------
x264_pixel_avg_w16_mmxext:
    mov         r10, parm5q         ; src2
    movsxd      r11, parm6d         ; i_src2_stride
    movsxd      rax, parm7d         ; i_height
    ; int strides: upper register halves are not guaranteed by the ABI,
    ; so widen them before using them in addresses.
    movsxd      parm2q, parm2d      ; i_dst_stride
    movsxd      parm4q, parm4d      ; i_src1_stride

ALIGN 4
.height_loop:
    movq        mm0, [parm3q  ]
    movq        mm1, [parm3q+8]
    pavgb       mm0, [r10  ]
    pavgb       mm1, [r10+8]
    movq        [parm1q  ], mm0
    movq        [parm1q+8], mm1
    lea         parm3q, [parm3q+parm4q]     ; next src1 row
    lea         r10, [r10+r11]              ; next src2 row
    lea         parm1q, [parm1q+parm2q]     ; next dst row
    dec         rax
    jne         .height_loop
    ret

ALIGN 16
;-----------------------------------------------------------------------------
; void x264_pixel_avg_w16_sse2( uint8_t *dst,  int i_dst_stride,
;                               uint8_t *src1, int i_src1_stride,
;                               uint8_t *src2, int i_src2_stride,
;                               int i_height );
;
; Same operation as the w16 MMX routine, one 16-byte SSE2 vector per row.
; All three buffers are accessed with unaligned moves.
;
; Note:  i_height must be non-zero (the loop body runs before the test).
; Clobb: rax, r10, r11, parm1q-parm4q, xmm0, xmm1, flags.
;-----------------------------------------------------------------------------
x264_pixel_avg_w16_sse2:
    mov         r10, parm5q         ; src2
    movsxd      r11, parm6d         ; i_src2_stride
    movsxd      rax, parm7d         ; i_height
    ; int strides: upper register halves are not guaranteed by the ABI,
    ; so widen them before using them in addresses.
    movsxd      parm2q, parm2d      ; i_dst_stride
    movsxd      parm4q, parm4d      ; i_src1_stride

ALIGN 4
.height_loop:
    movdqu      xmm0, [parm3q]
    ; Load src2 explicitly: a legacy-SSE pavgb memory operand must be
    ; 16-byte aligned and would fault on an unaligned src2 row.
    movdqu      xmm1, [r10]
    pavgb       xmm0, xmm1
    movdqu      [parm1q], xmm0
    lea         parm3q, [parm3q+parm4q]     ; next src1 row
    lea         r10, [r10+r11]              ; next src2 row
    lea         parm1q, [parm1q+parm2q]     ; next dst row
    dec         rax
    jne         .height_loop
    ret

;=============================================================================
; weighted prediction
;=============================================================================
; implicit bipred only:
; assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64

;-----------------------------------------------------------------------------
; BIWEIGHT_4P_MMX dst_mem, src_mem
;
; Blends 4 bytes in place:
;     dst = saturate_u8( max( (dst*mm4 + src*mm5 + mm6) >> 6, 0 ) )
; %1 is both read and written; %2 is only read.
;
; Expects: mm4 = weight for %1, mm5 = weight for %2, mm6 = rounding term,
;          mm7 = 0 (all as 4 x 16-bit lanes; see BIWEIGHT_START_MMX).
; Clobb:   mm0, mm1.
;-----------------------------------------------------------------------------
%macro BIWEIGHT_4P_MMX 2
    movd      mm0, %1
    movd      mm1, %2
    punpcklbw mm0, mm7      ; bytes -> words (mm7 is zero)
    punpcklbw mm1, mm7
    pmullw    mm0, mm4
    pmullw    mm1, mm5
    paddw     mm0, mm1
    paddw     mm0, mm6
    psraw     mm0, 6
    pmaxsw    mm0, mm7      ; clamp negative lanes to 0
    packuswb  mm0, mm0      ; words -> bytes with unsigned saturation
    movd      %1,  mm0
%endmacro

;-----------------------------------------------------------------------------
; BIWEIGHT_START_MMX
;
; Common entry sequence of the weighted-average routines: copies parm6 to
; r11d, broadcasts parm5 into mm4, sets mm5 = 64 - mm4, mm6 = 32 and
; mm7 = 0, then opens the .height_loop label.
;-----------------------------------------------------------------------------
%macro BIWEIGHT_START_MMX 0
;   mov     rdi, rdi      ; dst
;   movsxd  rsi, esi      ; i_dst
;   mov     rdx, rdx      ; src
;   movsxd  rcx, ecx      ; i_src
;   movsxd  r8,  r8d      ; i_weight_dst
;   movsxd  r9,  r9d      ; i_height

    mov     r11d, parm6d  ; i_height
    movd    mm4, parm5d
    pshufw  mm4, mm4, 0   ; weight_dst
    movq    mm5, [pw_64 GLOBAL]
    psubw   mm5, mm4      ; weight_src
    movq    mm6, [pw_32 GLOBAL] ; rounding
    pxor    mm7, mm7

    ALIGN 4
    .height_loop:
%endmacro

ALIGN 16
;-----------------------------------------------------------------------------
;   int x264_pixel_avg_weight_w16_mmxext( uint8_t *dst, int, uint8_t *src, int, int i_weight, int )
;-----------------------------------------------------------------------------
x264_pixel_avg_weight_w16_mmxext:
    BIWEIGHT_START_MMX

    ; One 16-byte row of dst, blended in place in four 4-byte groups.
    BIWEIGHT_4P_MMX  [parm1q   ], [parm3q   ]
    BIWEIGHT_4P_MMX  [parm1q+ 4], [parm3q+ 4]
    BIWEIGHT_4P_MMX  [parm1q+ 8], [parm3q+ 8]
    BIWEIGHT_4P_MMX  [parm1q+12], [parm3q+12]
    ; NOTE(review): the listing is cut off here; the row advance, the branch
    ; back to .height_loop and the return are not part of this excerpt.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -