convert_yuy2.h

来自「从FFMPEG转换而来的H264解码程序,VC下编译..」· C头文件代码 · 共 369 行 · 第 1/2 页
369 行
// Avisynth v2.5.  Copyright 2002 Ben Rudiak-Gould et al.
// http://www.avisynth.org

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
// http://www.gnu.org/copyleft/gpl.html .
//
// Linking Avisynth statically or dynamically with other modules is making a
// combined work based on Avisynth.  Thus, the terms and conditions of the GNU
// General Public License cover the whole combination.
//
// As a special exception, the copyright holders of Avisynth give you
// permission to link Avisynth with independent modules that communicate with
// Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
// terms of these independent modules, and to copy and distribute the
// resulting combined work under terms of your choice, provided that
// every copy of the combined work is accompanied by a complete copy of
// the source code of Avisynth (the version of Avisynth used to produce the
// combined work), being distributed under the terms of the GNU General
// Public License plus this exception.  An independent module is a module
// which is not derived from or based on Avisynth, such as 3rd-party filters,
// import and export plugins, or graphical user interfaces.

#ifndef _CONVERT_YUY2_H_
#define _CONVERT_YUY2_H_

#include "simd.h"

#pragma warning(push)
#pragma warning(disable: 4700 4701)

template<bool RGB24,bool DUPL> struct Tmmx_ConvertRGBtoYUY2
{
 /*****************************
  * MMX code by Klaus Post
  * Updated for PC Levels/Rec.709
  *   by Ian Brabham Nov 2004
  *
  * - Notes on MMX:
  * Fractions are one bit less than integer code,
  *  but otherwise the algorithm is the same, except
  *  r_y and b_y are calculated at the same time.
  * Order of execution has been changed much for better pairing possibilities.
  * It is important that the 64bit values are 8 byte-aligned
  *  otherwise it will give a huge penalty when accessing them.
  * Instructions pair rather ok, instructions from the top is merged
  *  into last part, to avoid dependency stalls.
  *  (paired instructions are indented by a space)
  *****************************/

 static void mmx_ConvertRGBtoYUY2(const unsigned char *SRC0,unsigned char *DST,stride_t src_pitch, stride_t dst_pitch,int w, int h, int MATRIX)
  {
   int lwidth_bytes;
   if (RGB24)
    lwidth_bytes = w*3;    // Width in bytes
   else
   lwidth_bytes = w<<2;    // Width in bytes
   //SRC+=src_pitch*(h-1);       // ;Move source to bottom line (read top->bottom)

   static const int fraction[4] ={0x00084000,    //=(16.5) << 15 = 0x84000
                                  0x00084000,
                                  0x00004000,    //=(0.5) << 15 = 0x4000
                                  0x00004000};
   static __align8(const int64_t,cybgr_64[4]) ={0x000020DE40870C88LL,
                                                0x0000175C4EA507EDLL,
                                                0x000026464B230E97LL,
                                                0x00001B335B92093BLL};
   static const int y1y2_mult[4]={0x00004A85,    //=(255./219.) << 14
                                  0x00004A85,
                                  0x00004000,    //=1 << 14
                                  0x00004000};
   static __align8(const int64_t,fpix_mul[4]) ={0x0000503300003F74LL,    //=(1/((1-0.299)*255/112)<<15+0.5),  (1/((1-0.114)*255/112)<<15+0.5)
                                                0x0000476400003C97LL,    //=(1/((1-0.2125)*255/112)<<15+0.5), (1/((1-0.0721)*255/112)<<15+0.5)
                                                0x00005AF1000047F4LL,    //=(1/((1-0.299)*255/127)<<15+0.5),  (1/((1-0.114)*255/127)<<15+0.5)
                                                0x000050F3000044B4LL};   //=(1/((1-0.2125)*255/127)<<15+0.5), (1/((1-0.0721)*255/127)<<15+0.5)
   static const int sub_32[4]   ={0x0000FFE0,    //=-16*2
                                  0x0000FFE0,
                                  0x00000000,    //=0
                                  0x00000000};
   __m64 rb_mask =_mm_set_pi32(0x0000ffff,0x0000ffff);    //=Mask for unpacked R and B
   __m64 fpix_add=_mm_set_pi32(0x00808000,0x00808000);    //=(128.5) << 16
   __m64 chroma_mask2=_mm_set_pi16(-1,0x0000,-1,0x0000);
   const __m64 mm0=_mm_cvtsi32_si64(fraction[MATRIX]);
   const __m64 mm7=*(__m64*)(cybgr_64+MATRIX);
   const __m64 mm5=_mm_cvtsi32_si64(y1y2_mult[MATRIX]);
#define mmx_ConvertRGBtoYUY2_BUFSIZE 8192
   if(lwidth_bytes>mmx_ConvertRGBtoYUY2_BUFSIZE)
    lwidth_bytes=mmx_ConvertRGBtoYUY2_BUFSIZE;
   for (int y=0;y<h;SRC0+=src_pitch,DST+=dst_pitch,y++)
    {
     unsigned char SRC[mmx_ConvertRGBtoYUY2_BUFSIZE];
     memcpy(SRC,SRC0,lwidth_bytes);
     __m64 mm2,mm1;
     movq        (mm2,SRC);              // mm2= XXR2 G2B2 XXR1 G1B1
     punpcklbw   (mm1,mm2);             // mm1= XXxx R1xx G1xx B1xx
     if (RGB24)
      psllq       (mm2,8);               //Compensate for RGB24
     for (int RGBOFFSET=0,YUVOFFSET=0;RGBOFFSET<lwidth_bytes;)
      {
       __m64 mm6,mm4,mm3;
       punpckhbw   (mm2,mm0);             //mm2= 00XX 00R2 00G2 00B2
       psrlw       (mm1,8);               //mm1= 00XX 00R1 00G1 00B1

       movq        (mm6,mm1);             //mm6= 00XX 00R1 00G1 00B1
        pmaddwd     (mm1,mm7);             //mm1= v2v2 v2v2 v1v1 v1v1   y1 //(cyb*rgb[0] + cyg*rgb[1] + cyr*rgb[2] + 0x108000)
       if (DUPL)
        paddw       (mm6,mm6);             //mm6 = accumulated RGB values (for b_y and r_y)
       else
        paddw       (mm6,mm2);             //mm6 = accumulated RGB values (for b_y and r_y)

        pmaddwd  (  mm2,mm7    );       //mm2= w2w2 w2w2 w1w1 w1w1   y2 //(cyb*rgbnext[0] + cyg*rgbnext[1] + cyr*rgbnext[2] + 0x108000)
       paddd     ( mm1,mm0     );        //Add rounding fraction (16.5)<<15 to lower dword only
        paddd    (  mm2,mm0    );         //Add rounding fraction (16.5)<<15 to lower dword only
       movq      ( mm3,mm1     );
        movq     (  mm4,mm2    );
       psrlq     ( mm3,32      );
        pand     (  mm6,rb_mask);       //Clear out accumulated G-value mm6= 0000 RRRR 0000 BBBB
       psrlq     ( mm4,32      );
        paddd    (  mm1,mm3    );
       paddd     ( mm2,mm4     );
        psrld    (  mm1,15     );         //mm1= xxxx xxxx 0000 00y1 final value
       movd      ( mm3,sub_32[MATRIX]);//mm3 = -32
        psrld    (  mm2,15     );         //mm2= xxxx xxxx 0000 00y2 final value
       paddw     ( mm3,mm1     );
        pslld    (  mm6,14     );         //Shift up accumulated R and B values (<<15 in C)
       if (DUPL)
        paddw       (mm3,mm1);             //mm3 = y1+y1-32
       else
        paddw       (mm3,mm2);             //mm3 = y1+y2-32

        psllq      ( mm2,16 );             //mm2 Y2 shifted up (to clear fraction) mm2 ready
       pmaddwd     (mm3,mm5 );            //mm3=scaled_y (latency 2 cycles)
        por        ( mm1,mm2);             //mm1 = 0000 0000 00Y2 00Y1
       punpckldq   (mm3,mm3);             //Move scaled_y to upper dword mm3=SCAL ED_Y SCAL ED_Y
        movq       ( mm2,fpix_mul+MATRIX);
       psubd       (mm6,mm3);             //mm6 = b_y and r_y
        movq       ( mm4,fpix_add);
       psrad       (mm6,14);              //Shift down b_y and r_y (>>10 in C-code)
        movq       ( mm3,chroma_mask2);
       pmaddwd     (mm6,mm2);             //Mult b_y and r_y
       YUVOFFSET+=4;
       paddd       (mm6,mm4);             //Add 0x808000 to r_y and b_y
       RGBOFFSET+=(RGB24?6:8);
       pand        (mm6,mm3);             //Clear out fractions
        movq       ( mm2,SRC+RGBOFFSET); //mm2= XXR2 G2B2 XXR1 G1B1
       packuswb    (mm6,mm6);             //mm6 = VV00 UU00 VV00 UU00
       por         (mm6,mm1);             //Or luma and chroma together
        punpcklbw  ( mm1,mm2);             //mm1= XXxx R1xx G1xx B1xx
       movd        (DST+YUVOFFSET-4,mm6); //Store final pixel
       if (RGB24)
        psllq       (mm2,8);               //Compensate for RGB24
      }
    }
   _mm_empty();
  }
};

template<int uyvy,int rgb32> struct Tmmx_ConvertYUY2toRGB
{
private:
 static __forceinline void GET_Y(__m64 &mma,int _uyvy,const unsigned char* const edx)
  {
   if (_uyvy)
    psrlw (mma,8);
   else
    {
     static const int ofs_x00FF_00FF_00FF_00FF=16;
     pand (mma,edx+ofs_x00FF_00FF_00FF_00FF);
    }
  }
 static __forceinline void GET_UV(__m64 &mma,int _uyvy,const unsigned char* const edx)
  {
   GET_Y(mma,1-_uyvy,edx);
  }
convert_yuy2.h - 源码说明

本页面展示了「从FFMPEG转换而来的H264解码程序,VC下编译..」中的 convert_yuy2.h 源码文件，采用 C头文件编程语言编写，共 369 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与FFMPEG相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?