convert_yv12.h

来自「从FFMPEG转换而来的H264解码程序,VC下编译..」· C头文件代码 · 共 642 行 · 第 1/2 页
642 行
// Avisynth v2.5.  Copyright 2002 Ben Rudiak-Gould et al.
// http://www.avisynth.org

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
// http://www.gnu.org/copyleft/gpl.html .
//
// Linking Avisynth statically or dynamically with other modules is making a
// combined work based on Avisynth.  Thus, the terms and conditions of the GNU
// General Public License cover the whole combination.
//
// As a special exception, the copyright holders of Avisynth give you
// permission to link Avisynth with independent modules that communicate with
// Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
// terms of these independent modules, and to copy and distribute the
// resulting combined work under terms of your choice, provided that
// every copy of the combined work is accompanied by a complete copy of
// the source code of Avisynth (the version of Avisynth used to produce the
// combined work), being distributed under the terms of the GNU General
// Public License plus this exception.  An independent module is a module
// which is not derived from or based on Avisynth, such as 3rd-party filters,
// import and export plugins, or graphical user interfaces.

#ifndef _CONVERT_YV12_H_
#define _CONVERT_YV12_H_

#include "simd.h"

#pragma warning(push)
#pragma warning(disable: 4700 4701)

/********************************
 * (c) Copyright 2003, Klaus Post
 *
 * Converts 8x2 (8 pixels, two lines) in parallel.
 * Requires mod8 pitch for output, and mod16 pitch for input.
 ********************************/
template<class _mm> struct TconvertYV12
{
 static void yuy2_to_yv12(const BYTE* src, int src_rowsize, stride_t src_pitch,
                          BYTE* dstY, BYTE* dstU, BYTE* dstV, stride_t dst_pitchY, stride_t dst_pitchUV,
                          int height)
  {
   if (_mm::align && (intptr_t(src)&15 || intptr_t(dstY)&15 || intptr_t(dstU)&15 || intptr_t(dstV)&15 || src_pitch&15 || dst_pitchY &15 ||dst_pitchUV&15))
    {
     TconvertYV12<typename _mm::T64>::yuy2_to_yv12(src,src_rowsize,src_pitch,dstY,dstU,dstV,dst_pitchY,dst_pitchUV,height);
     return;
    }
   typename _mm::__m mask1 = _mm::set_pi32(0x00ff00ff,0x00ff00ff);
   typename _mm::__m mask2 = _mm::set_pi32(0xff00ff00,0xff00ff00);

   stride_t src_pitch2 = src_pitch*2;
   stride_t dst_pitch2 = dst_pitchY*2;

   src_rowsize = (src_rowsize+3)/4;
   typename _mm::__m mm7,mm4,mm0,mm1,mm3,mm6,mm2,mm5;
   movq (mm7,mask2);
   movq (mm4,mask1);
   for (int y=0;y<height;y+=2,dstY+=dst_pitch2,dstU+=dst_pitchUV,dstV+=dst_pitchUV,src+=src_pitch2)
    {
     const BYTE *esi=src;
     for (int edx=0;edx<src_rowsize;edx+=_mm::size/2,esi+=_mm::size*2)
      {
       movq (mm0,esi);        // YUY2 upper line  (4 pixels luma, 2 chroma)
        movq (mm1,esi+src_pitch);   // YUY2 lower line
       movq (mm6,mm0);
        movq (mm2, esi+_mm::size);    // Load second pair
       movq (mm3, esi+src_pitch+_mm::size);
        movq (mm5,mm2);
       _mm::pavgb (mm6,mm1);         // Average (chroma)
        _mm::pavgb (mm5,mm3);        // Average Chroma (second pair)
       pand (mm0,mm4);          // Mask luma
        psrlq (mm5, 8);
       pand (mm1,mm4);          // Mask luma
        psrlq (mm6, 8);
       pand (mm2,mm4);          // Mask luma
        pand (mm3,mm4);
       pand (mm5,mm4);          // Mask chroma
        pand (mm6,mm4);         // Mask chroma
       packuswb (mm0, mm2);     // Pack luma (upper)
        packuswb (mm6, mm5);    // Pack chroma
       packuswb (mm1, mm3);     // Pack luma (lower)
        movq (mm5, mm6);        // Chroma copy
       pand (mm5, mm7);         // Mask V
        pand (mm6, mm4);        // Mask U
       psrlq (mm5,8);           // shift down V
        packuswb (mm5, mm7);    // Pack U
       packuswb (mm6, mm7);     // Pack V
       movq (dstY+edx*2,mm0);
        movq (dstY+dst_pitchY+edx*2,mm1);
       movd (dstV+edx, mm5);   // Store V
        movd (dstU+edx, mm6);  // Store U
      }
    }
   _mm::sfence();
   _mm::empty();
  }
 static void yuy2_i_to_yv12(const BYTE* src, int src_rowsize, stride_t src_pitch,
                            BYTE* dstY, BYTE* dstU, BYTE* dstV, stride_t dst_pitchY, stride_t dst_pitchUV,
                            int height)
  {
   if (_mm::align && (intptr_t(src)&15 || intptr_t(dstY)&15 || intptr_t(dstU)&15 || intptr_t(dstV)&15 || src_pitch&15 || dst_pitchY &15 ||dst_pitchUV&15))
    {
     TconvertYV12<typename _mm::T64>::yuy2_i_to_yv12(src,src_rowsize,src_pitch,dstY,dstU,dstV,dst_pitchY,dst_pitchUV,height);
     return;
    }
   typename _mm::__m mask1   =_mm::set_pi32(0x00ff00ff,0x00ff00ff);
   typename _mm::__m mask2   =_mm::set_pi32(0xff00ff00,0xff00ff00);
   typename _mm::__m add_ones=_mm::set1_pi8(0x01);

   stride_t src_pitch2 = src_pitch*2;
   stride_t dst_pitch2 = dst_pitchY*2;
   stride_t src_pitch4 = src_pitch*4;
   stride_t dst_pitch3 = dst_pitchY*3;

   src_rowsize = (src_rowsize+3)/4;
   typename _mm::__m mm7,mm4,mm0,mm1,mm3,mm6,mm2,mm5;
   movq (mm7,mask2);
   movq (mm4,mask1);
   for (int y=0;y<height;y+=4)
    {
     const BYTE *esi=src;
     int edx;
     for (edx=0;edx<src_rowsize;edx+=_mm::size/2,esi+=_mm::size*2)
      {
       movq (mm0,esi);        // YUY2 upper line  (4 pixels luma, 2 chroma)
        movq (mm1,esi+src_pitch2);   // YUY2 lower line
       movq (mm6,mm0);
        movq (mm2, esi+_mm::size);    // Load second pair
       movq (mm3, esi+src_pitch2+_mm::size);
        movq (mm5,mm2);

       _mm::pavgb (mm6,mm1);         // Average (chroma)
        _mm::pavgb (mm5,mm3);        // Average Chroma (second pair)
       psubusb (mm5, add_ones);         // Better rounding (thanks trbarry!)
        psubusb (mm6, add_ones);
       _mm::pavgb (mm6,mm0);         // Average (chroma) (upper = 75% lower = 25%)
        _mm::pavgb (mm5,mm2);        // Average Chroma (second pair) (upper = 75% lower = 25%)

       pand (mm0,mm4 );         // Mask luma
        psrlq (mm5, 8);
       pand (mm1,mm4 );         // Mask luma
        psrlq (mm6, 8);
       pand (mm2,mm4 );         // Mask luma
        pand (mm3,mm4);
       pand (mm5,mm4 );          // Mask chroma
        pand (mm6,mm4);          // Mask chroma
       packuswb (mm0, mm2 );    // Pack luma (upper)
        packuswb (mm6, mm5);    // Pack chroma
       packuswb (mm1, mm3 );    // Pack luma (lower)
        movq (mm5, mm6);        // Chroma copy
       pand (mm5, mm7 );        // Mask V
        pand (mm6, mm4);        // Mask U
       psrlq (mm5,8);            // shift down V
        packuswb (mm5, mm7);     // Pack U
       packuswb (mm6, mm7 );    // Pack V
       movq (dstY+edx*2,mm0);
       movq (dstY+dst_pitchY*2+edx*2,mm1);
       movd (dstV+edx, mm5);   // Store V
       movd (dstU+edx, mm6);  // Store U
      }
     dstY+=dst_pitchY;
     dstU+=dst_pitchUV;
     dstV+=dst_pitchUV;
     esi=src+src_pitch;
     for (edx=0;edx<src_rowsize;edx+=_mm::size/2,esi+=_mm::size*2)
      {
       movq (mm0,esi);        // YUY2 upper line  (4 pixels luma, 2 chroma)
        movq (mm1,esi+src_pitch2);   // YUY2 lower line
       movq (mm6,mm0);
        movq (mm2, esi+_mm::size);    // Load second pair
       movq (mm3, esi+src_pitch2+_mm::size);
        movq (mm5,mm2);

       _mm::pavgb (mm6,mm1);         // Average (chroma)
        _mm::pavgb (mm5,mm3);        // Average Chroma (second pair)
       psubusb (mm5, add_ones);         // Better rounding (thanks trbarry!)
        psubusb (mm6, add_ones);
       _mm::pavgb (mm6,mm1);         // Average (chroma) (upper = 25% lower = 75%)
        _mm::pavgb (mm5,mm3);        // Average Chroma (second pair) (upper = 25% lower = 75%)

       pand (mm0,mm4);          // Mask luma
        psrlq (mm5, 8);
       pand (mm1,mm4);          // Mask luma
        psrlq (mm6, 8);
       pand (mm2,mm4);          // Mask luma
        pand (mm3,mm4);
       pand (mm5,mm4);           // Mask chroma
        pand (mm6,mm4);          // Mask chroma
       packuswb (mm0, mm2);     // Pack luma (upper)
        packuswb (mm6, mm5);    // Pack chroma
       packuswb (mm1, mm3);     // Pack luma (lower)
        movq (mm5, mm6);        // Chroma copy
       pand (mm5, mm7);         // Mask V
        pand (mm6, mm4);        // Mask U
       psrlq (mm5,8);            // shift down V
        packuswb (mm5, mm7);     // Pack U
       packuswb (mm6, mm7);     // Pack V

       movq (dstY+edx*2,mm0);
       movq (dstY+(dst_pitchY*2)+edx*2,mm1);
       movd (dstV+edx, mm5);   // Store V
       movd (dstU+edx, mm6);  // Store U
      }
     dstY+=dst_pitch3;
     dstU+=dst_pitchUV;
     dstV+=dst_pitchUV;
     src+=src_pitch4;
    }
   _mm::sfence();
   _mm::empty();
  }

 static void yv12_to_yuy2(const BYTE* srcY, const BYTE* srcU, const BYTE* srcV, int src_rowsize, stride_t src_pitch, stride_t src_pitch_uv,
                          BYTE* dst, stride_t dst_pitch,
                          int height)
  {
   if (_mm::align && (intptr_t(srcY)&15 || intptr_t(srcU)&15 || intptr_t(srcV)&15 || intptr_t(dst)&15 || src_pitch&15 || src_pitch_uv&15 || dst_pitch&15))
    {
     TconvertYV12<typename _mm::T64>::yv12_to_yuy2(srcY,srcU,srcV,src_rowsize,src_pitch,src_pitch_uv,dst,dst_pitch,height);
     return;
    }
   stride_t src_pitch_uv2 = src_pitch_uv*2;
   int skipnext = 0;

   stride_t dst_pitch2=dst_pitch*2;
   stride_t src_pitch2 = src_pitch*2;

   /**** Do first and last lines - NO interpolation:   *****/
   // MMX loop relies on C-code to adjust the lines for it.

   const BYTE* _srcY=srcY;
   const BYTE* _srcU=srcU;
   const BYTE* _srcV=srcV;
   BYTE* _dst=dst;
   for (int i=0;i<4;i++)
    {
     switch (i)
      {
       case 1:
        _srcY+=src_pitch;  // Same chroma as in 0
        _dst+=dst_pitch;
        break;
       case 2:
        _srcY=srcY+(src_pitch*(height-2));
        _srcU=srcU+(src_pitch_uv*((height>>1)-1));
        _srcV=srcV+(src_pitch_uv*((height>>1)-1));
        _dst = dst+(dst_pitch*(height-2));
        break;
       case 3: // Same chroma as in 4
        _srcY += src_pitch;
        _dst += dst_pitch;
        break;
       default:  // Nothing, case 0
        break;
      }
     typename _mm::__m mm7=_mm::setzero_si64();
     unsigned char *edi=_dst;
     const unsigned char *eax=_srcY;
     const unsigned char *ebx=_srcU;
     const unsigned char *ecx=_srcV;
     for (int edx=0;edx<src_rowsize;edx+=_mm::size,eax+=_mm::size,ebx+=_mm::size/2,ecx+=_mm::size/2,edi+=_mm::size*2)
      {
       typename _mm::__m mm0,mm1,mm3,mm2,mm4,mm5;
       movq (mm0,eax);    //Y
        movd (mm1,ebx);  //U
       movq (mm3,mm0);
        movd (mm2,ecx);  //V
       punpcklbw (mm0,mm7);  // Y low
        punpckhbw (mm3,mm7);   // Y high
       punpcklbw (mm1,mm7);   // 00uu 00uu
        punpcklbw (mm2,mm7);   // 00vv 00vv
       movq (mm4,mm1);
        movq (mm5,mm2);
       punpcklbw (mm1,mm7);   // 0000 00uu low
        punpcklbw (mm2,mm7);   // 0000 00vv low
       punpckhbw (mm4,mm7);   // 0000 00uu high
        punpckhbw (mm5,mm7);   // 0000 00vv high
       pslld (mm1,8);
        pslld (mm4,8);
       pslld (mm2,24);
        pslld (mm5,24);
       por (mm0, mm1);
        por (mm3, mm4);
       por (mm0, mm2);
        por (mm3, mm5);
       movq (edi,mm0);
        movq (edi+_mm::size,mm3);
      }
    }

  /****************************************
   * Conversion main loop.
   * The code properly interpolates UV from
   * interlaced material.
   * We process two lines in the same field
   * in the same loop, to avoid reloading
   * chroma each time.
   *****************************************/

   height-=4;

   dst+=dst_pitch2;
   srcY+=src_pitch2;
   srcU+=src_pitch_uv;
   srcV+=src_pitch_uv;

   const BYTE *srcp[3];
   srcp[0] = srcY;
   srcp[1] = srcU-src_pitch_uv;
convert_yv12.h - 源码说明

本页面展示了「从FFMPEG转换而来的H264解码程序,VC下编译..」中的 convert_yv12.h 源码文件，采用 C头文件编程语言编写，共 642 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与FFMPEG相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?