📄 convert_yuy2.h

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 H
📖 第 1 页 / 共 2 页
字号:
上一页 12
 // Converts one group of two source dwords (the 8 bytes at esi and esi+4) of
 // packed 4:2:2 YUV into RGB and stores the result just below the advanced edi.
 //
 // Parameters:
 //   no_next_pixel - 0: the chroma look-ahead dword is read from esi+8 and the
 //                      return value is (esi<ecx) evaluated after esi has been
 //                      advanced by 8;
 //                   1: the look-ahead dword is read from esi+4 instead
 //                      (offset is esi+8-4*no_next_pixel) and the function
 //                      always returns true.
 //   esi           - source pointer (by reference); advanced by 8 bytes.
 //   ecx           - only used in the esi<ecx comparison that forms the return value.
 //   edi           - destination pointer (by reference); advanced by 12+4*rgb32
 //                   bytes, i.e. the stores below write 12 or 16 bytes.
 //   edx           - base address of a constants table; the ofs_* values below
 //                   are byte offsets into it.
 //   mm0..mm7      - __m64 working variables passed by reference; all eight are
 //                   overwritten.
 //
 // NOTE(review): `uyvy` and `rgb32` are not declared in this block - presumably
 // they are compile-time parameters of the enclosing class; confirm there.
 // NOTE(review): movd/movq/psubw/... and GET_Y/GET_UV are defined outside this
 // block; they are named after MMX instructions and presumably wrap or emulate
 // them - confirm against their definitions. Byte offset 16 of the constants
 // table has no ofs_* name here; it is presumably consumed by GET_Y/GET_UV.
 static __forceinline bool YUV2RGB_INNER_LOOP(int no_next_pixel,const unsigned char* &esi,const unsigned char* const ecx,unsigned char* &edi,const unsigned char* const edx,__m64 &mm0,__m64 &mm1,__m64 &mm2,__m64 &mm3,__m64 &mm4,__m64 &mm5,__m64 &mm6,__m64 &mm7)
  {
   //This YUV422->RGB conversion code uses only four MMX registers per
   //source dword, so I convert two dwords in parallel.  Lines corresponding
   //to the "second pipe" are indented an extra space.  There's almost no
   //overlap, except at the end and in the three lines marked ***.
   //revised 4july,2002 to properly set alpha in rgb32 to default "on" & other small memory optimizations

   // Byte offsets (relative to edx) of the 64-bit constants used below.
   static const int ofs_x0000_0000_0010_0010=0;
   static const int ofs_x0080_0080_0080_0080=8;
   static const int ofs_x00002000_00002000=24;
   static const int ofs_xFF000000_FF000000=32;
   static const int ofs_cy=40;
   static const int ofs_crv=48;
   static const int ofs_cgu_cgv=56;
   static const int ofs_cbu=64;
   bool ret;

   // Stage 1: fetch the two source dwords, split them into luma and chroma,
   // remove the luma/chroma biases and build the interpolated chroma for the
   // odd pixels (see the 2*UV1 / 2*UV3 comments).
   movd           (mm0,esi);
    movd          ( mm5,esi+4);
   movq           (mm1,mm0);
   GET_Y          (mm0,uyvy,edx);       // mm0 = __________Y1__Y0
    movq          ( mm4,mm5);
   GET_UV         (mm1,uyvy,edx);       // mm1 = __________V0__U0
    GET_Y         ( mm4,uyvy,edx);      // mm4 = __________Y3__Y2
   movq           (mm2,mm5);         // *** avoid reload from [esi+4]
    GET_UV        ( mm5,uyvy,edx);      // mm5 = __________V2__U2
   psubw          (mm0,edx+ofs_x0000_0000_0010_0010);      // (Y-16)
    // Look-ahead chroma for the second pipe: taken from esi+8, or from esi+4
    // again when no_next_pixel is 1.
    movd          ( mm6,esi+8-4*(no_next_pixel));
   GET_UV         (mm2,uyvy,edx);       // mm2 = __________V2__U2
    psubw         ( mm4,edx+ofs_x0000_0000_0010_0010);     // (Y-16)
   paddw          (mm2,mm1);         // 2*UV1=UV0+UV2
    GET_UV        ( mm6,uyvy,edx);      // mm6 = __________V4__U4
   psubw          (mm1,edx+ofs_x0080_0080_0080_0080);      // (UV-128)
    paddw         ( mm6,mm5);        // 2*UV3=UV2+UV4
   psllq          (mm2,32);
    psubw         ( mm5,edx+ofs_x0080_0080_0080_0080);     // (UV-128)
   punpcklwd      (mm0,mm2);         // mm0 = ______Y1______Y0
    psllq         ( mm6,32);
   pmaddwd        (mm0,edx+ofs_cy);        // (Y-16)*(255./219.)<<14
    punpcklwd     ( mm4,mm6);
   paddw          (mm1,mm1);         // 2*UV0=UV0+UV0
    pmaddwd       ( mm4,edx+ofs_cy);
    paddw         ( mm5,mm5);        // 2*UV2=UV2+UV2
   paddw          (mm1,mm2);         // mm1 = __V1__U1__V0__U0 * 2
   paddd          (mm0,edx+ofs_x00002000_00002000);        // +=0.5<<14
    paddw         ( mm5,mm6);        // mm5 = __V3__U3__V2__U2 * 2
   // Stage 2: three copies of the chroma per pipe, one each for the R, G and B
   // multiply-accumulate; the scaled luma (mm0 / mm4) is added to every channel
   // and the sum is shifted right by 14.
   movq           (mm2,mm1);
    paddd         ( mm4,edx+ofs_x00002000_00002000);       // +=0.5<<14
   movq           (mm3,mm1);
    movq          ( mm6,mm5);
   pmaddwd        (mm1,edx+ofs_crv);
    movq          ( mm7,mm5);
   paddd          (mm1,mm0);
    pmaddwd       ( mm5,edx+ofs_crv);
   psrad          (mm1,14);          // mm1 = RRRRRRRRrrrrrrrr
    paddd         ( mm5,mm4);
   pmaddwd        (mm2,edx+ofs_cgu_cgv);
    psrad         ( mm5,14);
   paddd          (mm2,mm0);
    pmaddwd       ( mm6,edx+ofs_cgu_cgv);
   psrad          (mm2,14);          // mm2 = GGGGGGGGgggggggg
    paddd         ( mm6,mm4);
   pmaddwd        (mm3,edx+ofs_cbu);
    psrad         ( mm6,14);
   paddd          (mm3,mm0);
    pmaddwd       ( mm7,edx+ofs_cbu);
   // Advance both pointers now; the stores at the end therefore address the
   // output with negative offsets from edi.
   esi+=8;
   edi+=12+4*rgb32;
   // The loop condition is captured here, in the middle of the computation,
   // mirroring the position of the original "cmp esi,ecx".
   if (!no_next_pixel)
    ret=esi<ecx;//cmp             esi,ecx
   else
    ret=true;
   // Stage 3: pack the 32-bit channel values down to 16 bits and interleave
   // them into pixel order.
   psrad           (mm3,14  );        // mm3 = BBBBBBBBbbbbbbbb
    paddd          ( mm7,mm4);
   pxor            (mm0,mm0 );
    psrad          ( mm7,14 );
   packssdw        (mm3,mm2 );// mm3 = GGGGggggBBBBbbbb
    packssdw       ( mm7,mm6);
   packssdw        (mm1,mm0 );// mm1 = ________RRRRrrrr
    packssdw       ( mm5,mm0);        // *** avoid pxor mm4,mm4
   movq            (mm2,mm3 );
    movq           ( mm6,mm7);
   punpcklwd       (mm2,mm1 );// mm2 = RRRRBBBBrrrrbbbb
    punpcklwd      ( mm6,mm5);
   punpckhwd       (mm3,mm1 );// mm3 = ____GGGG____gggg
    punpckhwd      ( mm7,mm5);
   movq            (mm0,mm2 );
    movq           ( mm4,mm6);
   punpcklwd       (mm0,mm3 );// mm0 = ____rrrrggggbbbb
    punpcklwd      ( mm4,mm7);
   // 24-bit output only: pre-shift the low pixel of each pipe by one word; the
   // effect on the final byte layout is described in the note in the 12-byte
   // store branch below.
   if (!rgb32)
    {
     psllq         (  mm0,16);
      psllq        (   mm4,16);
    }
   punpckhwd       (mm2,mm3 );// mm2 = ____RRRRGGGGBBBB
    punpckhwd      ( mm6,mm7);
   packuswb        (mm0,mm2 );// mm0 = __RRGGBB__rrggbb <- ta dah!
    packuswb       ( mm4,mm6);

   // Stage 4: store. edi has already been advanced, so the output occupies
   // [edi-16,edi) for rgb32 and [edi-12,edi) otherwise.
   if (rgb32)
    {
     por (mm0, edx+ofs_xFF000000_FF000000);    // set alpha channels "on"
      por (mm4, edx+ofs_xFF000000_FF000000);
     movq    (edi-16,mm0);    // store the quadwords independently
      movq   ( edi-8,mm4);
    }
   else
    {
     psrlq   (mm0,8     );      // pack the two quadwords into 12 bytes
     psllq   (mm4,8     );      // (note: the two shifts above leave
     movd    (edi-12,mm0);    // mm0,4 = __RRGGBBrrggbb__)
     psrlq   (mm0,32    );
     por     (mm4,mm0   );
     movd    (edi-8,mm4 );
     psrlq   (mm4,32    );
     movd    (edi-4,mm4 );
    }
   return ret;
  }

public:
 // Converts an image of packed 4:2:2 YUV lines to RGB, one line at a time,
 // by repeatedly calling YUV2RGB_INNER_LOOP (8 source bytes per call).
 //
 // Parameters:
 //   src, src_end - first source line and the address at which the line loop
 //                  stops; src advances by src_pitch per line and the loop ends
 //                  only when src==src_end exactly.
 //   dst          - first destination line; advances by dst_pitch per line.
 //   row_size     - number of source bytes converted per line. Consumed in
 //                  8-byte steps; rows shorter than 8 bytes are not converted.
 //                  NOTE(review): presumably expected to be a multiple of 8 -
 //                  a remainder makes the final step run past row_size.
 //   matrix       - selects the row of the coefficient table:
 //                  0..3 index the table directly (rows are, in order, rec601,
 //                  rec709, PC601, PC709); 7 selects the last row (PC709) as the
 //                  signature comment documents; any other value falls back to
 //                  row 0 instead of reading outside the table.
 //                  NOTE(review): the signature comment says 3=PC_601, but row 3
 //                  of the table is PC709. Direct indexing is kept so existing
 //                  callers see no change - confirm which one is intended.
 static void mmx_ConvertYUY2toRGB(const BYTE* src,BYTE* dst,const BYTE* src_end,stride_t src_pitch,stride_t dst_pitch,int row_size,int matrix)  //0=rec601, 1=rec709, 3=PC_601, 7=PC_709
  {
   // One row per matrix, nine 64-bit constants per row. The inner loop addresses
   // them by byte offset: 0 luma bias, 8 chroma bias, 16 (not named in the inner
   // loop; presumably used by GET_Y/GET_UV), 24 rounding term, 32 alpha mask,
   // 40 cy, 48 crv, 56 cgu_cgv, 64 cbu.
   static const int64_t yuv2rgb_constants[4][9]=
    {
     {0x00000000000100010LL, //rec601
      0x00080008000800080LL,
      0x000FF00FF00FF00FFLL,
      0x00000200000002000LL,
      0x0FF000000FF000000LL,
      0x000004A8500004A85LL,
      0x03313000033130000LL,
      0x0E5FCF377E5FCF377LL,
      0x00000408D0000408DLL},

     {0x00000000000100010LL, //rec709
      0x00080008000800080LL,
      0x000FF00FF00FF00FFLL,
      0x00000200000002000LL,
      0x0FF000000FF000000LL,
      0x000004A8500004A85LL,
      0x03960000039600000LL,
      0x0EEF5F930EEF5F930LL,
      0x00000439B0000439BLL},

     {0x00000000000000000LL, //PC601
      0x00080008000800080LL,
      0x000FF00FF00FF00FFLL,
      0x00000200000002000LL,
      0x0FF000000FF000000LL,
      0x00000400000004000LL,
      0x02D0B00002D0B0000LL,
      0x0E90FF4F2E90FF4F2LL,
      0x0000038ED000038EDLL},

     {0x00000000000000000LL, //PC709
      0x00080008000800080LL,
      0x000FF00FF00FF00FFLL,
      0x00000200000002000LL,
      0x0FF000000FF000000LL,
      0x00000400000004000LL,
      0x03299000032990000LL,
      0x0F0F8F9FEF0F8F9FELL,
      0x000003B9F00003B9FLL}
    };
   const int tableRows=(int)(sizeof(yuv2rgb_constants)/sizeof(yuv2rgb_constants[0]));

   // Never index outside the table: the documented value 7 used to read past
   // the last row.
   int tableRow;
   if (matrix>=0 && matrix<tableRows)
    tableRow=matrix;
   else if (matrix==7)
    tableRow=tableRows-1;
   else
    tableRow=0;

   // A row shorter than one 8-byte group cannot be converted without reading
   // and writing outside of it.
   if (row_size<8)
    return;

   const unsigned char *edx=(const unsigned char*)yuv2rgb_constants[tableRow];
   __m64 mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7;
   for (;src!=src_end;src+=src_pitch,dst+=dst_pitch)
    {
     const unsigned char *srcLn=src,*srcLnEnd=srcLn+row_size-8;
     unsigned char *dstLn=dst;
     // All groups but the last use the look-ahead variant. Testing before the
     // call (instead of after it) keeps an 8-byte row from running a look-ahead
     // step it has no data for; for longer rows the call sequence is unchanged.
     while (srcLn<srcLnEnd)
      YUV2RGB_INNER_LOOP(0,srcLn,srcLnEnd,dstLn,edx,mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7);
     // Last group of the row: no following pixel to interpolate chroma with.
     YUV2RGB_INNER_LOOP(1,srcLn,srcLnEnd,dstLn,edx,mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7);
    }
   _mm_empty();
  }
};

#pragma warning(pop)

#endif
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -