⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 convert_yv12.h

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 H
📖 第 1 页 / 共 2 页
字号:
   srcp[2] = srcV-src_pitch_uv;

   typename _mm::__m add_ones=_mm::set1_pi8(1);
   for (int y=0;y<height;y+=2,dst+=dst_pitch2,srcp[0]+=src_pitch2,srcp[1]+=src_pitch_uv,srcp[2]+=src_pitch_uv)
    {
     unsigned char *edi=dst;
     const unsigned char *eax=srcp[0];
     const unsigned char *ebx=srcp[1];
     const unsigned char *ecx=srcp[2];
     for (int x=0;x<src_rowsize;x+=_mm::size,edi+=_mm::size*2,eax+=_mm::size,ebx+=_mm::size/2,ecx+=_mm::size/2)
      {
       typename _mm::__m mm6=add_ones;
       //int edx= src_pitch_uv;
       typename _mm::__m mm0,mm7=_mm::setzero_si64(),mm2,mm3,mm4,mm1,mm5;
       movq (mm0,eax);          // mm0 = Y current line
       movd (mm2,ebx+src_pitch_uv);            // mm2 = U top field
        movd (mm3, ecx+src_pitch_uv);          // mm3 = V top field
       movd (mm4,ebx);        // U prev top field
        movq (mm1,mm0);             // mm1 = Y current line
       movd (mm5,ecx);        // V prev top field
        _mm::pavgb (mm4,mm2);            // interpolate chroma U  (25/75)
       _mm::pavgb (mm5,mm3 );            // interpolate chroma V  (25/75)
        psubusb (mm4, mm6);         // Better rounding (thanks trbarry!)
       psubusb (mm5, mm6 );
        _mm::pavgb (mm4,mm2   );         // interpolate chroma U
       _mm::pavgb (mm5,mm3    );         // interpolate chroma V
        punpcklbw (mm0,mm7);        // Y low
       punpckhbw (mm1,mm7 );        // Y high*
        punpcklbw (mm4,mm7);        // U 00uu 00uu 00uu 00uu
       punpcklbw (mm5,mm7 );        // V 00vv 00vv 00vv 00vv
        pxor (mm6,mm6     );
       punpcklbw (mm6,mm4 );        // U 0000 uu00 0000 uu00 (low)
        punpckhbw (mm7,mm4);         // V 0000 uu00 0000 uu00 (high
       por (mm0,mm6 );
        por (mm1,mm7);
       movq (mm6,mm5);
        punpcklbw (mm5,mm5);          // V 0000 vvvv 0000 vvvv (low)
       punpckhbw (mm6,mm6);           // V 0000 vvvv 0000 vvvv (high)
        pslld (mm5,24);
       pslld (mm6,24);
        por (mm0,mm5);
       por (mm1,mm6);
       //mov edx, src_pitch_uv2
        movq (edi,mm0);
       movq (edi+_mm::size,mm1);

       //Next line

        movq (mm6,add_ones);
       movd (mm4,ebx+src_pitch_uv2);        // U next top field
        movd (mm5,ecx+src_pitch_uv2);       // V prev top field
       //mov edx, [src_pitch]
        pxor (mm7,mm7);
       movq (mm0,eax+src_pitch);        // Next U-line
        _mm::pavgb (mm4,mm2);            // interpolate chroma U
       movq (mm1,mm0);             // mm1 = Y current line
       _mm::pavgb (mm5,mm3);             // interpolate chroma V
        psubusb (mm4, mm6);         // Better rounding (thanks trbarry!)
       psubusb (mm5, mm6 );
        _mm::pavgb (mm4,mm2   );         // interpolate chroma U
       _mm::pavgb (mm5,mm3    );         // interpolate chroma V
        punpcklbw (mm0,mm7);        // Y low
       punpckhbw (mm1,mm7 );        // Y high*
        punpcklbw (mm4,mm7);        // U 00uu 00uu 00uu 00uu
       punpcklbw (mm5,mm7 );        // V 00vv 00vv 00vv 00vv
        pxor (mm6,mm6     );
       punpcklbw (mm6,mm4 );        // U 0000 uu00 0000 uu00 (low)
        punpckhbw (mm7,mm4);         // V 0000 uu00 0000 uu00 (high
       por (mm0,mm6 );
        por (mm1,mm7);
       movq (mm6,mm5);
        punpcklbw (mm5,mm5);          // V 0000 vvvv 0000 vvvv (low)
       punpckhbw (mm6,mm6);           // V 0000 vvvv 0000 vvvv (high)
        pslld (mm5,24);
       //mov edx,[dst_pitch]
       pslld (mm6,24);
        por (mm0,mm5);
       por (mm1,mm6);
        movq (edi+dst_pitch,mm0);
       movq (edi+dst_pitch+_mm::size,mm1);
      }
    }
   _mm::sfence();
   _mm::empty();
  }
 /*
  * yv12_i_to_yuy2: converts an interlaced planar frame (separate Y, U and V
  * planes, chroma rows shared by two luma rows of the same field) into a packed
  * frame whose bytes are ordered Y0 U Y1 V (see the shift amounts 8 and 24 below).
  *
  * Parameters:
  *   srcY, srcU, srcV - first byte of the luma plane and of the two chroma planes
  *   src_rowsize      - luma bytes to convert per row; rows are processed in whole
  *                      chunks of _mm::size bytes, so the last chunk may extend
  *                      past src_rowsize when it is not a multiple of _mm::size
  *   src_pitch        - byte distance between consecutive luma rows
  *   src_pitch_uv     - byte distance between consecutive chroma rows
  *   dst, dst_pitch   - destination buffer and its row pitch; 2*_mm::size bytes
  *                      are written for every _mm::size luma bytes read
  *   height           - number of luma rows
  *
  * Structure:
  *   1. If _mm::align is set and any pointer or pitch is not a multiple of 16,
  *      the whole job is handed to the _mm::T64 instantiation instead.
  *   2. The four top and four bottom rows are converted with their chroma row
  *      copied as-is (no interpolation).
  *   3. The remaining height-8 rows are converted in groups of four: two rows of
  *      one field, then two rows of the other field, with chroma blended between
  *      neighbouring chroma rows of the same field.
  *
  * NOTE(review): the helpers movq/movd/punpck*bw/pslld/por/psubusb/pxor and
  * _mm::pavgb are declared outside this view; the comments below assume they
  * behave like the x86 SIMD instructions of the same name - confirm.
  * NOTE(review): the bottom-row offsets use height-4 and (height>>1)-2 and the
  * main loop uses height-8, so this presumably expects height >= 8 - confirm
  * against callers.
  */
 static void yv12_i_to_yuy2(const BYTE* srcY, const BYTE* srcU, const BYTE* srcV, int src_rowsize, stride_t src_pitch, stride_t src_pitch_uv,
                            BYTE* dst, stride_t dst_pitch,
                            int height)
  {
   // Aligned variant only: any pointer or pitch that is not 16-byte aligned
   // sends the call to the T64 instantiation of the same routine.
   if (_mm::align && (intptr_t(srcY)&15 || intptr_t(srcU)&15 || intptr_t(srcV)&15 || intptr_t(dst)&15 || src_pitch&15 || src_pitch_uv&15 || dst_pitch&15))
    {
     TconvertYV12<typename _mm::T64>::yv12_i_to_yuy2(srcY,srcU,srcV,src_rowsize,src_pitch,src_pitch_uv,dst,dst_pitch,height);
     return;
    }
   // Pre-multiplied pitches used as row offsets below.
   stride_t src_pitch_uv2 = src_pitch_uv*2;
   stride_t src_pitch_uv4 = src_pitch_uv*4;
   // State flag for the main loop: 0 = first field of the current 4-row group
   // still has to be followed by the second field, 1 = second field is being
   // processed and the row pointers must advance afterwards.
   int skipnext = 0;

   stride_t dst_pitch2=dst_pitch*2;
   stride_t src_pitch2 = src_pitch*2;

   stride_t dst_pitch4 = dst_pitch*4;
   stride_t src_pitch4 = src_pitch*4;


   /**** Do first and last lines - NO interpolation:   *****/
   // MMX loop relies on C-code to adjust the lines for it.
   const BYTE* _srcY=srcY;
   const BYTE* _srcU=srcU;
   const BYTE* _srcV=srcV;
   BYTE* _dst=dst;
  // Eight passes: i=0..3 handle luma rows 0,2,1,3 (chroma rows 0,0,1,1);
  // i=4..7 handle rows height-4,height-2,height-3,height-1
  // (chroma rows height/2-2, height/2-2, height/2-1, height/2-1).
   for (int i=0;i<8;i++)
    {
     switch (i)
      {
       case 1:
        _srcY+=src_pitch2;  // Same chroma as in 0
        _dst+=dst_pitch2;
        break;
       case 2:
        _srcY-=src_pitch;  // Next field
        _dst-=dst_pitch;
        _srcU+=src_pitch_uv;
        _srcV+=src_pitch_uv;
        break;
       case 3:
        _srcY+=src_pitch2;  // Same  chroma as in 2
        _dst+=dst_pitch2;
        break;
       case 4: // Now we process the bottom four lines of the picture.
        _srcY=srcY+(src_pitch*(height-4));
        _srcU=srcU+(src_pitch_uv*((height>>1)-2));
        _srcV=srcV+(src_pitch_uv*((height>>1)-2));
        _dst = dst+(dst_pitch*(height-4));
        break;
       case 5: // Same chroma as in 4
        _srcY += src_pitch2;
        _dst += dst_pitch2;
        break;
       case 6:  // Next field
        _srcY -= src_pitch;
        _dst -= dst_pitch;
        _srcU+=src_pitch_uv;
        _srcV+=src_pitch_uv;
        break;
       case 7:  // Same chroma as in 6
        _srcY += src_pitch2;
        _dst += dst_pitch2;
        // no break here: control continues into default, which only breaks
       default:  // Nothing, case 0
        break;
      }

     // Per-row cursors; the names mirror the registers of the original assembly.
     unsigned char *edi=_dst;
     const unsigned char *eax=_srcY;
     const unsigned char *ebx=_srcU;
     const unsigned char *ecx=_srcV;
     typename _mm::__m mm7=_mm::setzero_si64();  // all-zero register used for byte->word/dword widening
     // One chunk per iteration: _mm::size luma bytes and _mm::size/2 bytes of each
     // chroma plane are consumed, 2*_mm::size packed bytes are produced.
     for (int edx=0;edx<src_rowsize;edx+=_mm::size,eax+=_mm::size,ebx+=_mm::size/2,ecx+=_mm::size/2,edi+=_mm::size*2)
      {
       typename _mm::__m mm0,mm1,mm3,mm2,mm4,mm5;
       movq (mm0,eax);    //Y
        movd (mm1,ebx);  //U
       movq (mm3,mm0);
        movd (mm2,ecx);   //V
       punpcklbw (mm0,mm7);  // Y low
        punpckhbw (mm3,mm7);   // Y high
       punpcklbw (mm1,mm7 );  // 00uu 00uu
        punpcklbw (mm2,mm7);   // 00vv 00vv
       movq (mm4,mm1 );
        movq (mm5,mm2);
       punpcklbw (mm1,mm7 );  // 0000 00uu low
        punpcklbw (mm2,mm7);   // 0000 00vv low
       punpckhbw (mm4,mm7 );  // 0000 00uu high
        punpckhbw (mm5,mm7);   // 0000 00vv high
       pslld (mm1,8);         // move U into byte 1 of every dword
        pslld (mm4,8);
       pslld (mm2,24);        // move V into byte 3 of every dword
        pslld (mm5,24);
       por (mm0, mm1);        // merge: Y in bytes 0 and 2, U in byte 1, V in byte 3
        por (mm3, mm4);
       por (mm0, mm2);
        por (mm3, mm5);
       movq (edi,mm0);            // first half of the packed chunk
        movq (edi+_mm::size,mm3); // second half of the packed chunk
      }
    }

  /****************************************
   * Conversion main loop.
   * The code properly interpolates UV from
   * interlaced material.
   * We process two lines in the same field
   * in the same loop, to avoid reloading
   * chroma each time.
   *****************************************/

   // The four top and four bottom rows were already written above.
   height-=8;

   // Start at luma row 4 / chroma row 2.
   dst+=dst_pitch4;
   srcY+=src_pitch4;
   srcU+=src_pitch_uv2;
   srcV+=src_pitch_uv2;

   // srcp[0] is the current luma row; srcp[1]/srcp[2] point two chroma rows
   // BEFORE the current one, i.e. at the previous chroma row of the same field,
   // so that +src_pitch_uv2 is the current row and +src_pitch_uv4 the next one.
   const BYTE *srcp[3];
   srcp[0] = srcY;
   srcp[1] = srcU-src_pitch_uv2;
   srcp[2] = srcV-src_pitch_uv2;

   typename _mm::__m add_ones=_mm::set1_pi8(1);  // every byte = 1, used for the rounding correction
   // y counts finished luma rows and is only advanced (by 4) in the skipnext
   // branch below; the for statement itself has no increment expression.
   for (int y=0;y<height;)
    {
     // First-field pass of the current 4-row group.
     unsigned char *edi=dst;
     const unsigned char *eax=srcp[0];
     const unsigned char *ebx=srcp[1];
     const unsigned char *ecx=srcp[2];
    // Re-entry point for the second-field pass (reached via goto with the
    // cursors moved down by one luma row / one chroma row).
    yloop:
     for (int x=0;x<src_rowsize;x+=_mm::size,edi+=_mm::size*2,eax+=_mm::size,ebx+=_mm::size/2,ecx+=_mm::size/2)
      {
       //mov edx, src_pitch_uv2
       typename _mm::__m mm6=add_ones,mm0,mm7,mm2,mm3,mm4,mm1,mm5;
       movq (mm0,eax);          // mm0 = Y current line
        pxor (mm7,mm7);         // mm7 = 0
       movd (mm2,ebx+src_pitch_uv2);            // mm2 = U, current chroma row of this field
        movd (mm3, ecx+src_pitch_uv2);          // mm3 = V, current chroma row of this field
       movd (mm4,ebx);            // U, previous chroma row of this field
        movq (mm1,mm0);             // mm1 = Y current line
       movd (mm5,ecx);            // V, previous chroma row of this field
        // Two successive averages against the current row weight it about 75%
        // and the previous row about 25%.
        _mm::pavgb (mm4,mm2);            // interpolate chroma U
       _mm::pavgb (mm5,mm3);             // interpolate chroma V
        psubusb (mm4, mm6);         // Better rounding (thanks trbarry!)
       psubusb (mm5, mm6);
        _mm::pavgb (mm4,mm2);            // interpolate chroma U
       _mm::pavgb (mm5,mm3);             // interpolate chroma V
       punpcklbw (mm0,mm7);        // Y low
       punpckhbw (mm1,mm7);         // Y high*
        punpcklbw (mm4,mm7);        // U 00uu 00uu 00uu 00uu
       punpcklbw (mm5,mm7);         // V 00vv 00vv 00vv 00vv
        pxor (mm6,mm6);             // mm6 = 0 (the rounding constant is reloaded later)
       punpcklbw (mm6,mm4);         // U 0000 uu00 0000 uu00 (low)
        punpckhbw (mm7,mm4);         // U 0000 uu00 0000 uu00 (high)
       por (mm0,mm6);
        por (mm1,mm7);
       movq (mm6,mm5);
        punpcklbw (mm5,mm5);          // V 0000 vvvv 0000 vvvv (low)
       punpckhbw (mm6,mm6);           // V 0000 vvvv 0000 vvvv (high)
        pslld (mm5,24);               // keep a single V copy, in byte 3 of every dword
       pslld (mm6,24);
        por (mm0,mm5);
       por (mm1,mm6);
       //mov edx, src_pitch_uv4
        movq (edi,mm0);               // first output row of this pass
       movq (edi+_mm::size,mm1);

       //Next line in same field
       // mm2/mm3 still hold the current chroma row and are reused here.
        movq (mm6, add_ones);
       movd (mm4,ebx+src_pitch_uv4);        // U, next chroma row of this field
        movd (mm5,ecx+src_pitch_uv4);       // V, next chroma row of this field
       //mov edx, [src_pitch2]
        movq( mm0,eax+src_pitch2);        // Next Y-line
       _mm::pavgb (mm4,mm2);            // interpolate chroma U
        _mm::pavgb (mm5,mm3);             // interpolate chroma V
       psubusb (mm4, mm6);         // Better rounding (thanks trbarry!)
        psubusb (mm5, mm6);
       _mm::pavgb (mm4,mm2);            // interpolate chroma U
        _mm::pavgb (mm5,mm3);             // interpolate chroma V
       pxor (mm7,mm7);
       movq (mm1,mm0);             // mm1 = copy of the next Y line
        punpcklbw (mm0,mm7);        // Y low
       punpckhbw (mm1,mm7 );        // Y high*
        punpcklbw (mm4,mm7);        // U 00uu 00uu 00uu 00uu
       punpcklbw (mm5,mm7 );        // V 00vv 00vv 00vv 00vv
        pxor (mm6,mm6);
       punpcklbw (mm6,mm4);         // U 0000 uu00 0000 uu00 (low)
        punpckhbw (mm7,mm4);         // U 0000 uu00 0000 uu00 (high)
       por (mm0,mm6 );
        por( mm1,mm7);
       movq (mm6,mm5);
        punpcklbw (mm5,mm5);          // V 0000 vvvv 0000 vvvv (low)
       punpckhbw (mm6,mm6);           // V 0000 vvvv 0000 vvvv (high)
        pslld (mm5,24);
       //mov edx,[dst_pitch2]
       pslld (mm6,24);
        por (mm0,mm5);
       por (mm1,mm6);
        movq (edi+dst_pitch2,mm0);    // second output row of this pass, two rows further down
       movq (edi+dst_pitch2+_mm::size,mm1);
      }
     if (skipnext)
      {
       // Both fields of this group are done: advance four luma/output rows and
       // two chroma rows, then let the for condition decide whether to continue.
       dst+=dst_pitch4;
       srcp[0]+=src_pitch4;
       srcp[1]+=src_pitch_uv2;
       srcp[2]+=src_pitch_uv2;
       skipnext=0;
       y+=4;
      }
     else
      {
       // Set the cursors up for the other field: one luma/output row and one
       // chroma row below the first-field start, then rerun the row loop.
       edi=dst;
       eax=srcp[0];
       ebx=srcp[1];
       ecx=srcp[2];
       edi+=dst_pitch;
       eax+=src_pitch;
       ebx+=src_pitch_uv;
       ecx+=src_pitch_uv;
       skipnext=1;
       // NOTE(review): when y+4>height (only possible if height-8 is not a
       // multiple of 4) the goto is not taken; the outer loop then restarts
       // with skipnext==1, so the first-field rows of this group are written a
       // second time and the second-field rows are never written. Confirm that
       // callers only pass heights that are multiples of 4.
       if(y+4<=height)
        goto yloop;
      }
    }
   // Presumably a store fence followed by leaving MMX state (EMMS); both
   // helpers are defined outside this view - confirm.
   _mm::sfence();
   _mm::empty();
  }
};

#pragma warning(pop)

#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -