📄 tomsmocomp.cpp

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 CPP
📖 第 1 页 / 共 4 页
字号:
           // simple bob last 8 bytes
           edx= Last8;
           esi=ebx+edx;
           movq    (mm0, esi);
           //pavgb   mm0, qword ptr[esi+ecx]
           Tsimd::v_pavgb(mm0, esi+ecx, mm2, ShiftMask);
           Tsimd::movntq(edi+edx, mm0);
          }
         // now loop and get the middle qwords
         esi= pSrc;
         edi= (unsigned char*)pSrcP;
         edx= 8;                          // curr offset into all lines
        LoopQ:
         eax= (unsigned char*)pBobP;
         edi+= 8;
         esi+= 8;
         ebx+= 8;
         eax+= edx;

         //weirdbob.inc
         // First, get and save our possible Bob values
         // Assume our pixels are laid out as follows with x the calc'd bob value
         // and the other pixels are from the current field
         //
         //                j a b c k             current field
         //            x                 calculated line
         //        m d e f n             current field
         //
         // we calc the bob value as:
         //              x2 = either avg(a,f), avg(c,d), avg(b,e), avg(j,n), or avg(k,m)

         // selected for the     smallest of abs(a,f), abs(c,d), or abs(b,e), etc.

         // a,f
         // 2003/06/17 Use BPP (bits per pixel) here
         movq    (mm0, ebx-BPP);         // value a from top left
         movq    (mm1, ebx+ecx+BPP);     // value f from bottom right
         movq    (mm6, mm0);
         //pavgb   mm6, mm1                                        // avg(a,f), also best so far
         Tsimd::v_pavgb(mm6, mm1, mm7, ShiftMask);      // avg(a,f), also best so far
         movq    (mm7, mm0);
         psubusb (mm7, mm1);
         psubusb (mm1, mm0);
         por     (mm7, mm1);                                        // abs diff, also best so far

         // c,d
         movq    (mm0, ebx+BPP);         // value c from top right
         movq    (mm1, ebx+ecx-BPP);     // value d from bottom left
         movq    (mm2, mm0);
         //pavgb   mm2, mm1                                        // avg(c,d)
         Tsimd::v_pavgb(mm2, mm1, mm3, ShiftMask);      // avg(c,d)
         movq    (mm3, mm0);
         psubusb (mm3, mm1);
         psubusb (mm1, mm0);
         por     (mm3, mm1);                                        // abs(c,d)
         movq    (mm1, mm3);                                        // keep copy

         psubusb (mm3, mm7);                        // nonzero where new weights bigger, else 0
         pxor    (mm4, mm4);
         pcmpeqb (mm3, mm4);                        // now ff where new better, else 00
         pcmpeqb (mm4, mm3);                        // here ff where old better, else 00

         pand    (mm1, mm3);                        // keep only better new avg and abs
         pand    (mm2, mm3);

         pand    (mm6, mm4);
         pand    (mm7, mm4);

         por     (mm6, mm2);                        // and merge new & old vals keeping best
         por     (mm7, mm1);

         if (!USE_YV12)
          {
           por     (mm7, UVMask);                     // but we know YUY2 chroma is worthless so far
           pand    (mm5, YMask);                      // mask out chroma from here also
          }

         // j,n
         movq    (mm0, ebx-2*BPP);               // value j from top left
         movq    (mm1, ebx+ecx+2*BPP);   // value n from bottom right
         movq    (mm2, mm0);
         // pavgb   mm2, mm1                                        // avg(j,n)
         Tsimd::v_pavgb(mm2, mm1, mm3, ShiftMask);      // avg(j,n)
         movq    (mm3, mm0);
         psubusb (mm3, mm1);
         psubusb (mm1, mm0);
         por     (mm3, mm1);                                        // abs(j-n)
         movq    (mm1, mm3);                                        // keep copy

         psubusb (mm3, mm7);                        // nonzero where new weights bigger, else 0
         pxor    (mm4, mm4);
         pcmpeqb (mm3, mm4);                        // now ff where new better, else 00
         pcmpeqb (mm4, mm3);                        // here ff where old better, else 00

         pand    (mm1, mm3);                        // keep only better new avg and abs
         pand    (mm3, mm2);

         pand    (mm6, mm4);
         pand    (mm7, mm4);

         por     (mm6, mm3);                        // and merge new & old vals keeping best
         por     (mm7, mm1);                        // "

         // k, m
         movq    (mm0, ebx+2*BPP);               // value k from top right
         movq    (mm1, ebx+ecx-2*BPP);   // value m from bottom left
         movq    (mm4, mm0);
         //pavgb   mm4, mm1                                        // avg(k,m)
         Tsimd::v_pavgb(mm4, mm1, mm3, ShiftMask);      // avg(k,m)
         movq    (mm3, mm0);
         psubusb (mm3, mm1);
         psubusb (mm1, mm0);
         por     (mm3, mm1);                                        // abs(k,m)
         movq    (mm1, mm3);                                        // keep copy

         movq    (mm2, mm4);                        // avg(k,m)

         psubusb (mm3, mm7);                        // nonzero where new weights bigger, else 0
         pxor    (mm4, mm4);
         pcmpeqb (mm3, mm4);                        // now ff where new better, else 00
         pcmpeqb (mm4, mm3);                        // here ff where old better, else 00

         pand    (mm1, mm3);                        // keep only better new avg and abs
         pand    (mm3, mm2);

         pand    (mm6, mm4);
         pand    (mm7, mm4);

         por     (mm6, mm3);                        // and merge new & old vals keeping best
         por     (mm7, mm1);                        // "

         // b,e
         movq    (mm0,ebx);             // value b from top
         movq    (mm1,ebx+ecx); // value e from bottom

         // We will also calc here the max/min values to later limit comb
         // so the max excursion will not exceed the Max_Comb constant
         if (SKIP_SEARCH)
          {
           movq    (mm2, mm0);
           //pminub  mm2, mm1
           Tsimd::v_pminub(mm2, mm1, mm4);
           //pmaxub  mm6, mm2                        // clip our current results so far to be above this
           Tsimd::pmaxub(mm6, mm2);
           movq    (mm2, mm0);
           Tsimd::pmaxub (mm2, mm1);
           // pminub  mm6, mm2                        // clip our current results so far to be below this
           Tsimd::v_pminub(mm6, mm2, mm4);
          }
         else
          {
           movq    (mm2, mm0);
           movq    (mm4, eax);
           psubusb (mm2, mm4);
           psubusb (mm4, mm0);
           por     (mm4, mm2);                        // abs diff

           movq    (mm2, mm1);
           movq    (mm3, eax+ecx);
           psubusb (mm2, mm3);
           psubusb (mm3, mm1);
           por     (mm3, mm2);                        // abs diff
           //pmaxub  mm3, mm4                        // top or bottom pixel moved most
           Tsimd::pmaxub(mm3, mm4);                     // top or bottom pixel moved most
           psubusb (mm3, Max_Mov);            // moved more than allowed? or goes to 0?
           pxor    (mm4, mm4);
           pcmpeqb (mm3, mm4);                        // now ff where low motion, else high motion
           movq    (mm2, mm0);
           // pminub  mm2, mm1
           Tsimd::v_pminub(mm2, mm1, mm4);
           // pmaxub  mm6, mm2                        // clip our current results so far to be above this
           Tsimd::pmaxub(mm6, mm2);

           psubusb (mm2, mm3);                        // maybe decrease it to 0000.. if no surround motion
           movq    (Min_Vals, mm2);

           movq    (mm2, mm0);
           Tsimd::pmaxub (mm2, mm1);
           //  pminub  mm6, mm2                        // clip our current results so far to be below this
           Tsimd::v_pminub (mm6, mm2, mm4);
           paddusb (mm2, mm3);                        // maybe increase it to ffffff if no surround motion
           movq    (Max_Vals, mm2);
          }

         movq    (mm2, mm0);
         //pavgb   mm2, mm1                                        // avg(b,e)
         Tsimd::v_pavgb(mm2, mm1, mm3, ShiftMask);      // avg(b,e)

         movq    (mm3, mm0);
         psubusb (mm3, mm1);
         psubusb (mm1, mm0);
         por     (mm3, mm1);                        // abs(b,e)
         movq    (mm1, mm3);                        // keep copy of diffs

         pxor    (mm4, mm4);
         psubusb (mm3, mm7);                        // nonzero where new weights bigger, else 0
         pcmpeqb (mm3, mm4);                        // now ff where new better, else 00
         pcmpeqb (mm4, mm3);                        // here ff where old better, else 00

         pand    (mm1, mm3);
         pand    (mm2, mm3);

         pand    (mm6, mm4);
         pand    (mm7, mm4);

         por     (mm6, mm2);                        // our x2 value
         por     (mm7, mm1);                        // our x2 diffs
         movq    (mm4, mm7);                        // save as bob uncertainty indicator

         // For non-SSE2:
         // through out most of the rest of this loop we will maintain
         //      mm4             our min bob value
         //      mm5             best weave pixels so far
         //  mm6         our max Bob value
         //      mm7             best weighted pixel ratings so far

         // We will keep a slight bias to using the weave pixels
         // from the current location, by rating them by the min distance
         // from the Bob value instead of the avg distance from that value.
         // our best and only rating so far
         pcmpeqb(mm7, mm7);                        // ffff, say we didn't find anything good yet
        }
       // end of top
       search<SearchRange>(ecx,esi,edi,mm0,mm1,mm2,mm3,mm5,mm7);
       // bottom
       if (IS_SSE2)
        ;//sse2 code deleted for now
       else
        {
         // Version for non-SSE2
         if (SKIP_SEARCH)
          movq    (mm0, mm6);                        // just use the results of our wierd bob
         else
          {
           // Use the better of bob or weave
           //pminub  mm4, TENS                       // the most we care about
           Tsimd::v_pminub (mm4, TENS, mm0);       // the most we care about

           psubusb (mm7, mm4  );                      // foregive that much from weave est?
           psubusb (mm7, FOURS);                      // bias it a bit toward weave
           pxor    (mm0, mm0);
           pcmpeqb (mm7, mm0);                        // all ff where weave better, else 00
           pcmpeqb (mm0, mm7);                        // all ff where bob better, else 00
           pand    (mm0, mm6);                        // use bob for these pixel values
           pand    (mm7, mm5);                        // use weave for these
           por     (mm0, mm7);                        // combine both
           //pminub  mm0, Max_Vals           // but clip to catch the stray error
           Tsimd::v_pminub(mm0, Max_Vals, mm1); // but clip to catch the stray error
           //pmaxub  mm0, Min_Vals
           Tsimd::pmaxub (mm0, Min_Vals);
          }
         eax= pDest;
         if (USE_VERTICAL_FILTER)
          {
           movq    (mm1, mm0);
           //pavgb   mm0, qword ptr[ebx]
           Tsimd::v_pavgb (mm0, ebx, mm2, ShiftMask);
           //movntq  qword ptr[eax+edx], mm0
           Tsimd::movntq(eax+edx, mm0);
           //pavgb   mm1, qword ptr[ebx+ecx]
           Tsimd::v_pavgb (mm1, ebx+ecx, mm2, ShiftMask);
           eax+= dst_pitchw;
           //movntq  qword ptr[eax+edx], mm1
           Tsimd::movntq(eax+edx, mm1);
          }
         else
          {
           // movntq  qword ptr[eax+edx], mm0
           movq    (mm1, esi+ecx);
           Tsimd::movntq(eax+edx, mm0);
          }
         edx=edx+8;            // bump offset pointer
         if (edx<Last8)                     // done with line?
          goto LoopQ;                           // y
        }
       // adjust for next line
       pSrc  += src_pitch2;
       pSrcP += src_pitch2;
       pDest += dst_pitch2;
       pBob += src_pitch2;
       pBobP += src_pitch2;
      }
     _mm_empty();
    }
  };
public:
 static TsearchLoopFc getProcess(int SearchEffort)
  {
   // Pick the TsearchLoop<...>::SearchLoop instantiation that corresponds to
   // the requested SearchEffort and hand back a pointer to it.
   //
   // The exact values -1 and 0 have their own variants.  Every other value
   // (including anything below -1) is bucketed by the ascending thresholds
   // below; the first threshold that is >= SearchEffort wins, and anything
   // above 21 gets the 99 variant.
   // NOTE(review): the third template argument is presumably the search
   // range and the first two are mode flags -- confirm against the
   // TsearchLoop declaration, which is not visible from here.
   switch (SearchEffort)
    {
     case -1:
      return TsearchLoop< true, true, 0>::SearchLoop;
     case 0:
      return TsearchLoop<false, true, 0>::SearchLoop;
     default:
      break;
    }

   if (SearchEffort <= 1)
    return TsearchLoop<false,false, 1>::SearchLoop;

   // There is no dedicated variant for effort 2; it shares the <= 3 one.
   if (SearchEffort <= 3)
    return TsearchLoop<false,false, 3>::SearchLoop;

   if (SearchEffort <= 5)
    return TsearchLoop<false,false, 5>::SearchLoop;

   if (SearchEffort <= 9)
    return TsearchLoop<false,false, 9>::SearchLoop;

   if (SearchEffort <= 11)
    return TsearchLoop<false,false,11>::SearchLoop;

   if (SearchEffort <= 13)
    return TsearchLoop<false,false,13>::SearchLoop;

   if (SearchEffort <= 15)
    return TsearchLoop<false,false,15>::SearchLoop;

   if (SearchEffort <= 19)
    return TsearchLoop<false,false,19>::SearchLoop;

   if (SearchEffort <= 21)
    return TsearchLoop<false,false,21>::SearchLoop;

   // Anything larger than 21 uses the widest variant.
   return TsearchLoop<false,false,99>::SearchLoop;
  }
};

template<class Tsimd> struct TdblResize
{
 static __forceinline void DblResizeH_1(int rowsize, const BYTE* srcp, BYTE* dstp)
  {
   int ct = (rowsize-8) >> 3;
   // Loop general reg usage
   //
   // ecx loop ctr
   // edi - dest
   // esi - src pixels

   // now loop and get the middle qwords
   unsigned char *edi= dstp;
   const unsigned char *esi= srcp;
   __m64 mm0,mm2,mm6,mm1,mm7;
   for (int ecx=ct;ecx>0;ecx--,esi+=8,edi+=16)
    {
     // just use the current line, first luma
     movq (mm0, esi);  // orig luma, in register as VYUYVYUY
     movq (mm2, mm0);    // keep a copy while we got it
     movq (mm6, esi+2); // luma 2 bytes to right
💿 文件大小 8073 K
👤 上传用户 sinba
📂 所属分类压缩解压
🏷️ 相关标签

#FFMPEG #H264 #VC #转换
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -