⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nic_postprocess.cpp

📁 从FFMPEG转换而来的H264解码程序,VC下编译..
💻 CPP
📖 第 1 页 / 共 5 页
字号:
         v[2*stride + 4] =
         v[3*stride + 4] = 255;
        }
       #endif
      }
    }
   else    /* use default mode */
    {
     deblock_horiz_default_filter(v, stride, QP);
     #ifdef SHOWDECISIONS_H
     if (!chromaFlag)
      {
       v[0*stride + 4] =
       v[1*stride + 4] =
       v[2*stride + 4] =
       v[3*stride + 4] = 0;
      }
     #endif
    }
  }
}

/*
 * Decide between DC mode and default mode for a vertical deblock (MMX path).
 *
 * Looks at the 8 columns starting at v and the 7 vertically adjacent row
 * pairs (row 1,row 2) ... (row 7,row 8), where row k is v + k*stride.  It counts
 * the pixel pairs whose absolute difference is <= 1 and returns nonzero when
 * that count is greater than DEBLOCK_VERT_USEDC_THR, zero otherwise.
 *
 * With PP_SELF_CHECK defined the same count is also computed in plain C and
 * a message is emitted through DPRINTF if the two decisions disagree.
 *
 * NOTE(review): movq/pxor/psubusb/por/pand/pcmpeqb/psubb/psrlq/paddb/movd are
 * helpers defined outside this block.  The comments below assume they behave
 * like the MMX instructions of the same name (destination first), and that
 * movq(reg, ptr) loads the 8 bytes at ptr -- confirm against their
 * definitions.
 */
static inline  int deblock_vert_useDC(uint8_t *v, stride_t stride, int DEBLOCK_VERT_USEDC_THR)
{
        __m64 mask = _mm_set1_pi8(/*0xfe*/-2);
        int32_t mm_data1;
        uint64_t *pmm1;
        int eq_cnt, useDC;
        #ifdef PP_SELF_CHECK
        int useDC2, i, j;
        #endif

        #ifdef PP_SELF_CHECK
        /* scalar reference: count vertical neighbours differing by at most 1 */
        eq_cnt = 0;
        for (j=1; j<8; j++)
        {
                for (i=0; i<8; i++)
                {
                        if (abs(v[j*stride+i] - v[(j+1)*stride+i]) <= 1) eq_cnt++;
                }
        }
        useDC2 = (eq_cnt > DEBLOCK_VERT_USEDC_THR);
        #endif

        /* starting pointer is at v[stride] == v1 in mpeg4 notation */
        pmm1 = (uint64_t *)(&(v[stride]));

        /* first load some constants into mm4, mm6, mm7 */
                /* eax is the running row pointer (named after the register used by the asm original) */
                unsigned char *eax=(unsigned char*)pmm1;
                __m64 mm6,mm7,mm2,mm4,mm3;
                movq (mm6, mask);               /* mm6 = 0xfefefefefefefefe: clears bit 0 of every byte */
                pxor (mm7, mm7);                /* mm7 = 0: per-column match counters  */

                movq (mm2, eax);              /* mm2 = row 1                    */
                pxor (mm4, mm4);                /* mm4 = 0: compare operand       */

                /* ---- pair (row 1, row 2) ---- */
                eax+=stride;                 /* advance to row 2               */
                movq   (mm3, mm2);              /* mm3 = row 1                    */
                __m64 mm0,mm1;
                movq   (mm2, eax);           /* mm2 = row 2                    */
                movq   (mm0, mm3);             /* mm0 = row 1                    */

                movq   (mm3, mm2);             /* mm3 = row 2 (kept for next pair) */
                movq   (mm1, mm0);             /* mm1 = row 1                    */

                psubusb (mm0, mm2);            /* mm0 = saturating row1 - row2   */
                eax+=stride;                   /* advance to row 3               */

                psubusb (mm2, mm1);            /* mm2 = saturating row2 - row1   */
                por    (mm0, mm2);             /* mm0 = |row1 - row2| per byte   */

                pand   (mm0, mm6);             /* drop bit 0: zero iff diff <= 1 */
                pcmpeqb (mm0, mm4);            /* mm0 byte = 0xff where diff <= 1 */

                /* ---- pair (row 2, row 3) ---- */
                movq   (mm2, eax);             /* mm2 = row 3                    */
                psubb  (mm7, mm0);             /* counters += 1 where pair 1-2 matched */
                __m64 mm5;
                movq   (mm5, mm3);             /* mm5 = row 2                    */
                movq   (mm3, mm2);             /* mm3 = row 3 (kept for next pair) */

                movq   (mm1, mm5);             /* mm1 = row 2                    */
                psubusb (mm5, mm2);            /* mm5 = saturating row2 - row3   */

                psubusb (mm2, mm1);            /* mm2 = saturating row3 - row2   */
                por    (mm5, mm2);             /* mm5 = |row2 - row3| per byte   */

                eax+=stride;           /* advance to row 4               */
                pand   (mm5, mm6);             /* drop bit 0: zero iff diff <= 1 */

                pcmpeqb (mm5, mm4);            /* mm5 byte = 0xff where diff <= 1 */
                psubb  (mm7, mm5);             /* counters += 1 where pair 2-3 matched */

                /* ---- pair (row 3, row 4) ---- */
                movq   (mm2, eax);           /* mm2 = row 4                    */
                movq   (mm0, mm3);             /* mm0 = row 3                    */

                movq   (mm3, mm2);             /* mm3 = row 4 (kept for next pair) */
                movq   (mm1, mm0);             /* mm1 = row 3                    */

                psubusb (mm0, mm2);            /* mm0 = saturating row3 - row4   */
                eax+=stride;                   /* advance to row 5               */

                psubusb (mm2, mm1);            /* mm2 = saturating row4 - row3   */
                por    (mm0, mm2);             /* mm0 = |row3 - row4| per byte   */

                pand   (mm0, mm6);             /* drop bit 0: zero iff diff <= 1 */
                pcmpeqb (mm0, mm4);            /* mm0 byte = 0xff where diff <= 1 */

                /* ---- pair (row 4, row 5) ---- */
                movq   (mm2, eax);           /* mm2 = row 5                    */
                psubb  (mm7, mm0);             /* counters += 1 where pair 3-4 matched */

                movq   (mm5, mm3);             /* mm5 = row 4                    */
                movq   (mm3, mm2);             /* mm3 = row 5 (kept for next pair) */

                movq   (mm1, mm5);             /* mm1 = row 4                    */
                psubusb (mm5, mm2);            /* mm5 = saturating row4 - row5   */

                psubusb (mm2, mm1);            /* mm2 = saturating row5 - row4   */
                por    (mm5, mm2);             /* mm5 = |row4 - row5| per byte   */

                eax+=stride;           /* advance to row 6               */
                pand   (mm5, mm6);             /* drop bit 0: zero iff diff <= 1 */

                pcmpeqb (mm5, mm4);            /* mm5 byte = 0xff where diff <= 1 */
                psubb  (mm7, mm5);             /* counters += 1 where pair 4-5 matched */

                /* ---- pair (row 5, row 6) ---- */
                movq   (mm2, eax);           /* mm2 = row 6                    */
                movq   (mm0, mm3);             /* mm0 = row 5                    */

                movq   (mm3, mm2);             /* mm3 = row 6 (kept for next pair) */
                movq   (mm1, mm0);             /* mm1 = row 5                    */

                psubusb (mm0, mm2);            /* mm0 = saturating row5 - row6   */
                eax+=stride;           /* advance to row 7               */

                psubusb (mm2, mm1);            /* mm2 = saturating row6 - row5   */
                por    (mm0, mm2);             /* mm0 = |row5 - row6| per byte   */

                pand   (mm0, mm6);             /* drop bit 0: zero iff diff <= 1 */
                pcmpeqb (mm0, mm4);            /* mm0 byte = 0xff where diff <= 1 */

                /* ---- pair (row 6, row 7) ---- */
                movq   (mm2, eax);           /* mm2 = row 7                    */
                psubb  (mm7, mm0);             /* counters += 1 where pair 5-6 matched */

                movq   (mm5, mm3);             /* mm5 = row 6                    */
                movq   (mm3, mm2);             /* mm3 = row 7 (kept for next pair) */

                movq   (mm1, mm5);             /* mm1 = row 6                    */
                psubusb (mm5, mm2);            /* mm5 = saturating row6 - row7   */

                psubusb (mm2, mm1);            /* mm2 = saturating row7 - row6   */
                por    (mm5, mm2);             /* mm5 = |row6 - row7| per byte   */

                eax+=stride;           /* advance to row 8               */
                pand   (mm5, mm6);             /* drop bit 0: zero iff diff <= 1 */

                pcmpeqb (mm5, mm4);            /* mm5 byte = 0xff where diff <= 1 */
                psubb  (mm7, mm5);             /* counters += 1 where pair 6-7 matched */

                /* ---- pair (row 7, row 8) ---- */
                movq   (mm2, eax);           /* mm2 = row 8                    */
                movq   (mm0, mm3);             /* mm0 = row 7                    */

                movq   (mm3, mm2);             /* mm3 = row 8 (not read again)   */
                movq   (mm1, mm0);             /* mm1 = row 7                    */

                psubusb (mm0, mm2);            /* mm0 = saturating row7 - row8   */
                eax+=stride;           /* pointer moves past row 8; not dereferenced again */

                psubusb (mm2, mm1);            /* mm2 = saturating row8 - row7   */
                por    (mm0, mm2);             /* mm0 = |row7 - row8| per byte   */

                pand   (mm0, mm6);             /* drop bit 0: zero iff diff <= 1 */
                pcmpeqb (mm0, mm4);            /* mm0 byte = 0xff where diff <= 1 */

                psubb  (mm7, mm0);             /* counters += 1 where pair 7-8 matched */

                /* end of the unrolled row loop */

        /* Each byte of mm7 now holds the match count of one column: the     */
        /* compare result is 0xff (-1) per match, so subtracting it adds 1.  */
        /* Seven pairs per column means at most 7 per byte and 56 in total,  */
        /* so the byte-wise horizontal sum below cannot overflow.            */
                movq    (mm1, mm7);            /* mm1 = per-column counts                 */
                psrlq   (mm7, 32);             /* bring columns 4..7 down onto 0..3       */

                paddb   (mm7, mm1);            /* low 4 bytes of mm7 = four pairwise sums */

                movq (mm1, mm7);               /* mm1 = the pairwise sums                 */
                psrlq   (mm7, 16);             /* bring sums 2..3 down onto 0..1          */

                paddb   (mm1, mm7);            /* low 2 bytes of mm1 = two partial sums   */

                movq (mm7, mm1);               /* mm7 = the partial sums                  */
                psrlq   (mm7, 8);              /* bring partial sum 1 down onto 0         */

                paddb   (mm7, mm1);            /* byte 0 of mm7 = total over all columns  */

                movd (mm_data1, mm7);          /* mm_data1 = low 32 bits of mm7 */

        /* only byte 0 holds the full total; the upper bytes are partial sums */
        eq_cnt = mm_data1 & 0xff;

        useDC = (eq_cnt  > DEBLOCK_VERT_USEDC_THR);

        #ifdef PP_SELF_CHECK
        if (useDC != useDC2) DPRINTF(_l("ERROR: MMX version of useDC is incorrect"));
        #endif

        return useDC;
}

/*
 * Decide whether the DC filter may be applied, according to QP.
 *
 * Five pairs of rows (row k is v + k*stride) are compared over 8 columns:
 * (0,5), (1,4), (1,8), (2,7) and (3,6).  Returns 0 as soon as any column of
 * any pair has an absolute difference greater than 2*QP, and 1 when no
 * column does.
 *
 * Scalar equivalent of one pair, per column i:
 *    if (abs(v[i+a*stride]-v[i+b*stride]) > 2*QP) return 0;
 * Note that the test is strictly "greater than": a difference of exactly
 * 2*QP survives the saturating subtraction as zero and does not reject.
 *
 * NOTE(review): the threshold is built as 0x02020202*QP, which places 2*QP in
 * every byte only while 2*QP fits in 8 bits -- confirm the QP range callers
 * pass in.
 */
static inline int deblock_vert_DC_on(uint8_t *v, stride_t stride, int QP)
{
 /* row indices of the two members of each compared pair */
 static const int row_a[]={0,1,1,2,3};
 static const int row_b[]={5,4,8,7,6};

 const __m64 threshold=_mm_set1_pi32(0x02020202*QP);
 int pair=0;

 while (pair<5)
  {
   const __m64 upper=*(__m64*)&(v[row_a[pair]*stride]);
   const __m64 lower=*(__m64*)&(v[row_b[pair]*stride]);

   /* |upper-lower| per byte: one of the two saturating differences is always 0 */
   __m64 excess=_mm_or_si64(_mm_subs_pu8(upper,lower),_mm_subs_pu8(lower,upper));

   /* a byte stays nonzero only where the difference exceeds the threshold */
   excess=_mm_subs_pu8(excess,threshold);

   /* fold the high half onto the low half so a single 32-bit read sees all 8 bytes */
   excess=_mm_or_si64(_mm_srli_si64(excess,32),excess);
   if (_mm_cvtsi64_si32(excess)!=0)
    return 0;

   pair++;
  }
 return 1;
}

/* function using MMX to copy an 8-pixel wide column and unpack to 16-bit values */
/* n is the number of rows to copy - this must be even */
static inline void deblock_vert_copy_and_unpack(stride_t stride, uint8_t *source, uint64_t *dest, int n)
{
        uint64_t *pmm1 = (uint64_t *)source;
        uint64_t *pmm2 = (uint64_t *)dest;
        int i = -n / 2;

        #ifdef PP_SELF_CHECK
        int j, k;
        #endif

        /* copy block to local store whilst unpacking to 16-bit values */
        unsigned char *eax=(unsigned char*)pmm1;
        unsigned char *ebx=(unsigned char*)pmm2;

        __m64 mm7=_mm_setzero_si64();                   /* set mm7 = 0                     */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -