wmv2.c

来自「君正早期ucos系统(只有早期的才不没有打包成库),MPLAYER,文件系统,图」· C语言 代码 · 共 1,478 行 · 第 1/4 页

C
1,478
字号
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;    a0 = col[8*0];    a1 = col[8*1];    a2 = col[8*2];    a3 = col[8*3];    c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1));    c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1));    c1 = a1 * C1 + a3 * C2;    c3 = a1 * C2 - a3 * C1;    dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)];    dest += line_size;    dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)];    dest += line_size;    dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)];    dest += line_size;    dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)];}#define RN_SHIFT 15#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))#define R1 R_FIX(0.6532814824)#define R2 R_FIX(0.2705980501)#define R3 R_FIX(0.5)#define R_SHIFT 11static inline void idct4row(DCTELEM *row){    int c0, c1, c2, c3, a0, a1, a2, a3;    //const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;    a0 = row[0];    a1 = row[1];    a2 = row[2];    a3 = row[3];    c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1));    c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1));    c1 = a1 * R1 + a3 * R2;    c3 = a1 * R2 - a3 * R1;    row[0]= (c0 + c1) >> R_SHIFT;    row[1]= (c2 + c3) >> R_SHIFT;    row[2]= (c2 - c3) >> R_SHIFT;    row[3]= (c0 - c1) >> R_SHIFT;}/* 8x4 & 4x8 WMV2 IDCT */#undef C_FIX#undef C1#undef C2#undef C3#define C_FIX(x) ((int)((x) * /*1.414213562 */ (1 << 15) + 0.5))#define C1 C_FIX(0.6532814824)#define C2 C_FIX(0.2705980501)#define C3 C_FIX(0.5)#define  wxr5  0x5A827642#define  wxr6  0x5A8230FC#define  wxr7  0x7D8A6A6E#define  wxr9  0x6A6E18F9#define  wxr10 0x471D7D8A#define  wxr8  0x18F9471D#ifdef JZ4740_MXU_OPT void simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block){    int i;    DCTELEM *blk;    /* IDCT4 and store */    S32I2M(xr11, ((C1 << 16) | (C2 & 0xffff)));            // xr11  (R1, R2)    S32I2M(xr12, (1 << 14));                       // xr11  (R1, R2)    S32I2M(xr13, (C3)) ;                   // xr13 (R3)  for(i=0; i<4; i++)  {  DCTELEM *col;  col = block + i*2;  S32LDD(xr2,  col, 0x10);          //  xr2  (nx1, x1)  S32LDD(xr4,  col, 0x30);          //  xr4  (nx3, x3)  S32LDD(xr1,  col, 0x00);          //  xr1  (nx0, x0)  S32LDD(xr3,  col, 0x20);          //  xr3  (nx2, x2)  D16MUL_HW    (xr5, xr11, xr2, xr6);    // xr5 (na1*C1)   xr6 (a1*C1)  D16MAC_AA_LW (xr5, xr11, xr4, xr6);    // xr5.nc1 (na1*C1 + na3*C2)    xr6.c1 (a1*C1 + a3*C2)  D16MUL_LW    (xr7, xr11, xr2, xr8);    // xr7 (na1*C2)   xr8 (a1*C2)  D16MAC_SS_HW (xr7, xr11, xr4, xr8);    // xr7.nc3 (na1*C2 - na3*C1)    xr8.c3 (a1*C2 - a3*C1)  D16MUL_LW    (xr9,  xr13, xr1, xr10);  // xr9  (C3*na0)   xr10 (C3*a0)  D16MUL_LW    (xr14, xr13, xr3, xr15);  // xr14 (C3*na2)   xr15 (C3*a2)  D32ADD_AS    (xr1, xr9,  xr14, xr2);   // xr1.nc0 (na0 + na2)*C3   xr2.nc2 (na0-na2)*C3  D32ADD_AS    (xr3, xr10, xr15, xr4);   // xr3.c0  (a0 + a2)*C3      xr4.c2   (a0-a2)*C3  D32ADD_AS (xr9, xr3,  xr6, xr10);      // xr9: (c0 + c1)       xr10: (c0 - c1)  D32ACC_AS (xr9, xr12, xr0, xr10);      // xr9: (c0 + c1)       xr10: (c0 - c1)  D32ADD_AS (xr5, xr1,  xr5, xr6);       // xr5: (nc0 + nc1)     xr6:  (nc0 - nc1)  D32ACC_AS (xr5, xr12, xr0, xr6);       // xr5: (nc0 + nc1)     xr6:  (nc0 - nc1)  D32ADD_AS (xr14, xr4, xr8, xr15);      // xr14: (c2 + c3)        xr15: (c2 - c3)  D32ACC_AS (xr14, xr12,xr0, xr15);      // xr14: (c2 + c3)        xr15: (c2 - c3)  D32ADD_AS (xr7, xr2, xr7, xr8);    // xr7:  (nc2 + nc3)   xr8:  (nc2 - nc3)  D32ACC_AS (xr7, xr12,xr0, xr8);    // xr7:  (nc2 + nc3)   xr8:  (nc2 - nc3)  D32SARL(xr5, xr5, xr9, 15);           // xr5 (nc0+nc1, c0+c1)  S32STD (xr5, col, 0x0);  D32SARL(xr7, xr7,xr14, 15);           // xr7 (nc2+nc3, c2+c3)  S32STD (xr7, col, 0x10);  D32SARL(xr8, xr8, xr15, 15);          // xr8  (nc2-nc3, c2-c3)  S32STD (xr8, col, 0x20);  D32SARL(xr6, xr6, xr10, 15);          // xr6  (nc0-nc1, c0-c1)  S32STD (xr6, col, 0x30);  }  S32I2M(xr5,wxr5) ;                 // xr5 (W4, W2)  S32I2M(xr6,wxr6) ;                 // xr6 (W4, W6)  S32I2M(xr7,wxr7) ;                 // xr7 (W1, W3)  S32I2M(xr8,wxr8) ;                 // xr8 (W7, W5)  S32I2M(xr9,wxr9) ;                 // xr9 (W3, W7)  S32I2M(xr10,wxr10) ;               // xr10(W5,W1)   for (i = 0; i < 4; i++)   /* idct rows */   {        blk = block + (i << 3);        S32LDD(xr1, blk, 0);              //  xr1 (x1, x0)        S32LDD(xr2, blk, 0x4);            //  xr2 (x3, x2)        S32LDD(xr3, blk, 0x8);            //  xr3 (x5, x4)        S32LDD(xr4, blk, 0xc);            //  xr4 (x7, x6)        S32SFL(xr1,xr1,xr2,xr2, ptn3);  // xr1(x1, x3)  xr2(x0, x2)        S32SFL(xr3,xr3,xr4,xr4, ptn3);  // xr3(x5, x7)  xr4(x4, x6)        D16MUL_WW(xr11, xr2, xr5, xr12);            //xr11(w4*x0)  xr12(w2*x2)        D16MAC_AA_WW(xr11,xr4,xr6,xr12);            //xr11(w4*x0+w4*x4) xr12:(w2*x2+w6*x6)        D16MUL_WW(xr13, xr2, xr6, xr14);            //xr13(w4*x0)  xr14(w6*x2)        D16MAC_SS_WW(xr13,xr4,xr5,xr14);            //xr13(w4*x0-w4*x4) xr14:(w6*x2+w6*x6)        D16MUL_HW(xr2, xr1, xr7, xr4);              //xr2:w1*x1    xr4:w3*x1        D16MAC_AS_LW(xr2,xr1,xr9,xr4);              //xr2:w1*x1 + w3*x3 xr4:w3*x1-w7*x3        D16MAC_AS_HW(xr2,xr3,xr10,xr4);             //        D16MAC_AS_LW(xr2,xr3,xr8,xr4);                      D16MACF_AA_WW(xr2, xr0, xr0, xr4);   //        D16MACF_AA_WW(xr11, xr0, xr0, xr13); //        D16MACF_AA_WW(xr12, xr0, xr0, xr14); //        D16MUL_HW(xr13, xr1, xr8, xr14);            //w7*(x4+x5) store in xr14 ,w3*(x6+x7) store in xr15        D16MAC_SS_LW(xr13,xr1,xr10,xr14);                   D16MAC_AA_HW(xr13,xr3,xr9,xr14);                    D16MAC_SA_LW(xr13,xr3,xr7,xr14);                    D16MACF_AA_WW(xr14, xr0, xr0, xr13);         Q16ADD_AS_WW(xr11,xr11,xr12,xr12);   //        Q16ADD_AS_WW(xr11, xr11, xr2, xr2);  // xr11 ((a0 + b0), (a1 + b1))  // xr2  ((a0 - b0), (a1 - b1))        Q16ADD_AS_XW(xr12, xr12, xr14, xr14);// xr12 ((a2 + b2), (a3 + b3))  // xr14 ((a2 - b2), (a3 - b3))        S32SFL(xr11,xr11,xr12,xr12, ptn3);   // xr11 ((a0 + b0), (a2 + b2))  // xr12 ((a1 + b1), (a3 + b3))        S32SFL(xr12,xr12,xr11,xr11, ptn3);         // xr11 ((a3 + b3), (a2 + b2))        S32LDD(xr3, dest, 0);        Q8ACCE_AA(xr11, xr3, xr0, xr12);        Q16SAT(xr3,xr11, xr12);        S32STD(xr3, dest, 0x0);        S32LDD(xr3, dest, 4);        Q8ACCE_AA(xr2, xr3, xr0, xr14);        Q16SAT(xr3,xr2, xr14);        S32STD(xr3, dest, 0x4);        dest += line_size;  }}#elsevoid simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block){    int i;       /* IDCT8 on each line */    for(i=0; i<4; i++) {        idctRowCondDC(block + i*8);    }    /* IDCT4 and store */    for(i=0;i<8;i++) {        idct4col_add(dest + i, line_size, block + i);    }}#endif#undef R_FIX(x)#undef R1#undef R2#undef R3#define R_FIX(x) ((int)((x) /* 1.414213562*/ * (1 << 15) + 0.5))#define R1 R_FIX(0.6532814824)#define R2 R_FIX(0.2705980501)#define R3 R_FIX(0.5)#ifdef JZ4740_MXU_OPTvoid simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block){    int i;        DCTELEM *row;    /* IDCT8 and store */        S32I2M(xr5,wxr5) ;                 // xr5 (W4, W2)        S32I2M(xr6,wxr6) ;                 // xr6 (W4, W6)        S32I2M(xr7,wxr7) ;                 // xr7 (W1, W3)        S32I2M(xr8,wxr8) ;                 // xr8 (W7, W5)        S32I2M(xr9,wxr9) ;                 // xr9 (W3, W7)        S32I2M(xr10,wxr10) ;               // xr10(W5, W1)        for(i=0; i<2; i++){        DCTELEM *blk;        blk = block + 2*i;        S32LDD(xr1, blk, 0x00);  //  xr1 (x4, x0)        S32LDD(xr2, blk, 0x10);  //  xr2 (x7, x3)        S32LDD(xr3, blk, 0x20);  //  xr3 (x6, x1)        S32LDD(xr4, blk, 0x30);  //  xr4 (x5, x2)        S32LDD(xr11, blk, 0x40);         //  xr1 (x4, x0)        S32LDD(xr12, blk, 0x50);         //  xr2 (x7, x3)        S32LDD(xr13, blk, 0x60);         //  xr3 (x6, x1)        S32LDD(xr14, blk, 0x70);         //  xr4 (x5, x2)        S32I2M(xr5,wxr5);        D16MUL_HW(xr2, xr5, xr1, xr9);    //xr2(h0 * W4) xr9(x0 * W4)        D16MAC_AA_HW(xr2,xr5,xr11,xr9);   //xr2(h0 * W4 + h4 * W4)    //xr9(x0 * W4 + x4 * W4)        D16MACF_AA_WW(xr2, xr0, xr0, xr9);  //xr2((h0*W4 + h4*W4), (x0*W4 + x4*W4))        D16MUL_LW(xr10, xr5, xr3, xr15);          // xr10(h2 * W2) xr15(x2 * W2)        D16MAC_AA_LW(xr10,xr6,xr13,xr15);         // xr10(h2 * W2 + h6 * W6)    // xr15(x2 * W2 + x6 * W6)        D16MACF_AA_WW(xr10, xr0, xr0, xr15);      // xr10((h2*W2 + h6*W6), (x2*W2 + x6*W6))        Q16ADD_AS_WW(xr2,xr2,xr10,xr9);   //xr2(h0*W4 + h4*W4 + h2*W2 + h6*W6) (x0*W4 + x4*W4 + x2*W2 + x6*W6)                                          //xr9(h0*W4 + h4*W4 - h2*W2 - h6*W6) (x0*W4 + x4*W4 - x2*W2 - x6*W6)        D16MUL_HW(xr10, xr5, xr1, xr15);          // xr10 (h0 * W4) xr15 (x0 * W4)        D16MAC_SS_HW(xr10,xr5,xr11,xr15);         // xr10 (h0 * W4 - h4 * W4)                                                  // xr15 (x0 * W4 - x4 * W4)        D16MACF_AA_WW(xr10, xr0, xr0, xr15);      // xr10 ((h0*W4 - h4*W4), (x0*W4 - x4*W4))        D16MUL_LW(xr11, xr6, xr3, xr1);           // xr11 (h2 * W6) xr1  (x2 * W6)        D16MAC_SS_LW(xr11,xr5,xr13,xr1);          // xr11 (h2 * W6 - h6 * W2)                                                  // xr1  (x2 * W6 - x6 * W2)        D16MACF_AA_WW(xr11, xr0, xr0, xr1);       // xr11 ((h2*W6 - h6*W2), (x2*W6 - x6*W2))        Q16ADD_AS_WW(xr10,xr10,xr11,xr15);         S32LDD(xr1, blk, 0x10);                    //  xr1 (x4, x0)        D16MUL_HW(xr11, xr7, xr1, xr13);           //        D16MAC_AA_LW(xr11,xr7,xr4,xr13);           //        D16MAC_AA_LW(xr11,xr8,xr12,xr13);          //        D16MAC_AA_HW(xr11,xr8,xr14,xr13);          //        D16MACF_AA_WW(xr11, xr0, xr0, xr13);       //        D16MUL_LW(xr3, xr7, xr1, xr13);                   D16MAC_SS_HW(xr3,xr8,xr4,xr13);                   D16MAC_SS_HW(xr3,xr7,xr12,xr13);                  D16MAC_SS_LW(xr3,xr8,xr14,xr13);                  D16MACF_AA_WW(xr3, xr0, xr0, xr13);               D16MUL_LW(xr5, xr8, xr1, xr13);                   D16MAC_SS_HW(xr5,xr7,xr4,xr13);                   D16MAC_AA_HW(xr5,xr8,xr12,xr13);                  D16MAC_AA_LW(xr5,xr7,xr14,xr13);                  D16MACF_AA_WW(xr5, xr0, xr0, xr13);         D16MUL_HW(xr1, xr8, xr1, xr13);                   D16MAC_SS_LW(xr1,xr8,xr4,xr13);                   D16MAC_AA_LW(xr1,xr7,xr12,xr13);                         D16MAC_SS_HW(xr1,xr7,xr14,xr13);                        D16MACF_AA_WW(xr1, xr0, xr0, xr13);          Q16ADD_AS_WW(xr2,xr2,xr11,xr11);         //        Q16ADD_AS_WW(xr10,xr10,xr3,xr3);         //        Q16ADD_AS_WW(xr15,xr15,xr5,xr5);         //        Q16ADD_AS_WW(xr9,xr9,xr1,xr1);          //        S32STD(xr2, blk, 0x00);        S32STD(xr3, blk, 0x60);        S32STD(xr10, blk, 0x10);        S32STD(xr11, blk, 0x70);        S32STD(xr1, blk, 0x40);        S32STD(xr9, blk, 0x30);        S32STD(xr15, blk, 0x20);        S32STD(xr5, blk, 0x50);    }        S32I2M(xr11, ((R1 << 16) | (R2 & 0xffff)));                // xr11  (R1, R2)        S32I2M(xr12, (1 << 14));                   // xr11  (R1, R2)        S32I2M(xr13, (R3)) ;               // xr13 (R3)        dest -= line_size;        row = block;        i = (unsigned int)row + 8 * 8 * 2;    /* IDCT4 on each line */    while ((unsigned int)row < i) {    S32LDD(xr1,  row, 0x00);     //  xr1 (x1, x0)    S32LDD(xr2,  row, 0x4);      //  xr2 (x3, x2)    S32LDD(xr14, row, 0x10);     //  xr14 (x1, x0)    S32LDD(xr15, row, 0x14);           //  xr15 (x3, x2)    S32SFL(xr1, xr1, xr2, xr2, ptn3);  // xr1 (a1.xr1, a3.xr3) xr2 (a0.x0, a2.x2)    D16MUL_HW    (xr3, xr1, xr11, xr4);    // xr3 (a1*R1)   xr4 (a1*R2)    D16MAC_SA_LW (xr4, xr1, xr11, xr3);    // xr4.c3 (a1*R2 - a3*R1)   xr3.c1 (a1*R1 + a3*R2)    D16MUL_LW(xr5, xr13, xr2, xr6);    // xr5 (R3*a0)   xr6 (R3*a2)    D32ADD_AS(xr5, xr5, xr6, xr6);     // xr5.c0 (a0 + a2) *R3   xr6.c2 (a0-a2) *R3    D32ADD_AS (xr9, xr5, xr3, xr10);   // xr9: (c0 + c1)   xr10: (c0 - c1)    D32ADD_AS (xr7, xr6, xr4, xr8);    // xr7: (c2 + c3)   xr8: (c2 - c3)    D32ACC_AS (xr7, xr12, xr0, xr9);   // xr7: (c2 + c3)   xr9: (c0 + c1)    D32ACC_AS (xr10,xr12, xr0, xr8);   // xr10: (c0 - c1)   xr8: (c2 - c3)    S32LDIV(xr1, dest, line_size, 0x00);          //  xr1 (dest3,dest2,dest1,dest0)    D32SARL(xr7, xr7, xr9, 15);        // xr7 (c2+c3, c0+c1)    D32SARL(xr8, xr10,xr8, 15);        // xr8 (c0-c1, c2-c3)    Q8ACCE_AA(xr8, xr1, xr0, xr7);//    S32SFL(xr14, xr14, xr15, xr15, ptn3);  // xr14 (a1.xr1, a3.xr3) xr15 (a0.x0, a2.x2)    D16MUL_HW    (xr3, xr14, xr11, xr4);    // xr3 (a1*R1)   xr4 (a1*R2)    D16MAC_SA_LW (xr4, xr14, xr11, xr3);    // xr4.c3 (a1*R2 - a3*R1)   xr3.c1 (a1*R1 + a3*R2)    D16MUL_LW(xr5, xr13, xr15, xr6);     // xr5 (R3*a0)   xr6 (R3*a2)    D32ADD_AS(xr5, xr5, xr6, xr6);       // xr5.c0 (a0 + a2) *R3   xr6.c2 (a0-a2) *R3    D32ADD_AS (xr1, xr5, xr3, xr2);      // xr1: (c0 + c1)   xr2: (c0 - c1)    D32ADD_AS (xr3, xr6, xr4, xr4);      // xr3: (c2 + c3)   xr4: (c2 - c3)    D32ACC_AS (xr3, xr12, xr0, xr1);     // xr3: (c2 + c3)   xr1: (c0 + c1)    D32ACC_AS (xr2,xr12, xr0, xr4);      // xr2: (c0 - c1)   xr4: (c2 - c3)    S32LDDV(xr5, dest, line_size, 0x00); //  xr1 (dest3,dest2,dest1,dest0)    D32SARL(xr3, xr3, xr1, 15);          // xr3 (c2+c3, c0+c1)    D32SARL(xr4, xr2,xr4, 15);           // xr4 (c0-c1, c2-c3)    Q8ACCE_AA(xr4, xr5, xr0, xr3);// store    Q16SAT(xr8, xr8, xr7);    Q16SAT(xr4, xr4, xr3);    S32STD(xr8, dest, 0);    S32SDIV(xr4, dest, line_size, 0);    row += 16;    }}#elsevoid simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block){    int i;    /* IDCT4 on each line */    for(i=0; i<8; i++) {        idct4row(block + i*8);    }    /* IDCT8 and store */    for(i=0; i<4; i++){        idctSparseColAdd(dest + i, line_size, block + i);    }}#endif#endif

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?