simple_idct.c

来自「君正早期ucos系统(只有早期的才不没有打包成库),MPLAYER,文件系统,图」· C语言 代码 · 共 925 行 · 第 1/3 页

C
925
字号
        }        if (col[8*7]) {            MAC16(b0, + W7, col[8*7]);            MAC16(b1, - W5, col[8*7]);            MAC16(b2, + W3, col[8*7]);            MAC16(b3, - W1, col[8*7]);        }        dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)];        dest += line_size;        dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)];        dest += line_size;        dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)];        dest += line_size;        dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)];        dest += line_size;        dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)];        dest += line_size;        dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)];        dest += line_size;        dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)];        dest += line_size;        dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)];}static inline void idctSparseCol (DCTELEM * col){        int a0, a1, a2, a3, b0, b1, b2, b3;        /* XXX: I did that only to give same values as previous code */        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));        a1 = a0;        a2 = a0;        a3 = a0;        a0 +=  + W2*col[8*2];        a1 +=  + W6*col[8*2];        a2 +=  - W6*col[8*2];        a3 +=  - W2*col[8*2];        MUL16(b0, W1, col[8*1]);        MUL16(b1, W3, col[8*1]);        MUL16(b2, W5, col[8*1]);        MUL16(b3, W7, col[8*1]);        MAC16(b0, + W3, col[8*3]);        MAC16(b1, - W7, col[8*3]);        MAC16(b2, - W1, col[8*3]);        MAC16(b3, - W5, col[8*3]);        if(col[8*4]){            a0 += + W4*col[8*4];            a1 += - W4*col[8*4];            a2 += - W4*col[8*4];            a3 += + W4*col[8*4];        }        if (col[8*5]) {            MAC16(b0, + W5, col[8*5]);            MAC16(b1, - W1, col[8*5]);            MAC16(b2, + W7, col[8*5]);            MAC16(b3, + W3, col[8*5]);        }        if(col[8*6]){            a0 += + W6*col[8*6];            a1 += - W2*col[8*6];            a2 += + W2*col[8*6];            a3 += - W6*col[8*6];        }        if (col[8*7]) {            MAC16(b0, + W7, col[8*7]);            MAC16(b1, - W5, col[8*7]);            MAC16(b2, + W3, col[8*7]);            MAC16(b3, - W1, col[8*7]);        }        col[0 ] = ((a0 + b0) >> COL_SHIFT);        col[8 ] = ((a1 + b1) >> COL_SHIFT);        col[16] = ((a2 + b2) >> COL_SHIFT);        col[24] = ((a3 + b3) >> COL_SHIFT);        col[32] = ((a3 - b3) >> COL_SHIFT);        col[40] = ((a2 - b2) >> COL_SHIFT);        col[48] = ((a1 - b1) >> COL_SHIFT);        col[56] = ((a0 - b0) >> COL_SHIFT);}#ifdef JZ4740_MXU_OPTvoid simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block){   DCTELEM *inptr = block, *endptr;   S32I2M(xr5,WM44) ;         // xr5 (W4, W4)   S32I2M(xr6,WM26) ;         // xr6 (W2, W6)   S32I2M(xr7,WM13) ;         // xr7 (W1, W3)   S32I2M(xr8,WM57) ;         // xr8 (W5, W7)   endptr = inptr + 8;// calculate column   S32LDD(xr1, inptr, 0x00);    //  xr1 (nx0, x0)   do {// calculate a0,a1,a2,a3      S32LDD(xr2, inptr, 0x20);    //  xr2 (nx2, x2)      S32LDD(xr3, inptr, 0x40);    //  xr3 (nx4, x4)      S32LDD(xr4, inptr, 0x60);    //  xr4 (nx6, x6)// computer ah0,a0 ah3,a3      D16MUL_HW(xr11,  xr5, xr1, xr13);    //xr11 (W4*nx0) xr13(W4*x0)      D16MAC_AA_HW(xr11, xr5, xr3, xr13);  //xr11(W4*nx0 + W4*nx4) xr13(W4*x0 + W4*x4)      D16MUL_HW(xr12,xr6,xr2,xr14);        //xr12 (W2*nx2) xr14(W2*x2)      D16MAC_AA_LW(xr12,xr6,xr4,xr14);     //xr12 (W2*nx2 + W6*nx6) xr14(W2*x2 + W6*x6)      D32ADD_AS(xr11, xr11, xr12, xr12);   //xr11 (W4*nx0 + W4*nx4 + W2*nx2 + W6*nx6),na0                                           //xr12 (W4*nx0 + W4*nx4 - W2*nx2 - W6*nx6),na3      D32ADD_AS(xr13, xr13, xr14, xr14);   //xr13 (W4*x0 + W4*x4 + W2*x2 + W6*x6),a0                                           //xr14 (W4*x0 + W4*x4 - W2*x2 - W6*x6),a3      D16MACF_AA_WW(xr11, xr0, xr0, xr13); // r11 (na0,a0)      D16MACF_AA_WW(xr12, xr0, xr0, xr14); // r12 (na3,a3)//      D16MUL_HW(xr13,  xr5, xr1, xr10);      //xr13(W4*nx0) xr10(W4*x0)      D16MAC_SS_HW(xr13, xr5, xr3, xr10);    //xr13(W4*nx0 - W4*nx4) xr10(W4*x0 - W4*x4)      D16MUL_LW(xr14,xr6,xr2,xr9);          //xr14 (W6*nx2)  xr9(W6*x2)      D16MAC_SS_HW(xr14,xr6,xr4,xr9);       //xr14 (W6*nx2 - W2*nx6) xr9(W6*x2 - W2*x6)      D32ADD_AS(xr13, xr13, xr14, xr14);    //xr13 (W4*nx0 - W4*nx4 + W6*nx2 - W2*nx6),na1                                            //xr14 (W4*nx0 - W4*nx4 - W6*nx2 + W2*nx6),na2      D32ADD_AS(xr10, xr10, xr9, xr9);      //xr10 (W4*x0 - W4*x4 + W6*x2 - W2*x6),a1                                            //xr9 (W4*x0 - W4*x4 - W6*x2 + W2*x6),a2      D16MACF_AA_WW(xr13, xr0, xr0, xr10);  // r13 (na1,a1)      D16MACF_AA_WW(xr14, xr0, xr0, xr9);   // r14 (na2,a2)//-------------------------------------------------------------------------------------      S32LDD(xr1, inptr, 0x10);    //  xr1 (nx1, x1)      S32LDD(xr2, inptr, 0x30);    //  xr2 (nx3, x3)      S32LDD(xr3, inptr, 0x50);    //  xr3 (nx5, x5)      S32LDD(xr4, inptr, 0x70);    //  xr4 (nx7, x7)// calculate b0,b1,b2,b3// calculate b0      D16MUL_HW(xr9, xr7, xr1, xr10);      //xr9(nx1 * W1) xr10(x1 * W1)      D16MAC_AA_LW(xr9, xr7, xr2, xr10);   //xr9(nx1*W1 + nx3*W3) xr10(x1*W1 + x3*W3)      D16MAC_AA_HW(xr9, xr8, xr3, xr10);   //xr9(nx1*W1 + nx3*W3 + nx5*W5)                                           //xr10(x1*W1 + x3*W3 + x5*W5)      D16MAC_AA_LW(xr9, xr8, xr4, xr10);   //xr9(nx1*W1 + nx3*W3 + nx5*W5 + nx7*W7)                                           //xr10(x1*W1 + x3*W3 + x5*W5 + nx7*W7)      D16MACF_AA_WW(xr9, xr0, xr0, xr10);  //xr9 (nb0,b0)// calculate b1      D16MUL_LW(xr10, xr7, xr1, xr15);      //xr10(nx1 * W3) xr15(x1 * W3)      D16MAC_SS_LW(xr10, xr8, xr2, xr15);   //xr10(nx1*W3 - nx3*W7) xr15(x1*W3 - x3*W7)      D16MAC_SS_HW(xr10, xr7, xr3, xr15);   //xr10(nx1*W3 - nx3*W7 - nx5*W1)                                            //xr15(x1*W3 - x3*W7 - x5*W1)      D16MAC_SS_HW(xr10, xr8, xr4, xr15);   //xr10(nx1*W3 - nx3*W7 - nx5*W1 - nx7*W5)                                            //xr15(x1*W3 - x3*W7 - x5*W1 - nx7*W5)      D16MACF_AA_WW(xr10, xr0, xr0, xr15);  //xr10 (nb1,b1)// store result      Q16ADD_AS_WW(xr11,xr11,xr9,xr9);      //xr11(na0+nb0,a0+b0) xr9(na0-nb0, a0-b0)      S32STD(xr11, inptr, 0x00);      S32STD(xr9,  inptr, 0x70);      Q16ADD_AS_WW(xr13,xr13,xr10,xr10);    //xr13(na1+nb1,a1+b1) xr10(na1-nb1, a1-b1)      S32STD(xr13, inptr, 0x10);      S32STD(xr10, inptr, 0x60);// calculate b2      D16MUL_HW(xr9, xr8, xr1, xr10);      //xr9(nx1 * W5) xr10(x1 * W5)      D16MAC_SS_HW(xr9, xr7, xr2, xr10);   //xr9(nx1*W5 - nx3*W1) xr10(x1*W5 - x3*W1)      D16MAC_AA_LW(xr9, xr8, xr3, xr10);   //xr9(nx1*W5 - nx3*W1 + nx5*W7)                                           //xr10(x1*W5 - x3*W1 + x5*W7)      D16MAC_AA_LW(xr9, xr7, xr4, xr10);   //xr9(nx1*W5 - nx3*W1 + nx5*W7 + nx7*W3)                                           //xr10(x1*W5 - x3*W1 + x5*W7 + nx7*W3)      D16MACF_AA_WW(xr9, xr0, xr0, xr10);  //xr9 (nb2,b2)// calculate b3      D16MUL_LW(xr10, xr8, xr1, xr15);      //xr10(nx1 * W7) xr15(x1 * W7)      D16MAC_SS_HW(xr10, xr8, xr2, xr15);   //xr10(nx1*W7 - nx3*W5) xr15(x1*W7 - x3*W5)      D16MAC_AA_LW(xr10, xr7, xr3, xr15);   //xr10(nx1*W7 - nx3*W5 + nx5*W3)                                            //xr15(x1*W7 - x3*W5 + x5*W3)      D16MAC_SS_HW(xr10, xr7, xr4, xr15);   //xr10(nx1*W7 - nx3*W5 + nx5*W3 - nx7*W1)                                            //xr15(x1*W7 - x3*W5 + x5*W3 - nx7*W1)      D16MACF_AA_WW(xr10, xr0, xr0, xr15);  //xr10 (nb3,b3)// store result      Q16ADD_AS_WW(xr14,xr14,xr9,xr9);      //xr14(na2+nb2,a2+b2) xr9(na2-nb2, a2-b2)      S32STD(xr14, inptr, 0x20);      S32STD(xr9,  inptr, 0x50);      Q16ADD_AS_WW(xr12,xr12,xr10,xr10);    //xr12(na3+nb3,a3+b3) xr10(na3-nb3, a3-b3)      S32LDI(xr1,  inptr, 0x04);       //  xr1 (nx0, x0)      S32STD(xr12, inptr, 0x2c);      S32STD(xr10, inptr, 0x3c);   } while (inptr != endptr);   inptr = block;   endptr = inptr + 8*8;// calculate line   S32LDD(xr1, inptr, 0);          //  xr1 (x1, x0)   do {      S32LDD(xr2, inptr, 0x4);        //  xr2 (x3, x2)      S32LDD(xr3, inptr, 0x8);        //  xr3 (x5, x4)      S32LDD(xr4, inptr, 0xc);        //  xr4 (x7, x6)// calculate b0,b1,b2,b3      D16MUL_HW(xr11, xr1, xr7, xr12); // xr11 (W1*x1)  xr12 (W3*x1)      D16MUL_HW(xr13, xr1, xr8, xr14); // xr13 (W5*x1)  xr14 (W7*x1)      D16MAC_SA_HW(xr13,xr2,xr7,xr11); // xr13 (W5*x1 - W1*x3) xr11 (W1*x1 + W3*x3)      D16MAC_SS_HW(xr14,xr2,xr8,xr12); // xr14 (W7*x1 - W5*x3) xr12 (W3*x1 - W7*x3)      D16MAC_SA_HW(xr12,xr3,xr7,xr14); // xr12 (W3*x1 - W7*x3 - W1*x5)                                       // xr14 (W7*x1 - W5*x3 + W3*x5)      D16MAC_AA_HW(xr11,xr3,xr8,xr13); // xr11 (W1*x1 + W3*x3 + W5*x5)                                       // xr13 (W5*x1 - W1*x3 + W7*x5)//calculate a0,a1,a2,a3      D16MUL_LW(xr9, xr1, xr5, xr10); // xr9 (W4*x0)  xr10 (W4*x0)      D16MAC_AS_LW(xr9,xr3,xr5,xr10); // xr9 (W4*x0 + W4*x4) xr10 (W4*x0 - W4*x4)      D16MUL_LW(xr1, xr2, xr6, xr3); // xr1 (W2*x2)  xr3 (W6*x2)      D16MAC_SA_LW(xr3,xr4,xr6,xr1); // xr3 (W6*x2 - W2*x6) xr1 (W2*x2 + W6*x6)//schedule b0 ~ b3      D16MAC_SA_HW(xr12,xr4,xr8,xr11); // xr12 (W3*x1 - W7*x3 - W1*x5 - W5*x7),b1                                       // xr11 (W1*x1 + W3*x3 + W5*x5 + W7*x7),b0      D16MAC_SA_HW(xr14,xr4,xr7,xr13); // xr14 (W7*x1 - W5*x3 + W3*x5 - W1*x7),b3                                       // xr13 (W5*x1 - W1*x3 + W7*x5 + W3*x7),b2// continue a0 ~ a3      D32ADD_AS(xr2, xr9, xr1, xr4);   //xr2 (W4*x0 + W4*x4 + W2*x2 + W6*x6),a0                                       //xr4 (W4*x0 + W4*x4 - W2*x2 - W6*x6),a3      D32ADD_AS(xr9,xr10,xr3, xr1);    //xr9(W4*x0 - W4*x4 + W6*x2 - W2*x6),a1                                       //xr1(W4*x0 - W4*x4 - W6*x2 + W2*x6),a2//calculate a +/- b      D32ADD_AS(xr2, xr2, xr11, xr11); //xr2(a0 + b0)  xr11 (a0 - b0)      D32ADD_AS(xr4, xr4, xr14, xr14); //xr4(a3 + b3)  xr14 (a3 - b3)      D32ADD_AS(xr9, xr9, xr12, xr12); //xr9(a1 + b1)  xr12 (a1 - b1)      D32ADD_AS(xr1, xr1, xr13, xr13); //xr1(a2 + b2)  xr13 (a2 - b2)//padding and saturate      D16MACF_AA_WW(xr9, xr0, xr0, xr2);  // r9 (a1+b1, a0+b0)      D16MACF_AA_WW(xr4, xr0, xr0, xr1);  // r4 (a3+b3, a2+b2)      D16MACF_AA_WW(xr13,xr0, xr0, xr14); // r13(a2-b2, a3-b3)      D16MACF_AA_WW(xr11,xr0, xr0, xr12); // r11(a0-b0, a1-b1)      Q16SAT(xr4, xr4, xr9);      Q16SAT(xr11,xr11, xr13);      S32LDI(xr1, inptr,0x10);          //  xr1 (x1, x0)      S32STD(xr4, dest, 0x0);      S32STD(xr11,dest, 0x4);      dest += line_size;   } while (inptr != endptr);}#elsevoid simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block){    int i;    for(i=0; i<8; i++)        idctRowCondDC(block + i*8);    for(i=0; i<8; i++)        idctSparseColPut(dest + i, line_size, block + i);}#endif#ifdef JZ4740_MXU_OPTvoid simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block){   DCTELEM *inptr = block, *endptr;   S32I2M(xr5,WM44) ;         // xr5 (W4, W4)   S32I2M(xr6,WM26) ;         // xr6 (W2, W6)   S32I2M(xr7,WM13) ;         // xr7 (W1, W3)   S32I2M(xr8,WM57) ;         // xr8 (W5, W7)   endptr = inptr + 8;// calculate column   S32LDD(xr1, inptr, 0x00);    //  xr1 (nx0, x0)   do {// calculate a0,a1,a2,a3      S32LDD(xr2, inptr, 0x20);    //  xr2 (nx2, x2)      S32LDD(xr3, inptr, 0x40);    //  xr3 (nx4, x4)      S32LDD(xr4, inptr, 0x60);    //  xr4 (nx6, x6)// computer ah0,a0 ah3,a3      D16MUL_HW(xr11,  xr5, xr1, xr13);    //xr11 (W4*nx0) xr13(W4*x0)      D16MAC_AA_HW(xr11, xr5, xr3, xr13);  //xr11(W4*nx0 + W4*nx4) xr13(W4*x0 + W4*x4)      D16MUL_HW(xr12,xr6,xr2,xr14);        //xr12 (W2*nx2) xr14(W2*x2)      D16MAC_AA_LW(xr12,xr6,xr4,xr14);     //xr12 (W2*nx2 + W6*nx6) xr14(W2*x2 + W6*x6)      D32ADD_AS(xr11, xr11, xr12, xr12);   //xr11 (W4*nx0 + W4*nx4 + W2*nx2 + W6*nx6),na0                                           //xr12 (W4*nx0 + W4*nx4 - W2*nx2 - W6*nx6),na3      D32ADD_AS(xr13, xr13, xr14, xr14);   //xr13 (W4*x0 + W4*x4 + W2*x2 + W6*x6),a0                                           //xr14 (W4*x0 + W4*x4 - W2*x2 - W6*x6),a3      D16MACF_AA_WW(xr11, xr0, xr0, xr13); // r11 (na0,a0)      D16MACF_AA_WW(xr12, xr0, xr0, xr14); // r12 (na3,a3)//      D16MUL_HW(xr13,  xr5, xr1, xr10);      //xr13(W4*nx0) xr10(W4*x0)      D16MAC_SS_HW(xr13, xr5, xr3, xr10);    //xr13(W4*nx0 - W4*nx4) xr10(W4*x0 - W4*x4)      D16MUL_LW(xr14,xr6,xr2,xr9);          //xr14 (W6*nx2)  xr9(W6*x2)      D16MAC_SS_HW(xr14,xr6,xr4,xr9);       //xr14 (W6*nx2 - W2*nx6) xr9(W6*x2 - W2*x6)      D32ADD_AS(xr13, xr13, xr14, xr14);    //xr13 (W4*nx0 - W4*nx4 + W6*nx2 - W2*nx6),na1                                            //xr14 (W4*nx0 - W4*nx4 - W6*nx2 + W2*nx6),na2      D32ADD_AS(xr10, xr10, xr9, xr9);      //xr10 (W4*x0 - W4*x4 + W6*x2 - W2*x6),a1                                            //xr9 (W4*x0 - W4*x4 - W6*x2 + W2*x6),a2      D16MACF_AA_WW(xr13, xr0, xr0, xr10);  // r13 (na1,a1)      D16MACF_AA_WW(xr14, xr0, xr0, xr9);   // r14 (na2,a2)//-------------------------------------------------------------------------------------      S32LDD(xr1, inptr, 0x10);    //  xr1 (nx1, x1)      S32LDD(xr2, inptr, 0x30);    //  xr2 (nx3, x3)      S32LDD(xr3, inptr, 0x50);    //  xr3 (nx5, x5)      S32LDD(xr4, inptr, 0x70);    //  xr4 (nx7, x7)// calculate b0,b1,b2,b3// calculate b0      D16MUL_HW(xr9, xr7, xr1, xr10);      //xr9(nx1 * W1) xr10(x1 * W1)      D16MAC_AA_LW(xr9, xr7, xr2, xr10);   //xr9(nx1*W1 + nx3*W3) xr10(x1*W1 + x3*W3)      D16MAC_AA_HW(xr9, xr8, xr3, xr10);   //xr9(nx1*W1 + nx3*W3 + nx5*W5)                                           //xr10(x1*W1 + x3*W3 + x5*W5)      D16MAC_AA_LW(xr9, xr8, xr4, xr10);   //xr9(nx1*W1 + nx3*W3 + nx5*W5 + nx7*W7)                                           //xr10(x1*W1 + x3*W3 + x5*W5 + nx7*W7)      D16MACF_AA_WW(xr9, xr0, xr0, xr10);  //xr9 (nb0,b0)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?