simple_idct.c
来自「君正早期ucos系统(只有早期的才不没有打包成库),MPLAYER,文件系统,图」· C语言 代码 · 共 925 行 · 第 1/3 页
C
925 行
} if (col[8*7]) { MAC16(b0, + W7, col[8*7]); MAC16(b1, - W5, col[8*7]); MAC16(b2, + W3, col[8*7]); MAC16(b3, - W1, col[8*7]); } dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)]; dest += line_size; dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)]; dest += line_size; dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)]; dest += line_size; dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)]; dest += line_size; dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)]; dest += line_size; dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)]; dest += line_size; dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; dest += line_size; dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)];}static inline void idctSparseCol (DCTELEM * col){ int a0, a1, a2, a3, b0, b1, b2, b3; /* XXX: I did that only to give same values as previous code */ a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); a1 = a0; a2 = a0; a3 = a0; a0 += + W2*col[8*2]; a1 += + W6*col[8*2]; a2 += - W6*col[8*2]; a3 += - W2*col[8*2]; MUL16(b0, W1, col[8*1]); MUL16(b1, W3, col[8*1]); MUL16(b2, W5, col[8*1]); MUL16(b3, W7, col[8*1]); MAC16(b0, + W3, col[8*3]); MAC16(b1, - W7, col[8*3]); MAC16(b2, - W1, col[8*3]); MAC16(b3, - W5, col[8*3]); if(col[8*4]){ a0 += + W4*col[8*4]; a1 += - W4*col[8*4]; a2 += - W4*col[8*4]; a3 += + W4*col[8*4]; } if (col[8*5]) { MAC16(b0, + W5, col[8*5]); MAC16(b1, - W1, col[8*5]); MAC16(b2, + W7, col[8*5]); MAC16(b3, + W3, col[8*5]); } if(col[8*6]){ a0 += + W6*col[8*6]; a1 += - W2*col[8*6]; a2 += + W2*col[8*6]; a3 += - W6*col[8*6]; } if (col[8*7]) { MAC16(b0, + W7, col[8*7]); MAC16(b1, - W5, col[8*7]); MAC16(b2, + W3, col[8*7]); MAC16(b3, - W1, col[8*7]); } col[0 ] = ((a0 + b0) >> COL_SHIFT); col[8 ] = ((a1 + b1) >> COL_SHIFT); col[16] = ((a2 + b2) >> COL_SHIFT); col[24] = ((a3 + b3) >> COL_SHIFT); col[32] = ((a3 - b3) >> COL_SHIFT); col[40] = ((a2 - b2) >> COL_SHIFT); col[48] = ((a1 - b1) >> COL_SHIFT); col[56] = ((a0 - b0) >> COL_SHIFT);}#ifdef JZ4740_MXU_OPTvoid simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block){ DCTELEM *inptr = block, *endptr; S32I2M(xr5,WM44) ; // xr5 (W4, W4) S32I2M(xr6,WM26) ; // xr6 (W2, W6) S32I2M(xr7,WM13) ; // xr7 (W1, W3) S32I2M(xr8,WM57) ; // xr8 (W5, W7) endptr = inptr + 8;// calculate column S32LDD(xr1, inptr, 0x00); // xr1 (nx0, x0) do {// calculate a0,a1,a2,a3 S32LDD(xr2, inptr, 0x20); // xr2 (nx2, x2) S32LDD(xr3, inptr, 0x40); // xr3 (nx4, x4) S32LDD(xr4, inptr, 0x60); // xr4 (nx6, x6)// computer ah0,a0 ah3,a3 D16MUL_HW(xr11, xr5, xr1, xr13); //xr11 (W4*nx0) xr13(W4*x0) D16MAC_AA_HW(xr11, xr5, xr3, xr13); //xr11(W4*nx0 + W4*nx4) xr13(W4*x0 + W4*x4) D16MUL_HW(xr12,xr6,xr2,xr14); //xr12 (W2*nx2) xr14(W2*x2) D16MAC_AA_LW(xr12,xr6,xr4,xr14); //xr12 (W2*nx2 + W6*nx6) xr14(W2*x2 + W6*x6) D32ADD_AS(xr11, xr11, xr12, xr12); //xr11 (W4*nx0 + W4*nx4 + W2*nx2 + W6*nx6),na0 //xr12 (W4*nx0 + W4*nx4 - W2*nx2 - W6*nx6),na3 D32ADD_AS(xr13, xr13, xr14, xr14); //xr13 (W4*x0 + W4*x4 + W2*x2 + W6*x6),a0 //xr14 (W4*x0 + W4*x4 - W2*x2 - W6*x6),a3 D16MACF_AA_WW(xr11, xr0, xr0, xr13); // r11 (na0,a0) D16MACF_AA_WW(xr12, xr0, xr0, xr14); // r12 (na3,a3)// D16MUL_HW(xr13, xr5, xr1, xr10); //xr13(W4*nx0) xr10(W4*x0) D16MAC_SS_HW(xr13, xr5, xr3, xr10); //xr13(W4*nx0 - W4*nx4) xr10(W4*x0 - W4*x4) D16MUL_LW(xr14,xr6,xr2,xr9); //xr14 (W6*nx2) xr9(W6*x2) D16MAC_SS_HW(xr14,xr6,xr4,xr9); //xr14 (W6*nx2 - W2*nx6) xr9(W6*x2 - W2*x6) D32ADD_AS(xr13, xr13, xr14, xr14); //xr13 (W4*nx0 - W4*nx4 + W6*nx2 - W2*nx6),na1 //xr14 (W4*nx0 - W4*nx4 - W6*nx2 + W2*nx6),na2 D32ADD_AS(xr10, xr10, xr9, xr9); //xr10 (W4*x0 - W4*x4 + W6*x2 - W2*x6),a1 //xr9 (W4*x0 - W4*x4 - W6*x2 + W2*x6),a2 D16MACF_AA_WW(xr13, xr0, xr0, xr10); // r13 (na1,a1) D16MACF_AA_WW(xr14, xr0, xr0, xr9); // r14 (na2,a2)//------------------------------------------------------------------------------------- S32LDD(xr1, inptr, 0x10); // xr1 (nx1, x1) S32LDD(xr2, inptr, 0x30); // xr2 (nx3, x3) S32LDD(xr3, inptr, 0x50); // xr3 (nx5, x5) S32LDD(xr4, inptr, 0x70); // xr4 (nx7, x7)// calculate b0,b1,b2,b3// calculate b0 D16MUL_HW(xr9, xr7, xr1, xr10); //xr9(nx1 * W1) xr10(x1 * W1) D16MAC_AA_LW(xr9, xr7, xr2, xr10); //xr9(nx1*W1 + nx3*W3) xr10(x1*W1 + x3*W3) D16MAC_AA_HW(xr9, xr8, xr3, xr10); //xr9(nx1*W1 + nx3*W3 + nx5*W5) //xr10(x1*W1 + x3*W3 + x5*W5) D16MAC_AA_LW(xr9, xr8, xr4, xr10); //xr9(nx1*W1 + nx3*W3 + nx5*W5 + nx7*W7) //xr10(x1*W1 + x3*W3 + x5*W5 + nx7*W7) D16MACF_AA_WW(xr9, xr0, xr0, xr10); //xr9 (nb0,b0)// calculate b1 D16MUL_LW(xr10, xr7, xr1, xr15); //xr10(nx1 * W3) xr15(x1 * W3) D16MAC_SS_LW(xr10, xr8, xr2, xr15); //xr10(nx1*W3 - nx3*W7) xr15(x1*W3 - x3*W7) D16MAC_SS_HW(xr10, xr7, xr3, xr15); //xr10(nx1*W3 - nx3*W7 - nx5*W1) //xr15(x1*W3 - x3*W7 - x5*W1) D16MAC_SS_HW(xr10, xr8, xr4, xr15); //xr10(nx1*W3 - nx3*W7 - nx5*W1 - nx7*W5) //xr15(x1*W3 - x3*W7 - x5*W1 - nx7*W5) D16MACF_AA_WW(xr10, xr0, xr0, xr15); //xr10 (nb1,b1)// store result Q16ADD_AS_WW(xr11,xr11,xr9,xr9); //xr11(na0+nb0,a0+b0) xr9(na0-nb0, a0-b0) S32STD(xr11, inptr, 0x00); S32STD(xr9, inptr, 0x70); Q16ADD_AS_WW(xr13,xr13,xr10,xr10); //xr13(na1+nb1,a1+b1) xr10(na1-nb1, a1-b1) S32STD(xr13, inptr, 0x10); S32STD(xr10, inptr, 0x60);// calculate b2 D16MUL_HW(xr9, xr8, xr1, xr10); //xr9(nx1 * W5) xr10(x1 * W5) D16MAC_SS_HW(xr9, xr7, xr2, xr10); //xr9(nx1*W5 - nx3*W1) xr10(x1*W5 - x3*W1) D16MAC_AA_LW(xr9, xr8, xr3, xr10); //xr9(nx1*W5 - nx3*W1 + nx5*W7) //xr10(x1*W5 - x3*W1 + x5*W7) D16MAC_AA_LW(xr9, xr7, xr4, xr10); //xr9(nx1*W5 - nx3*W1 + nx5*W7 + nx7*W3) //xr10(x1*W5 - x3*W1 + x5*W7 + nx7*W3) D16MACF_AA_WW(xr9, xr0, xr0, xr10); //xr9 (nb2,b2)// calculate b3 D16MUL_LW(xr10, xr8, xr1, xr15); //xr10(nx1 * W7) xr15(x1 * W7) D16MAC_SS_HW(xr10, xr8, xr2, xr15); //xr10(nx1*W7 - nx3*W5) xr15(x1*W7 - x3*W5) D16MAC_AA_LW(xr10, xr7, xr3, xr15); //xr10(nx1*W7 - nx3*W5 + nx5*W3) //xr15(x1*W7 - x3*W5 + x5*W3) D16MAC_SS_HW(xr10, xr7, xr4, xr15); //xr10(nx1*W7 - nx3*W5 + nx5*W3 - nx7*W1) //xr15(x1*W7 - x3*W5 + x5*W3 - nx7*W1) D16MACF_AA_WW(xr10, xr0, xr0, xr15); //xr10 (nb3,b3)// store result Q16ADD_AS_WW(xr14,xr14,xr9,xr9); //xr14(na2+nb2,a2+b2) xr9(na2-nb2, a2-b2) S32STD(xr14, inptr, 0x20); S32STD(xr9, inptr, 0x50); Q16ADD_AS_WW(xr12,xr12,xr10,xr10); //xr12(na3+nb3,a3+b3) xr10(na3-nb3, a3-b3) S32LDI(xr1, inptr, 0x04); // xr1 (nx0, x0) S32STD(xr12, inptr, 0x2c); S32STD(xr10, inptr, 0x3c); } while (inptr != endptr); inptr = block; endptr = inptr + 8*8;// calculate line S32LDD(xr1, inptr, 0); // xr1 (x1, x0) do { S32LDD(xr2, inptr, 0x4); // xr2 (x3, x2) S32LDD(xr3, inptr, 0x8); // xr3 (x5, x4) S32LDD(xr4, inptr, 0xc); // xr4 (x7, x6)// calculate b0,b1,b2,b3 D16MUL_HW(xr11, xr1, xr7, xr12); // xr11 (W1*x1) xr12 (W3*x1) D16MUL_HW(xr13, xr1, xr8, xr14); // xr13 (W5*x1) xr14 (W7*x1) D16MAC_SA_HW(xr13,xr2,xr7,xr11); // xr13 (W5*x1 - W1*x3) xr11 (W1*x1 + W3*x3) D16MAC_SS_HW(xr14,xr2,xr8,xr12); // xr14 (W7*x1 - W5*x3) xr12 (W3*x1 - W7*x3) D16MAC_SA_HW(xr12,xr3,xr7,xr14); // xr12 (W3*x1 - W7*x3 - W1*x5) // xr14 (W7*x1 - W5*x3 + W3*x5) D16MAC_AA_HW(xr11,xr3,xr8,xr13); // xr11 (W1*x1 + W3*x3 + W5*x5) // xr13 (W5*x1 - W1*x3 + W7*x5)//calculate a0,a1,a2,a3 D16MUL_LW(xr9, xr1, xr5, xr10); // xr9 (W4*x0) xr10 (W4*x0) D16MAC_AS_LW(xr9,xr3,xr5,xr10); // xr9 (W4*x0 + W4*x4) xr10 (W4*x0 - W4*x4) D16MUL_LW(xr1, xr2, xr6, xr3); // xr1 (W2*x2) xr3 (W6*x2) D16MAC_SA_LW(xr3,xr4,xr6,xr1); // xr3 (W6*x2 - W2*x6) xr1 (W2*x2 + W6*x6)//schedule b0 ~ b3 D16MAC_SA_HW(xr12,xr4,xr8,xr11); // xr12 (W3*x1 - W7*x3 - W1*x5 - W5*x7),b1 // xr11 (W1*x1 + W3*x3 + W5*x5 + W7*x7),b0 D16MAC_SA_HW(xr14,xr4,xr7,xr13); // xr14 (W7*x1 - W5*x3 + W3*x5 - W1*x7),b3 // xr13 (W5*x1 - W1*x3 + W7*x5 + W3*x7),b2// continue a0 ~ a3 D32ADD_AS(xr2, xr9, xr1, xr4); //xr2 (W4*x0 + W4*x4 + W2*x2 + W6*x6),a0 //xr4 (W4*x0 + W4*x4 - W2*x2 - W6*x6),a3 D32ADD_AS(xr9,xr10,xr3, xr1); //xr9(W4*x0 - W4*x4 + W6*x2 - W2*x6),a1 //xr1(W4*x0 - W4*x4 - W6*x2 + W2*x6),a2//calculate a +/- b D32ADD_AS(xr2, xr2, xr11, xr11); //xr2(a0 + b0) xr11 (a0 - b0) D32ADD_AS(xr4, xr4, xr14, xr14); //xr4(a3 + b3) xr14 (a3 - b3) D32ADD_AS(xr9, xr9, xr12, xr12); //xr9(a1 + b1) xr12 (a1 - b1) D32ADD_AS(xr1, xr1, xr13, xr13); //xr1(a2 + b2) xr13 (a2 - b2)//padding and saturate D16MACF_AA_WW(xr9, xr0, xr0, xr2); // r9 (a1+b1, a0+b0) D16MACF_AA_WW(xr4, xr0, xr0, xr1); // r4 (a3+b3, a2+b2) D16MACF_AA_WW(xr13,xr0, xr0, xr14); // r13(a2-b2, a3-b3) D16MACF_AA_WW(xr11,xr0, xr0, xr12); // r11(a0-b0, a1-b1) Q16SAT(xr4, xr4, xr9); Q16SAT(xr11,xr11, xr13); S32LDI(xr1, inptr,0x10); // xr1 (x1, x0) S32STD(xr4, dest, 0x0); S32STD(xr11,dest, 0x4); dest += line_size; } while (inptr != endptr);}#elsevoid simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block){ int i; for(i=0; i<8; i++) idctRowCondDC(block + i*8); for(i=0; i<8; i++) idctSparseColPut(dest + i, line_size, block + i);}#endif#ifdef JZ4740_MXU_OPTvoid simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block){ DCTELEM *inptr = block, *endptr; S32I2M(xr5,WM44) ; // xr5 (W4, W4) S32I2M(xr6,WM26) ; // xr6 (W2, W6) S32I2M(xr7,WM13) ; // xr7 (W1, W3) S32I2M(xr8,WM57) ; // xr8 (W5, W7) endptr = inptr + 8;// calculate column S32LDD(xr1, inptr, 0x00); // xr1 (nx0, x0) do {// calculate a0,a1,a2,a3 S32LDD(xr2, inptr, 0x20); // xr2 (nx2, x2) S32LDD(xr3, inptr, 0x40); // xr3 (nx4, x4) S32LDD(xr4, inptr, 0x60); // xr4 (nx6, x6)// computer ah0,a0 ah3,a3 D16MUL_HW(xr11, xr5, xr1, xr13); //xr11 (W4*nx0) xr13(W4*x0) D16MAC_AA_HW(xr11, xr5, xr3, xr13); //xr11(W4*nx0 + W4*nx4) xr13(W4*x0 + W4*x4) D16MUL_HW(xr12,xr6,xr2,xr14); //xr12 (W2*nx2) xr14(W2*x2) D16MAC_AA_LW(xr12,xr6,xr4,xr14); //xr12 (W2*nx2 + W6*nx6) xr14(W2*x2 + W6*x6) D32ADD_AS(xr11, xr11, xr12, xr12); //xr11 (W4*nx0 + W4*nx4 + W2*nx2 + W6*nx6),na0 //xr12 (W4*nx0 + W4*nx4 - W2*nx2 - W6*nx6),na3 D32ADD_AS(xr13, xr13, xr14, xr14); //xr13 (W4*x0 + W4*x4 + W2*x2 + W6*x6),a0 //xr14 (W4*x0 + W4*x4 - W2*x2 - W6*x6),a3 D16MACF_AA_WW(xr11, xr0, xr0, xr13); // r11 (na0,a0) D16MACF_AA_WW(xr12, xr0, xr0, xr14); // r12 (na3,a3)// D16MUL_HW(xr13, xr5, xr1, xr10); //xr13(W4*nx0) xr10(W4*x0) D16MAC_SS_HW(xr13, xr5, xr3, xr10); //xr13(W4*nx0 - W4*nx4) xr10(W4*x0 - W4*x4) D16MUL_LW(xr14,xr6,xr2,xr9); //xr14 (W6*nx2) xr9(W6*x2) D16MAC_SS_HW(xr14,xr6,xr4,xr9); //xr14 (W6*nx2 - W2*nx6) xr9(W6*x2 - W2*x6) D32ADD_AS(xr13, xr13, xr14, xr14); //xr13 (W4*nx0 - W4*nx4 + W6*nx2 - W2*nx6),na1 //xr14 (W4*nx0 - W4*nx4 - W6*nx2 + W2*nx6),na2 D32ADD_AS(xr10, xr10, xr9, xr9); //xr10 (W4*x0 - W4*x4 + W6*x2 - W2*x6),a1 //xr9 (W4*x0 - W4*x4 - W6*x2 + W2*x6),a2 D16MACF_AA_WW(xr13, xr0, xr0, xr10); // r13 (na1,a1) D16MACF_AA_WW(xr14, xr0, xr0, xr9); // r14 (na2,a2)//------------------------------------------------------------------------------------- S32LDD(xr1, inptr, 0x10); // xr1 (nx1, x1) S32LDD(xr2, inptr, 0x30); // xr2 (nx3, x3) S32LDD(xr3, inptr, 0x50); // xr3 (nx5, x5) S32LDD(xr4, inptr, 0x70); // xr4 (nx7, x7)// calculate b0,b1,b2,b3// calculate b0 D16MUL_HW(xr9, xr7, xr1, xr10); //xr9(nx1 * W1) xr10(x1 * W1) D16MAC_AA_LW(xr9, xr7, xr2, xr10); //xr9(nx1*W1 + nx3*W3) xr10(x1*W1 + x3*W3) D16MAC_AA_HW(xr9, xr8, xr3, xr10); //xr9(nx1*W1 + nx3*W3 + nx5*W5) //xr10(x1*W1 + x3*W3 + x5*W5) D16MAC_AA_LW(xr9, xr8, xr4, xr10); //xr9(nx1*W1 + nx3*W3 + nx5*W5 + nx7*W7) //xr10(x1*W1 + x3*W3 + x5*W5 + nx7*W7) D16MACF_AA_WW(xr9, xr0, xr0, xr10); //xr9 (nb0,b0)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?