wmv2.c
来自「君正早期ucos系统(只有早期的才不没有打包成库),MPLAYER,文件系统,图」· C语言 代码 · 共 1,478 行 · 第 1/4 页
C
1,478 行
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; a0 = col[8*0]; a1 = col[8*1]; a2 = col[8*2]; a3 = col[8*3]; c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1)); c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); c1 = a1 * C1 + a3 * C2; c3 = a1 * C2 - a3 * C1; dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)]; dest += line_size; dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)]; dest += line_size; dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)]; dest += line_size; dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)];}#define RN_SHIFT 15#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))#define R1 R_FIX(0.6532814824)#define R2 R_FIX(0.2705980501)#define R3 R_FIX(0.5)#define R_SHIFT 11static inline void idct4row(DCTELEM *row){ int c0, c1, c2, c3, a0, a1, a2, a3; //const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; a0 = row[0]; a1 = row[1]; a2 = row[2]; a3 = row[3]; c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1)); c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1)); c1 = a1 * R1 + a3 * R2; c3 = a1 * R2 - a3 * R1; row[0]= (c0 + c1) >> R_SHIFT; row[1]= (c2 + c3) >> R_SHIFT; row[2]= (c2 - c3) >> R_SHIFT; row[3]= (c0 - c1) >> R_SHIFT;}/* 8x4 & 4x8 WMV2 IDCT */#undef C_FIX#undef C1#undef C2#undef C3#define C_FIX(x) ((int)((x) * /*1.414213562 */ (1 << 15) + 0.5))#define C1 C_FIX(0.6532814824)#define C2 C_FIX(0.2705980501)#define C3 C_FIX(0.5)#define wxr5 0x5A827642#define wxr6 0x5A8230FC#define wxr7 0x7D8A6A6E#define wxr9 0x6A6E18F9#define wxr10 0x471D7D8A#define wxr8 0x18F9471D#ifdef JZ4740_MXU_OPT void simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block){ int i; DCTELEM *blk; /* IDCT4 and store */ S32I2M(xr11, ((C1 << 16) | (C2 & 0xffff))); // xr11 (R1, R2) S32I2M(xr12, (1 << 14)); // xr11 (R1, R2) S32I2M(xr13, (C3)) ; // xr13 (R3) for(i=0; i<4; i++) { DCTELEM *col; col = block + i*2; S32LDD(xr2, col, 0x10); // xr2 (nx1, x1) S32LDD(xr4, col, 0x30); // xr4 (nx3, x3) S32LDD(xr1, col, 0x00); // xr1 (nx0, x0) S32LDD(xr3, col, 0x20); // xr3 (nx2, x2) D16MUL_HW (xr5, xr11, xr2, xr6); // xr5 (na1*C1) xr6 (a1*C1) D16MAC_AA_LW (xr5, xr11, xr4, xr6); // xr5.nc1 (na1*C1 + na3*C2) xr6.c1 (a1*C1 + a3*C2) D16MUL_LW (xr7, xr11, xr2, xr8); // xr7 (na1*C2) xr8 (a1*C2) D16MAC_SS_HW (xr7, xr11, xr4, xr8); // xr7.nc3 (na1*C2 - na3*C1) xr8.c3 (a1*C2 - a3*C1) D16MUL_LW (xr9, xr13, xr1, xr10); // xr9 (C3*na0) xr10 (C3*a0) D16MUL_LW (xr14, xr13, xr3, xr15); // xr14 (C3*na2) xr15 (C3*a2) D32ADD_AS (xr1, xr9, xr14, xr2); // xr1.nc0 (na0 + na2)*C3 xr2.nc2 (na0-na2)*C3 D32ADD_AS (xr3, xr10, xr15, xr4); // xr3.c0 (a0 + a2)*C3 xr4.c2 (a0-a2)*C3 D32ADD_AS (xr9, xr3, xr6, xr10); // xr9: (c0 + c1) xr10: (c0 - c1) D32ACC_AS (xr9, xr12, xr0, xr10); // xr9: (c0 + c1) xr10: (c0 - c1) D32ADD_AS (xr5, xr1, xr5, xr6); // xr5: (nc0 + nc1) xr6: (nc0 - nc1) D32ACC_AS (xr5, xr12, xr0, xr6); // xr5: (nc0 + nc1) xr6: (nc0 - nc1) D32ADD_AS (xr14, xr4, xr8, xr15); // xr14: (c2 + c3) xr15: (c2 - c3) D32ACC_AS (xr14, xr12,xr0, xr15); // xr14: (c2 + c3) xr15: (c2 - c3) D32ADD_AS (xr7, xr2, xr7, xr8); // xr7: (nc2 + nc3) xr8: (nc2 - nc3) D32ACC_AS (xr7, xr12,xr0, xr8); // xr7: (nc2 + nc3) xr8: (nc2 - nc3) D32SARL(xr5, xr5, xr9, 15); // xr5 (nc0+nc1, c0+c1) S32STD (xr5, col, 0x0); D32SARL(xr7, xr7,xr14, 15); // xr7 (nc2+nc3, c2+c3) S32STD (xr7, col, 0x10); D32SARL(xr8, xr8, xr15, 15); // xr8 (nc2-nc3, c2-c3) S32STD (xr8, col, 0x20); D32SARL(xr6, xr6, xr10, 15); // xr6 (nc0-nc1, c0-c1) S32STD (xr6, col, 0x30); } S32I2M(xr5,wxr5) ; // xr5 (W4, W2) S32I2M(xr6,wxr6) ; // xr6 (W4, W6) S32I2M(xr7,wxr7) ; // xr7 (W1, W3) S32I2M(xr8,wxr8) ; // xr8 (W7, W5) S32I2M(xr9,wxr9) ; // xr9 (W3, W7) S32I2M(xr10,wxr10) ; // xr10(W5,W1) for (i = 0; i < 4; i++) /* idct rows */ { blk = block + (i << 3); S32LDD(xr1, blk, 0); // xr1 (x1, x0) S32LDD(xr2, blk, 0x4); // xr2 (x3, x2) S32LDD(xr3, blk, 0x8); // xr3 (x5, x4) S32LDD(xr4, blk, 0xc); // xr4 (x7, x6) S32SFL(xr1,xr1,xr2,xr2, ptn3); // xr1(x1, x3) xr2(x0, x2) S32SFL(xr3,xr3,xr4,xr4, ptn3); // xr3(x5, x7) xr4(x4, x6) D16MUL_WW(xr11, xr2, xr5, xr12); //xr11(w4*x0) xr12(w2*x2) D16MAC_AA_WW(xr11,xr4,xr6,xr12); //xr11(w4*x0+w4*x4) xr12:(w2*x2+w6*x6) D16MUL_WW(xr13, xr2, xr6, xr14); //xr13(w4*x0) xr14(w6*x2) D16MAC_SS_WW(xr13,xr4,xr5,xr14); //xr13(w4*x0-w4*x4) xr14:(w6*x2+w6*x6) D16MUL_HW(xr2, xr1, xr7, xr4); //xr2:w1*x1 xr4:w3*x1 D16MAC_AS_LW(xr2,xr1,xr9,xr4); //xr2:w1*x1 + w3*x3 xr4:w3*x1-w7*x3 D16MAC_AS_HW(xr2,xr3,xr10,xr4); // D16MAC_AS_LW(xr2,xr3,xr8,xr4); D16MACF_AA_WW(xr2, xr0, xr0, xr4); // D16MACF_AA_WW(xr11, xr0, xr0, xr13); // D16MACF_AA_WW(xr12, xr0, xr0, xr14); // D16MUL_HW(xr13, xr1, xr8, xr14); //w7*(x4+x5) store in xr14 ,w3*(x6+x7) store in xr15 D16MAC_SS_LW(xr13,xr1,xr10,xr14); D16MAC_AA_HW(xr13,xr3,xr9,xr14); D16MAC_SA_LW(xr13,xr3,xr7,xr14); D16MACF_AA_WW(xr14, xr0, xr0, xr13); Q16ADD_AS_WW(xr11,xr11,xr12,xr12); // Q16ADD_AS_WW(xr11, xr11, xr2, xr2); // xr11 ((a0 + b0), (a1 + b1)) // xr2 ((a0 - b0), (a1 - b1)) Q16ADD_AS_XW(xr12, xr12, xr14, xr14);// xr12 ((a2 + b2), (a3 + b3)) // xr14 ((a2 - b2), (a3 - b3)) S32SFL(xr11,xr11,xr12,xr12, ptn3); // xr11 ((a0 + b0), (a2 + b2)) // xr12 ((a1 + b1), (a3 + b3)) S32SFL(xr12,xr12,xr11,xr11, ptn3); // xr11 ((a3 + b3), (a2 + b2)) S32LDD(xr3, dest, 0); Q8ACCE_AA(xr11, xr3, xr0, xr12); Q16SAT(xr3,xr11, xr12); S32STD(xr3, dest, 0x0); S32LDD(xr3, dest, 4); Q8ACCE_AA(xr2, xr3, xr0, xr14); Q16SAT(xr3,xr2, xr14); S32STD(xr3, dest, 0x4); dest += line_size; }}#elsevoid simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block){ int i; /* IDCT8 on each line */ for(i=0; i<4; i++) { idctRowCondDC(block + i*8); } /* IDCT4 and store */ for(i=0;i<8;i++) { idct4col_add(dest + i, line_size, block + i); }}#endif#undef R_FIX(x)#undef R1#undef R2#undef R3#define R_FIX(x) ((int)((x) /* 1.414213562*/ * (1 << 15) + 0.5))#define R1 R_FIX(0.6532814824)#define R2 R_FIX(0.2705980501)#define R3 R_FIX(0.5)#ifdef JZ4740_MXU_OPTvoid simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block){ int i; DCTELEM *row; /* IDCT8 and store */ S32I2M(xr5,wxr5) ; // xr5 (W4, W2) S32I2M(xr6,wxr6) ; // xr6 (W4, W6) S32I2M(xr7,wxr7) ; // xr7 (W1, W3) S32I2M(xr8,wxr8) ; // xr8 (W7, W5) S32I2M(xr9,wxr9) ; // xr9 (W3, W7) S32I2M(xr10,wxr10) ; // xr10(W5, W1) for(i=0; i<2; i++){ DCTELEM *blk; blk = block + 2*i; S32LDD(xr1, blk, 0x00); // xr1 (x4, x0) S32LDD(xr2, blk, 0x10); // xr2 (x7, x3) S32LDD(xr3, blk, 0x20); // xr3 (x6, x1) S32LDD(xr4, blk, 0x30); // xr4 (x5, x2) S32LDD(xr11, blk, 0x40); // xr1 (x4, x0) S32LDD(xr12, blk, 0x50); // xr2 (x7, x3) S32LDD(xr13, blk, 0x60); // xr3 (x6, x1) S32LDD(xr14, blk, 0x70); // xr4 (x5, x2) S32I2M(xr5,wxr5); D16MUL_HW(xr2, xr5, xr1, xr9); //xr2(h0 * W4) xr9(x0 * W4) D16MAC_AA_HW(xr2,xr5,xr11,xr9); //xr2(h0 * W4 + h4 * W4) //xr9(x0 * W4 + x4 * W4) D16MACF_AA_WW(xr2, xr0, xr0, xr9); //xr2((h0*W4 + h4*W4), (x0*W4 + x4*W4)) D16MUL_LW(xr10, xr5, xr3, xr15); // xr10(h2 * W2) xr15(x2 * W2) D16MAC_AA_LW(xr10,xr6,xr13,xr15); // xr10(h2 * W2 + h6 * W6) // xr15(x2 * W2 + x6 * W6) D16MACF_AA_WW(xr10, xr0, xr0, xr15); // xr10((h2*W2 + h6*W6), (x2*W2 + x6*W6)) Q16ADD_AS_WW(xr2,xr2,xr10,xr9); //xr2(h0*W4 + h4*W4 + h2*W2 + h6*W6) (x0*W4 + x4*W4 + x2*W2 + x6*W6) //xr9(h0*W4 + h4*W4 - h2*W2 - h6*W6) (x0*W4 + x4*W4 - x2*W2 - x6*W6) D16MUL_HW(xr10, xr5, xr1, xr15); // xr10 (h0 * W4) xr15 (x0 * W4) D16MAC_SS_HW(xr10,xr5,xr11,xr15); // xr10 (h0 * W4 - h4 * W4) // xr15 (x0 * W4 - x4 * W4) D16MACF_AA_WW(xr10, xr0, xr0, xr15); // xr10 ((h0*W4 - h4*W4), (x0*W4 - x4*W4)) D16MUL_LW(xr11, xr6, xr3, xr1); // xr11 (h2 * W6) xr1 (x2 * W6) D16MAC_SS_LW(xr11,xr5,xr13,xr1); // xr11 (h2 * W6 - h6 * W2) // xr1 (x2 * W6 - x6 * W2) D16MACF_AA_WW(xr11, xr0, xr0, xr1); // xr11 ((h2*W6 - h6*W2), (x2*W6 - x6*W2)) Q16ADD_AS_WW(xr10,xr10,xr11,xr15); S32LDD(xr1, blk, 0x10); // xr1 (x4, x0) D16MUL_HW(xr11, xr7, xr1, xr13); // D16MAC_AA_LW(xr11,xr7,xr4,xr13); // D16MAC_AA_LW(xr11,xr8,xr12,xr13); // D16MAC_AA_HW(xr11,xr8,xr14,xr13); // D16MACF_AA_WW(xr11, xr0, xr0, xr13); // D16MUL_LW(xr3, xr7, xr1, xr13); D16MAC_SS_HW(xr3,xr8,xr4,xr13); D16MAC_SS_HW(xr3,xr7,xr12,xr13); D16MAC_SS_LW(xr3,xr8,xr14,xr13); D16MACF_AA_WW(xr3, xr0, xr0, xr13); D16MUL_LW(xr5, xr8, xr1, xr13); D16MAC_SS_HW(xr5,xr7,xr4,xr13); D16MAC_AA_HW(xr5,xr8,xr12,xr13); D16MAC_AA_LW(xr5,xr7,xr14,xr13); D16MACF_AA_WW(xr5, xr0, xr0, xr13); D16MUL_HW(xr1, xr8, xr1, xr13); D16MAC_SS_LW(xr1,xr8,xr4,xr13); D16MAC_AA_LW(xr1,xr7,xr12,xr13); D16MAC_SS_HW(xr1,xr7,xr14,xr13); D16MACF_AA_WW(xr1, xr0, xr0, xr13); Q16ADD_AS_WW(xr2,xr2,xr11,xr11); // Q16ADD_AS_WW(xr10,xr10,xr3,xr3); // Q16ADD_AS_WW(xr15,xr15,xr5,xr5); // Q16ADD_AS_WW(xr9,xr9,xr1,xr1); // S32STD(xr2, blk, 0x00); S32STD(xr3, blk, 0x60); S32STD(xr10, blk, 0x10); S32STD(xr11, blk, 0x70); S32STD(xr1, blk, 0x40); S32STD(xr9, blk, 0x30); S32STD(xr15, blk, 0x20); S32STD(xr5, blk, 0x50); } S32I2M(xr11, ((R1 << 16) | (R2 & 0xffff))); // xr11 (R1, R2) S32I2M(xr12, (1 << 14)); // xr11 (R1, R2) S32I2M(xr13, (R3)) ; // xr13 (R3) dest -= line_size; row = block; i = (unsigned int)row + 8 * 8 * 2; /* IDCT4 on each line */ while ((unsigned int)row < i) { S32LDD(xr1, row, 0x00); // xr1 (x1, x0) S32LDD(xr2, row, 0x4); // xr2 (x3, x2) S32LDD(xr14, row, 0x10); // xr14 (x1, x0) S32LDD(xr15, row, 0x14); // xr15 (x3, x2) S32SFL(xr1, xr1, xr2, xr2, ptn3); // xr1 (a1.xr1, a3.xr3) xr2 (a0.x0, a2.x2) D16MUL_HW (xr3, xr1, xr11, xr4); // xr3 (a1*R1) xr4 (a1*R2) D16MAC_SA_LW (xr4, xr1, xr11, xr3); // xr4.c3 (a1*R2 - a3*R1) xr3.c1 (a1*R1 + a3*R2) D16MUL_LW(xr5, xr13, xr2, xr6); // xr5 (R3*a0) xr6 (R3*a2) D32ADD_AS(xr5, xr5, xr6, xr6); // xr5.c0 (a0 + a2) *R3 xr6.c2 (a0-a2) *R3 D32ADD_AS (xr9, xr5, xr3, xr10); // xr9: (c0 + c1) xr10: (c0 - c1) D32ADD_AS (xr7, xr6, xr4, xr8); // xr7: (c2 + c3) xr8: (c2 - c3) D32ACC_AS (xr7, xr12, xr0, xr9); // xr7: (c2 + c3) xr9: (c0 + c1) D32ACC_AS (xr10,xr12, xr0, xr8); // xr10: (c0 - c1) xr8: (c2 - c3) S32LDIV(xr1, dest, line_size, 0x00); // xr1 (dest3,dest2,dest1,dest0) D32SARL(xr7, xr7, xr9, 15); // xr7 (c2+c3, c0+c1) D32SARL(xr8, xr10,xr8, 15); // xr8 (c0-c1, c2-c3) Q8ACCE_AA(xr8, xr1, xr0, xr7);// S32SFL(xr14, xr14, xr15, xr15, ptn3); // xr14 (a1.xr1, a3.xr3) xr15 (a0.x0, a2.x2) D16MUL_HW (xr3, xr14, xr11, xr4); // xr3 (a1*R1) xr4 (a1*R2) D16MAC_SA_LW (xr4, xr14, xr11, xr3); // xr4.c3 (a1*R2 - a3*R1) xr3.c1 (a1*R1 + a3*R2) D16MUL_LW(xr5, xr13, xr15, xr6); // xr5 (R3*a0) xr6 (R3*a2) D32ADD_AS(xr5, xr5, xr6, xr6); // xr5.c0 (a0 + a2) *R3 xr6.c2 (a0-a2) *R3 D32ADD_AS (xr1, xr5, xr3, xr2); // xr1: (c0 + c1) xr2: (c0 - c1) D32ADD_AS (xr3, xr6, xr4, xr4); // xr3: (c2 + c3) xr4: (c2 - c3) D32ACC_AS (xr3, xr12, xr0, xr1); // xr3: (c2 + c3) xr1: (c0 + c1) D32ACC_AS (xr2,xr12, xr0, xr4); // xr2: (c0 - c1) xr4: (c2 - c3) S32LDDV(xr5, dest, line_size, 0x00); // xr1 (dest3,dest2,dest1,dest0) D32SARL(xr3, xr3, xr1, 15); // xr3 (c2+c3, c0+c1) D32SARL(xr4, xr2,xr4, 15); // xr4 (c0-c1, c2-c3) Q8ACCE_AA(xr4, xr5, xr0, xr3);// store Q16SAT(xr8, xr8, xr7); Q16SAT(xr4, xr4, xr3); S32STD(xr8, dest, 0); S32SDIV(xr4, dest, line_size, 0); row += 16; }}#elsevoid simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block){ int i; /* IDCT4 on each line */ for(i=0; i<8; i++) { idct4row(block + i*8); } /* IDCT8 and store */ for(i=0; i<4; i++){ idctSparseColAdd(dest + i, line_size, block + i); }}#endif#endif
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?