📄 vc1dsp.c
字号:
D16MUL_LW(xr7,xr2,xr14,xr8); //xr7:16*src[2] xr8:6*src[2] D16MAC_SA_LW(xr8,xr4,xr14,xr7); //xr8:t4 6*src[2]-16*src[6] xr7:t3 16*src[2]+6*src[6] D32ADD_AS(xr5,xr5,xr7,xr7); //xr5:t5 t1+t3 xr7:t8 t1-t3 D32ADD_AS(xr6,xr6,xr8,xr8); //xr6:t6 t2+t4 xr8:t7 t2-t4 S32I2M(xr9,4); D32ACC_AS(xr5,xr9,xr0,xr7); D32ACC_AS(xr6,xr9,xr0,xr8); D16MUL_HW(xr9,xr1,xr13,xr10); //xr9:16*src[1] xr10:15*src[1] D16MUL_HW(xr11,xr1,xr12,xr1); //xr11:9*src[1] xr1:4*src[1] D16MAC_SA_HW(xr11,xr2,xr13,xr9);//xr11:9*src[1]-16*src[3] xr9:16*src[1]+15*src[3] D16MAC_SS_HW(xr1,xr2,xr12,xr10);//xr1:4*src[1]-9*src[3] xr10:15*src[1]-4*src[3] D16MAC_SA_HW(xr10,xr3,xr13,xr1);//xr10:15*src[1]-4*src[3]-16*src[5] xr1:4*src[1]-9*src[3]+15*src[5] D16MAC_AA_HW(xr9,xr3,xr12,xr11);//xr9:16*src[1]+15*src[3]+ 9*src[5] xr11:9*src[1]-16*src[3]+4*src[5] D16MAC_SA_HW(xr1,xr4,xr13,xr11);//xr1:t4 4*src[1]-9*src[3]+15*src[5]-16*src[7] xr11:t3 9*src[1]-16*src[3]+4*src[5]+15*src[7] D16MAC_SA_HW(xr10,xr4,xr12,xr9);//xr10:t2 15*src[1]-4*src[3]-16*src[5]-9*src[7] xr9:t1 16*src[1]+15*src[3]+ 9*src[5]+4*src[7] D32ADD_AS(xr5,xr5,xr9,xr9); //xr5:t5+t1 xr9:t5-t1 D32ADD_AS(xr6,xr6,xr10,xr10); //xr6:t6+t2 xr10:t6-t2 D32ADD_AS(xr8,xr8,xr11,xr11); //xr8:t7+t3 xr11:t7-t3 D32ADD_AS(xr7,xr7,xr1,xr1); //xr7:t8+t4 xr1:t8-t4 D32SARL(xr5,xr6,xr5,3); D32SARL(xr6,xr7,xr8,3); D32SARL(xr7,xr11,xr1,3); D32SARL(xr8,xr9,xr10,3); S32STD(xr5,dst,0x0); S32STD(xr6,dst,0x4); S32STD(xr7,dst,0x8); S32STD(xr8,dst,0xc); src+=8; dst+=8; } src = block + off; dst = block + off; for(i = 0; i < 4; i++){ S32LDD(xr1,src,0x00); //xr1:src[1] src[0] S32LDD(xr2,src,0x10); //xr2:src[9] src[8] S32LDD(xr3,src,0x20); //xr3:src[17] src[16] S32LDD(xr4,src,0x30); //xr4:src[25] src[24] Q16ADD_AS_WW(xr5,xr1,xr3,xr6); //xr5:src[1]+src[17] src[0]+src[16] xr6:src[1]-src[17] src[0]-src[16] Q16SLL(xr7,xr5,xr6,xr8,4); //xr7:(src[1]+src[17])<<4 (src[0]+src[16])<<4 xr8:(src[1]-src[17])<<4 (src[0]-src[16])<<4 Q16ADD_AS_WW(xr5,xr7,xr5,xr0); //xr5:pt1 t1 Q16ADD_AS_WW(xr6,xr8,xr6,xr0); //xr6:pt2 t2 S32I2M(xr11,64<<16|64); Q16ACC_AS(xr5,xr11,xr0,xr6); Q16SLL(xr1,xr2,xr4,xr3,3); //xr1:src[9]<<3 src[8]<<3 xr3:src[25]<<3 src[24]<<3 Q16SLL(xr7,xr2,xr4,xr8,1); //xr7:src[9]<<1 src[8]<<1 xr8:src[25]<<1 src[24]<<1 Q16ADD_AS_WW(xr9,xr1,xr7,xr0); //xr9:pt5 t5 Q16ADD_AS_WW(xr10,xr3,xr8,xr0);//xr10:pt6 t6 Q16SLL(xr1,xr1,xr3,xr3,1); //xr1:src[9]<<16 src[8]<<16 xr3:src[25]<<16 src[24]<<16 Q16SLL(xr2,xr2,xr4,xr4,2); //xr2:src[9]<<2 src[8]<<2 xr4:src[25]<<2 src[24]<<2 Q16ADD_AS_WW(xr1,xr1,xr2,xr0); Q16ADD_AS_WW(xr3,xr3,xr4,xr0); Q16ADD_AS_WW(xr1,xr1,xr7,xr0); //xr1:pt3 t3 Q16ADD_AS_WW(xr3,xr3,xr8,xr0); //xr3:pt4 t4 Q16ADD_AS_WW(xr5,xr5,xr1,xr1); //xr5:pt1+pt3 t1+t3 xr1:pt1-pt3 t1-t3 Q16ADD_AS_WW(xr6,xr6,xr3,xr3); //xr6:pt2+pt4 t2+t4 xr3:pt2-pt4 t2-t4 Q16ADD_AS_WW(xr5,xr5,xr10,xr0); //xr5:pt1+pt3+pt6 t1+t3+t6 Q16ADD_AS_WW(xr0,xr1,xr10,xr1); //xr1:pt1-pt3-pt6 t1-t3-t6 Q16ADD_AS_WW(xr3,xr3,xr9,xr0); //xr3:pt2-pt4+pt5 t2-t4+t5 Q16ADD_AS_WW(xr0,xr6,xr9,xr6); //xr6:pt2+pt4-pt5 t2+t4-t5 Q16SAR(xr5,xr5,xr1,xr1,7); Q16SAR(xr3,xr3,xr6,xr6,7); S32STD(xr5,dst,0x00); S32STD(xr3,dst,0x10); S32STD(xr6,dst,0x20); S32STD(xr1,dst,0x30); src +=2; dst +=2; }}#elsestatic void vc1_inv_trans_8x4_c(DCTELEM block[64], int n){ int i; DCTELEM *src, *dst; int off; register int t1,t2,t3,t4,t5,t6,t7,t8; off = n * 32; src = block + off; dst = block + off; for(i = 0; i < 4; i++){ t1 = 12 * (src[0] + src[4]); t2 = 12 * (src[0] - src[4]); t3 = 16 * src[2] + 6 * src[6]; t4 = 6 * src[2] - 16 * src[6]; t5 = t1 + t3; t6 = t2 + t4; t7 = t2 - t4; t8 = t1 - t3; t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; dst[0] = (t5 + t1 + 4) >> 3; dst[1] = (t6 + t2 + 4) >> 3; dst[2] = (t7 + t3 + 4) >> 3; dst[3] = (t8 + t4 + 4) >> 3; dst[4] = (t8 - t4 + 4) >> 3; dst[5] = (t7 - t3 + 4) >> 3; dst[6] = (t6 - t2 + 4) >> 3; dst[7] = (t5 - t1 + 4) >> 3; src += 8; dst += 8; } src = block + off; dst = block + off; for(i = 0; i < 8; i++){ t1 = 17 * (src[ 0] + src[16]); t2 = 17 * (src[ 0] - src[16]); t3 = 22 * src[ 8]; t4 = 22 * src[24]; t5 = 10 * src[ 8]; t6 = 10 * src[24]; dst[ 0] = (t1 + t3 + t6 + 64) >> 7; dst[ 8] = (t2 - t4 + t5 + 64) >> 7; dst[16] = (t2 + t4 - t5 + 64) >> 7; dst[24] = (t1 - t3 - t6 + 64) >> 7; src ++; dst ++; }}#endif/** Do inverse transform on 4x8 parts of block*/#ifdef JZ4740_MXU_OPTstatic void vc1_inv_trans_4x8_c(DCTELEM block[64], int n){ int i; DCTELEM *src, *dst; int off; off = n * 4; src = block + off; dst = block + off; S32I2M(xr15,W7<<16|W8); //xr15:17|17 S32I2M(xr14,W8<<16|W9); //xr14:22|10 for(i = 0; i < 8; i++){ S32LDD(xr1,src,0x0); //xr1:src[1] src[0] S32LDD(xr2,src,0x4); //xr2:src[3] src[2] D16MUL_LW(xr3,xr1,xr15,xr4); //xr3:17*src[0] xr4:17*src[0] D16MAC_AS_LW(xr3,xr2,xr15,xr4); //xr3:t1 17*src[0]+src[2]*17 //xr4:t2 17*src[0]-17*src[2] S32I2M(xr13,4); D32ACC_AS(xr3,xr13,xr0,xr4); //xr3:t1+4 xr4:t2+4 D16MUL_HW(xr5,xr1,xr14,xr6); //xr5:t3 22*src[1] xr6:t5 10*src[1] D16MUL_HW(xr7,xr2,xr14,xr8); //xr7:t4 22*src[3] xr8:t6 10*src[3] D32ADD_AS(xr5,xr5,xr8,xr0); //xr5:t3+t6 D32ADD_AS(xr0,xr7,xr6,xr6); //xr6:t4-t5 D32ADD_AS(xr3,xr3,xr5,xr5); //xr3:t1+4+t3+t6 xr5:t1+4-t3-t6 D32ADD_AS(xr4,xr4,xr6,xr6); //xr4:t2+4+t4-t5 xr6:t2+4-t4+t5 D32SARL(xr3,xr6,xr3,3); D32SARL(xr5,xr5,xr4,3); S32STD(xr3,dst,0x0); S32STD(xr5,dst,0x4); src += 8; dst += 8; } src = block + off; dst = block + off; for(i = 0; i < 2; i++){ S32LDD(xr1,src,0x00); //xr1:src[1] src[0] S32LDD(xr2,src,0x10); //xr2:src[9] src[8] S32LDD(xr3,src,0x20); //xr3:src[17] src[16] S32LDD(xr4,src,0x30); //xr4:src[25] src[24] S32LDD(xr5,src,0x40); //xr5:src[33] src[32] S32LDD(xr6,src,0x50); //xr6:src[41] src[40] S32LDD(xr7,src,0x60); //xr7:src[49] src[48] S32LDD(xr8,src,0x70); //xr8:src[57] src[56] Q16ADD_AS_WW(xr9,xr1,xr5,xr10); //xr9:src[1]+src[33] src[0]+src[32] xr10:src[1]-src[33] src[0]-src[32] Q16SLL(xr11,xr9,xr10,xr12,3); //xr11:(src[1]+src[33])<<3 (src[0]+src[32])<<3 xr12:(src[1]-src[33])<<3 (src[0]-src[32])<<3 Q16SLL(xr13,xr9,xr10,xr14,2); //xr13:(src[1]+src[33])<<2 (src[0]+src[32])<<2 xr14:(src[1]-src[33])<<2 (src[0]-src[32])<<2 Q16ADD_AS_WW(xr11,xr11,xr13,xr0);//xr11:pt1 (src[1]+src[33])<<3+(src[1]+src[33])<<2 t1 (src[0]+src[32])<<3+(src[0]+src[32])<<2 Q16ADD_AS_WW(xr12,xr12,xr14,xr0);//xr12:pt2 (src[1]-src[33])<<3+(src[1]-src[33])<<2 t2 (src[0]-src[32])<<3+(src[0]-src[32])<<2 Q16SLL(xr9,xr3,xr7,xr10,4); //xr9:src[17]<<4 src[16]<<4 xr10:src[49]<<4 src[48]<<4 Q16SLL(xr13,xr3,xr7,xr14,3); //xr13:src[17]<<3 src[16]<<3 xr14:src[49]<<3 src[48]<<3 Q16SLL(xr15,xr3,xr7,xr1,1); //xr15:src[17]<<1 src[16]<<1 xr1:src[49]<<1 src[48]<<1 Q16ADD_AS_WW(xr9,xr9,xr14,xr0); //xr9:src[17]<<4+src[49]<<3 src[16]<<4+src[48]<<3 Q16ADD_AS_WW(xr0,xr9,xr1,xr9); //xr9:pt3 src[17]<<4+src[49]<<3-src[49]<<1 t3 src[16]<<4+src[48]<<3-src[48]<<1 Q16ADD_AS_WW(xr0,xr13,xr15,xr13); //xr13:src[17]<<3-src[17]<<1 src[16]<<3-src[16]<<1 Q16ADD_AS_WW(xr0,xr13,xr10,xr13); //xr13:pt4 src[17]<<3-src[17]<<1-src[49]<<4 t4 src[16]<<3-src[16]<<1-src[48]<<4 Q16ADD_AS_WW(xr11,xr11,xr9,xr9); //xr11:pt5 t5 xr9:pt8 t8 Q16ADD_AS_WW(xr12,xr12,xr13,xr13); //xr12:pt6 t6 xr13:pt7 t7 Q16ADD_AS_WW(xr1,xr6,xr4,xr3); //xr1:src[41]+src[25] src[40]+src[24] xr3:src[41]-src[25] src[40]-src[24] Q16SLL(xr10,xr2,xr4,xr14,4); //xr10:src[9]<<4 src[8]<<4 xr14:src[25]<<4 src[24]<<4 Q16SLL(xr15,xr6,xr8,xr5,4); //xr15:src[41]<<4 src[40]<<4 xr5:src[57]<<4 src[56]<<4 Q16SLL(xr10,xr2,xr4,xr14,4); //xr10:src[9]<<4 src[8]<<4 xr14:src[25]<<4 src[24]<<4 Q16SLL(xr15,xr6,xr0,xr0,3); //xr15:src[41]<<3 src[40]<<3 Q16SLL(xr5,xr8,xr0,xr0,2); //xr5:src[57]<<2 src[56]<<2 Q16ADD_AS_WW(xr7,xr10,xr14,xr0); //xr7:src[9]<<4 + src[25]<<4 src[8]<<4 + src[24]<<4 Q16ADD_AS_WW(xr5,xr15,xr5,xr0); //xr5:src[41]<<3+src[57]<<2 src[40]<<3+src[56]<<2 Q16ADD_AS_WW(xr5,xr7,xr5,xr0); //xr5:src[9]<<4 + src[25]<<4+src[41]<<3+src[57]<<2 src[8]<<4 + src[24]<<4+src[40]<<3+src[56]<<2 Q16ADD_AS_WW(xr5,xr5,xr3,xr0); //xr5:pt1 t1 Q16SLL(xr10,xr6,xr8,xr14,4); //xr10:src[41]<<4 src[40]<<4 xr14:src[57]<<4 src[56]<<4 Q16SLL(xr15,xr4,xr0,xr0,3); //xr15:src[25]<<3 src[24]<<3 Q16SLL(xr3,xr2,xr0,xr0,2); //xr3: src[9]<<2 src[8]<<2 Q16ADD_AS_WW(xr0,xr10,xr14,xr7); //xr7: src[41]<<4-src[57]<<4 src[40]<<4-src[56]<<4 Q16ADD_AS_WW(xr0,xr7,xr15,xr7); //xr7: src[41]<<4-src[57]<<4-src[25]<<3 src[40]<<4-src[56]<<4-src[24]<<3 Q16ADD_AS_WW(xr7,xr7,xr3,xr0); //xr7:src[41]<<4-src[57]<<4-src[25]<<3+src[9] src[40]<<4-src[56]<<4-src[24]<<3+src[8] Q16ADD_AS_WW(xr0,xr7,xr1,xr7); //xr7:pt4 t4 Q16ADD_AS_WW(xr1,xr8,xr2,xr3); //xr1:src[57]+src[9] src[56]+src[8] xr3:src[57]-src[9] src[56]-src[8] Q16SLL(xr10,xr2,xr6,xr14,4); //xr10:src[9]<<4 src[8]<<4 xr14:src[41]<<4 src[40]<<4 Q16SLL(xr15,xr8,xr0,xr0,3); //xr15:src[57]<<3 src[56]<<3 Q16ADD_AS_WW(xr0,xr10,xr14,xr10); //xr10:src[9]<<4-src[41]<<4 src[8]<<4-src[40]<<4 Q16ADD_AS_WW(xr0,xr10,xr15,xr10); //xr10:src[9]<<4-src[41]<<4-src[57]<<3 src[8]<<4-src[40]<<4-src[56]<<3 Q16SLL(xr15,xr4,xr0,xr0,2); //xr15:src[25]<<2 src[24]<<2 Q16ADD_AS_WW(xr0,xr10,xr15,xr10); //xr10:src[9]<<4-src[41]<<4-src[57]<<3-src[25]<<2 src[8]<<4-src[40]<<4-src[56]<<3-src[24]<<2 Q16ADD_AS_WW(xr0,xr10,xr1,xr1); //xr1:pt2 t2 Q16SLL(xr10,xr8,xr4,xr14,4); //xr10:src[57]<<4 src[56]<<4 xr14:src[25]<<4 src[24]<<4 Q16SLL(xr15,xr2,xr0,xr0,3); //xr15:src[9]<<3 src[8]<<3 Q16ADD_AS_WW(xr0,xr10,xr14,xr10); //xr10:src[57]<<4-src[25]<<4 src[56]<<4-src[24]<<4 Q16ADD_AS_WW(xr10,xr10,xr15,xr0); //xr10:src[57]<<4-src[25]<<4+src[9]<<3 src[56]<<4-src[24]<<4+src[8]<<3 Q16SLL(xr15,xr6,xr0,xr0,2); //xr15:src[41]<<2 src[40]<<2 Q16ADD_AS_WW(xr10,xr10,xr15,xr0); Q16ADD_AS_WW(xr3,xr10,xr3,xr0); //xr3:pt3 t3*/ Q16ADD_AS_WW(xr11,xr11,xr5,xr5); //xr11:pt5+pt1 t5+t1 xr5:pt5-pt1 t5-t1 Q16ADD_AS_WW(xr12,xr12,xr1,xr1); //xr12:pt6+pt2 t6+t2 xr1:pt6-pt2 t6-t2 Q16ADD_AS_WW(xr13,xr13,xr3,xr3); //xr13:pt7+pt3 t7+t3 xr3:pt7-pt3 t7-t3 Q16ADD_AS_WW(xr9,xr9,xr7,xr7); //xr9: pt8+pt4 t8+t4 xr7:pt8-pt4 t8-t4 S32I2M(xr2,64<<16|64); Q16ACC_AS(xr11,xr2,xr0,xr12); //xr11:pt5+pt1+64 t5+t1+64 xr12:pt6+pt2+64 t6+t2+64 Q16ACC_AS(xr13,xr2,xr0,xr9); //xr13:pt7+pt3+64 t7+t3+64 xr9: pt8+pt4+64 t8+t4+64 S32I2M(xr4,65<<16|65); Q16ACC_AS(xr5,xr4,xr0,xr1); //xr5:pt5-pt1+65 t5-t1+65 xr1:pt6-pt2+65 t6-t2+65 Q16ACC_AS(xr3,xr4,xr0,xr7); //xr3:pt7-pt3+65 t7-t3+65 xr7:pt8-pt4+65 t8-t4+65 Q16SAR(xr11,xr11,xr12,xr12,7); //xr11:dst[0] xr12:dst[8] Q16SAR(xr13,xr13,xr9,xr9,7); //xr13:dst[16] xr9:dst[24] Q16SAR(xr7,xr7,xr3,xr3,7); //xr7:dst[32] xr3: dst[40] Q16SAR(xr1,xr1,xr5,xr5,7); //xr1:dst[48] xr5: dst[56] S32STD(xr11,dst,0x00); S32STD(xr12,dst,0x10); S32STD(xr13,dst,0x20); S32STD(xr9,dst,0x30); S32STD(xr7,dst,0x40); S32STD(xr3,dst,0x50); S32STD(xr1,dst,0x60); S32STD(xr5,dst,0x70); src+=2; dst+=2; }}#elsestatic void vc1_inv_trans_4x8_c(DCTELEM block[64], int n){ int i; DCTELEM *src, *dst; int off; register int t1,t2,t3,t4,t5,t6,t7,t8; off = n * 4; src = block + off; dst = block + off; for(i = 0; i < 8; i++){ t1 = 17 * (src[0] + src[2]); t2 = 17 * (src[0] - src[2]); t3 = 22 * src[1]; t4 = 22 * src[3]; t5 = 10 * src[1]; t6 = 10 * src[3]; dst[0] = (t1 + t3 + t6 + 4) >> 3; dst[1] = (t2 - t4 + t5 + 4) >> 3; dst[2] = (t2 + t4 - t5 + 4) >> 3; dst[3] = (t1 - t3 - t6 + 4) >> 3; src += 8; dst += 8; } src = block + off; dst = block + off; for(i = 0; i < 4; i++){ t1 = 12 * (src[ 0] + src[32]); t2 = 12 * (src[ 0] - src[32]); t3 = 16 * src[16] + 6 * src[48]; t4 = 6 * src[16] - 16 * src[48]; t5 = t1 + t3; t6 = t2 + t4; t7 = t2 - t4; t8 = t1 - t3; t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; dst[ 0] = (t5 + t1 + 64) >> 7; dst[ 8] = (t6 + t2 + 64) >> 7; dst[16] = (t7 + t3 + 64) >> 7; dst[24] = (t8 + t4 + 64) >> 7; dst[32] = (t8 - t4 + 64 + 1) >> 7; dst[40] = (t7 - t3 + 64 + 1) >> 7; dst[48] = (t6 - t2 + 64 + 1) >> 7; dst[56] = (t5 - t1 + 64 + 1) >> 7; src++; dst++; }}#endif#ifdef JZ4740_MXU_OPT
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -