📄 rtjpegn.c
字号:
{ int ci=1, co=1, tmp; register int i; i=RTjpeg_ZZ[0]; data[i]=((__u8)strm[0])*qtbl[i]; for(co=1; co<=bt8; co++) { i=RTjpeg_ZZ[co]; data[i]=strm[ci++]*qtbl[i]; } for(; co<64; co++) { if(strm[ci]>63) { tmp=co+strm[ci]-63; for(; co<tmp; co++)data[RTjpeg_ZZ[co]]=0; co--; } else { i=RTjpeg_ZZ[co]; data[i]=strm[ci]*qtbl[i]; } ci++; } return (int)ci;}#endif#if defined(MMX)void RTjpeg_quant_init(void){ int i; __s16 *qtbl; qtbl=(__s16 *)RTjpeg_lqt; for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_lqt[i]; qtbl=(__s16 *)RTjpeg_cqt; for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_cqt[i];}static mmx_t RTjpeg_ones={0x0001000100010001LL};static mmx_t RTjpeg_half={0x7fff7fff7fff7fffLL};void RTjpeg_quant(__s16 *block, __s32 *qtbl){ int i; mmx_t *bl, *ql; ql=(mmx_t *)qtbl; bl=(mmx_t *)block; movq_m2r(RTjpeg_ones, mm6); movq_m2r(RTjpeg_half, mm7); for(i=16; i; i--) { movq_m2r(*(ql++), mm0); /* quant vals (4) */ movq_m2r(*bl, mm2); /* block vals (4) */ movq_r2r(mm0, mm1); movq_r2r(mm2, mm3); punpcklwd_r2r(mm6, mm0); /* 1 qb 1 qa */ punpckhwd_r2r(mm6, mm1); /* 1 qd 1 qc */ punpcklwd_r2r(mm7, mm2); /* 32767 bb 32767 ba */ punpckhwd_r2r(mm7, mm3); /* 32767 bd 32767 bc */ pmaddwd_r2r(mm2, mm0); /* 32767+bb*qb 32767+ba*qa */ pmaddwd_r2r(mm3, mm1); /* 32767+bd*qd 32767+bc*qc */ psrad_i2r(16, mm0); psrad_i2r(16, mm1); packssdw_r2r(mm1, mm0); movq_r2m(mm0, *(bl++)); }}#elsevoid RTjpeg_quant_init(void){}void RTjpeg_quant(__s16 *block, __s32 *qtbl){ int i; for(i=0; i<64; i++) block[i]=(__s16)((block[i]*qtbl[i]+32767)>>16);}#endif/* * Perform the forward DCT on one block of samples. */#ifdef MMXstatic mmx_t RTjpeg_C4 ={0x2D412D412D412D41LL};static mmx_t RTjpeg_C6 ={0x187E187E187E187ELL};static mmx_t RTjpeg_C2mC6={0x22A322A322A322A3LL};static mmx_t RTjpeg_C2pC6={0x539F539F539F539FLL};static mmx_t RTjpeg_zero ={0x0000000000000000LL};#else#define FIX_0_382683433 ((__s32) 98) /* FIX(0.382683433) */#define FIX_0_541196100 ((__s32) 139) /* FIX(0.541196100) */#define FIX_0_707106781 ((__s32) 181) /* FIX(0.707106781) */#define FIX_1_306562965 ((__s32) 334) /* FIX(1.306562965) */#define DESCALE10(x) (__s16)( ((x)+128) >> 8)#define DESCALE20(x) (__s16)(((x)+32768) >> 16)#define D_MULTIPLY(var,const) ((__s32) ((var) * (const)))#endifvoid RTjpeg_dct_init(void){ int i; for(i=0; i<64; i++) { RTjpeg_lqt[i]=(((__u64)RTjpeg_lqt[i]<<32)/RTjpeg_aan_tab[i]); RTjpeg_cqt[i]=(((__u64)RTjpeg_cqt[i]<<32)/RTjpeg_aan_tab[i]); }}void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip){#ifndef MMX __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; __s32 tmp10, tmp11, tmp12, tmp13; __s32 z1, z2, z3, z4, z5, z11, z13; __u8 *idataptr; __s16 *odataptr; __s32 *wsptr; int ctr; idataptr = idata; wsptr = RTjpeg_ws; for (ctr = 7; ctr >= 0; ctr--) { tmp0 = idataptr[0] + idataptr[7]; tmp7 = idataptr[0] - idataptr[7]; tmp1 = idataptr[1] + idataptr[6]; tmp6 = idataptr[1] - idataptr[6]; tmp2 = idataptr[2] + idataptr[5]; tmp5 = idataptr[2] - idataptr[5]; tmp3 = idataptr[3] + idataptr[4]; tmp4 = idataptr[3] - idataptr[4]; tmp10 = (tmp0 + tmp3); /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = (tmp1 + tmp2); tmp12 = tmp1 - tmp2; wsptr[0] = (tmp10 + tmp11)<<8; /* phase 3 */ wsptr[4] = (tmp10 - tmp11)<<8; z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ wsptr[2] = (tmp13<<8) + z1; /* phase 5 */ wsptr[6] = (tmp13<<8) - z1; tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ z11 = (tmp7<<8) + z3; /* phase 5 */ z13 = (tmp7<<8) - z3; wsptr[5] = z13 + z2; /* phase 6 */ wsptr[3] = z13 - z2; wsptr[1] = z11 + z4; wsptr[7] = z11 - z4; idataptr += rskip<<3; /* advance pointer to next row */ wsptr += 8; } wsptr = RTjpeg_ws; odataptr=odata; for (ctr = 7; ctr >= 0; ctr--) { tmp0 = wsptr[0] + wsptr[56]; tmp7 = wsptr[0] - wsptr[56]; tmp1 = wsptr[8] + wsptr[48]; tmp6 = wsptr[8] - wsptr[48]; tmp2 = wsptr[16] + wsptr[40]; tmp5 = wsptr[16] - wsptr[40]; tmp3 = wsptr[24] + wsptr[32]; tmp4 = wsptr[24] - wsptr[32]; tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; odataptr[0] = DESCALE10(tmp10 + tmp11); /* phase 3 */ odataptr[32] = DESCALE10(tmp10 - tmp11); z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ odataptr[16] = DESCALE20((tmp13<<8) + z1); /* phase 5 */ odataptr[48] = DESCALE20((tmp13<<8) - z1); tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ z11 = (tmp7<<8) + z3; /* phase 5 */ z13 = (tmp7<<8) - z3; odataptr[40] = DESCALE20(z13 + z2); /* phase 6 */ odataptr[24] = DESCALE20(z13 - z2); odataptr[8] = DESCALE20(z11 + z4); odataptr[56] = DESCALE20(z11 - z4); odataptr++; /* advance pointer to next column */ wsptr++; }#else volatile mmx_t tmp6, tmp7; register mmx_t *dataptr = (mmx_t *)odata; mmx_t *idata2 = (mmx_t *)idata; // first copy the input 8 bit to the destination 16 bits movq_m2r(RTjpeg_zero, mm2); movq_m2r(*idata2, mm0); movq_r2r(mm0, mm1); punpcklbw_r2r(mm2, mm0); movq_r2m(mm0, *(dataptr)); punpckhbw_r2r(mm2, mm1); movq_r2m(mm1, *(dataptr+1)); idata2 += rskip; movq_m2r(*idata2, mm0); movq_r2r(mm0, mm1); punpcklbw_r2r(mm2, mm0); movq_r2m(mm0, *(dataptr+2)); punpckhbw_r2r(mm2, mm1); movq_r2m(mm1, *(dataptr+3)); idata2 += rskip; movq_m2r(*idata2, mm0); movq_r2r(mm0, mm1); punpcklbw_r2r(mm2, mm0); movq_r2m(mm0, *(dataptr+4)); punpckhbw_r2r(mm2, mm1); movq_r2m(mm1, *(dataptr+5)); idata2 += rskip; movq_m2r(*idata2, mm0); movq_r2r(mm0, mm1); punpcklbw_r2r(mm2, mm0); movq_r2m(mm0, *(dataptr+6)); punpckhbw_r2r(mm2, mm1); movq_r2m(mm1, *(dataptr+7)); idata2 += rskip; movq_m2r(*idata2, mm0); movq_r2r(mm0, mm1); punpcklbw_r2r(mm2, mm0); movq_r2m(mm0, *(dataptr+8)); punpckhbw_r2r(mm2, mm1); movq_r2m(mm1, *(dataptr+9)); idata2 += rskip; movq_m2r(*idata2, mm0); movq_r2r(mm0, mm1); punpcklbw_r2r(mm2, mm0); movq_r2m(mm0, *(dataptr+10)); punpckhbw_r2r(mm2, mm1); movq_r2m(mm1, *(dataptr+11)); idata2 += rskip; movq_m2r(*idata2, mm0); movq_r2r(mm0, mm1); punpcklbw_r2r(mm2, mm0); movq_r2m(mm0, *(dataptr+12)); punpckhbw_r2r(mm2, mm1); movq_r2m(mm1, *(dataptr+13)); idata2 += rskip; movq_m2r(*idata2, mm0); movq_r2r(mm0, mm1); punpcklbw_r2r(mm2, mm0); movq_r2m(mm0, *(dataptr+14)); punpckhbw_r2r(mm2, mm1); movq_r2m(mm1, *(dataptr+15));/* Start Transpose to do calculations on rows */ movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into m5 movq_m2r(*(dataptr+13), mm6); // m23:m22|m21:m20 - third line (line 6)and copy into m2 movq_r2r(mm7, mm5); punpcklwd_m2r(*(dataptr+11), mm7); // m11:m01|m10:m00 - interleave first and second lines movq_r2r(mm6, mm2); punpcklwd_m2r(*(dataptr+15), mm6); // m31:m21|m30:m20 - interleave third and fourth lines movq_r2r(mm7, mm1); movq_m2r(*(dataptr+11), mm3); // m13:m13|m11:m10 - second line punpckldq_r2r(mm6, mm7); // m30:m20|m10:m00 - interleave to produce result 1 movq_m2r(*(dataptr+15), mm0); // m13:m13|m11:m10 - fourth line punpckhdq_r2r(mm6, mm1); // m31:m21|m11:m01 - interleave to produce result 2 movq_r2m(mm7,*(dataptr+9)); // write result 1 punpckhwd_r2r(mm3, mm5); // m13:m03|m12:m02 - interleave first and second lines movq_r2m(mm1,*(dataptr+11)); // write result 2 punpckhwd_r2r(mm0, mm2); // m33:m23|m32:m22 - interleave third and fourth lines movq_r2r(mm5, mm1); punpckldq_r2r(mm2, mm5); // m32:m22|m12:m02 - interleave to produce result 3 movq_m2r(*(dataptr+1), mm0); // m03:m02|m01:m00 - first line, 4x4 punpckhdq_r2r(mm2, mm1); // m33:m23|m13:m03 - interleave to produce result 4 movq_r2m(mm5,*(dataptr+13)); // write result 3 // last 4x4 done movq_r2m(mm1, *(dataptr+15)); // write result 4, last 4x4 movq_m2r(*(dataptr+5), mm2); // m23:m22|m21:m20 - third line movq_r2r(mm0, mm6); punpcklwd_m2r(*(dataptr+3), mm0); // m11:m01|m10:m00 - interleave first and second lines movq_r2r(mm2, mm7); punpcklwd_m2r(*(dataptr+7), mm2); // m31:m21|m30:m20 - interleave third and fourth lines movq_r2r(mm0, mm4); // movq_m2r(*(dataptr+8), mm1); // n03:n02|n01:n00 - first line punpckldq_r2r(mm2, mm0); // m30:m20|m10:m00 - interleave to produce first result movq_m2r(*(dataptr+12), mm3); // n23:n22|n21:n20 - third line punpckhdq_r2r(mm2, mm4); // m31:m21|m11:m01 - interleave to produce second result punpckhwd_m2r(*(dataptr+3), mm6); // m13:m03|m12:m02 - interleave first and second lines movq_r2r(mm1, mm2); // copy first line punpckhwd_m2r(*(dataptr+7), mm7); // m33:m23|m32:m22 - interleave third and fourth lines movq_r2r(mm6, mm5); // copy first intermediate result movq_r2m(mm0, *(dataptr+8)); // write result 1 punpckhdq_r2r(mm7, mm5); // m33:m23|m13:m03 - produce third result punpcklwd_m2r(*(dataptr+10), mm1); // n11:n01|n10:n00 - interleave first and second lines movq_r2r(mm3, mm0); // copy third line punpckhwd_m2r(*(dataptr+10), mm2); // n13:n03|n12:n02 - interleave first and second lines movq_r2m(mm4, *(dataptr+10)); // write result 2 out punpckldq_r2r(mm7, mm6); // m32:m22|m12:m02 - produce fourth result punpcklwd_m2r(*(dataptr+14), mm3); // n31:n21|n30:n20 - interleave third and fourth lines movq_r2r(mm1, mm4); movq_r2m(mm6, *(dataptr+12)); // write result 3 out punpckldq_r2r(mm3, mm1); // n30:n20|n10:n00 - produce first result punpckhwd_m2r(*(dataptr+14), mm0); // n33:n23|n32:n22 - interleave third and fourth lines movq_r2r(mm2, mm6); movq_r2m(mm5, *(dataptr+14)); // write result 4 out punpckhdq_r2r(mm3, mm4); // n31:n21|n11:n01- produce second result movq_r2m(mm1, *(dataptr+1)); // write result 5 out - (first result for other 4 x 4 block) punpckldq_r2r(mm0, mm2); // n32:n22|n12:n02- produce third result movq_r2m(mm4, *(dataptr+3)); // write result 6 out punpckhdq_r2r(mm0, mm6); // n33:n23|n13:n03 - produce fourth result movq_r2m(mm2, *(dataptr+5)); // write result 7 out movq_m2r(*dataptr, mm0); // m03:m02|m01:m00 - first line, first 4x4 movq_r2m(mm6, *(dataptr+7)); // write result 8 out// Do first 4x4 quadrant, which is used in the beginning of the DCT: movq_m2r(*(dataptr+4), mm7); // m23:m22|m21:m20 - third line movq_r2r(mm0, mm2); punpcklwd_m2r(*(dataptr+2), mm0); // m11:m01|m10:m00 - interleave first and second lines movq_r2r(mm7, mm4); punpcklwd_m2r(*(dataptr+6), mm7); // m31:m21|m30:m20 - interleave third and fourth lines movq_r2r(mm0, mm1); movq_m2r(*(dataptr+2), mm6); // m13:m12|m11:m10 - second line punpckldq_r2r(mm7, mm0); // m30:m20|m10:m00 - interleave to produce result 1 movq_m2r(*(dataptr+6), mm5); // m33:m32|m31:m30 - fourth line punpckhdq_r2r(mm7, mm1); // m31:m21|m11:m01 - interleave to produce result 2 movq_r2r(mm0, mm7); // write result 1 punpckhwd_r2r(mm6, mm2); // m13:m03|m12:m02 - interleave first and second lines psubw_m2r(*(dataptr+14), mm7); // tmp07=x0-x7 /* Stage 1 */ movq_r2r(mm1, mm6); // write result 2 paddw_m2r(*(dataptr+14), mm0); // tmp00=x0+x7 /* Stage 1 */ punpckhwd_r2r(mm5, mm4); // m33:m23|m32:m22 - interleave third and fourth lines paddw_m2r(*(dataptr+12), mm1); // tmp01=x1+x6 /* Stage 1 */ movq_r2r(mm2, mm3); // copy first intermediate result psubw_m2r(*(dataptr+12), mm6); // tmp06=x1-x6 /* Stage 1 */ punpckldq_r2r(mm4, mm2); // m32:m22|m12:m02 - interleave to produce result 3 movq_r2m(mm7, tmp7); movq_r2r(mm2, mm5); // write result 3 movq_r2m(mm6, tmp6); punpckhdq_r2r(mm4, mm3); // m33:m23|m13:m03 - interleave to produce result 4 paddw_m2r(*(dataptr+10), mm2); // tmp02=x2+5 /* Stage 1 */ movq_r2r(mm3, mm4); // write result 4/************************************************************************************************ End of Transpose************************************************************************************************/ paddw_m2r(*(dataptr+8), mm3); // tmp03=x3+x4 /* stage 1*/ movq_r2r(mm0, mm7); psubw_m2r(*(dataptr+8), mm4); // tmp04=x3-x4 /* stage 1*/ movq_r2r(mm1, mm6); paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 /* even 2 */ psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 /* even 2 */ psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 /* even 2 */ paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 /* even 2 */ psubw_m2r(*(dataptr+10), mm5); // tmp05=x2-x5 /* stage 1*/ paddw_r2r(mm7, mm6); // tmp12 + tmp13 /* stage 3 */ movq_m2r(tmp6, mm2); movq_r2r(mm0, mm3); psllw_i2r(2, mm6); // m8 * 2^2 paddw_r2r(mm1, mm0); pmulhw_m2r(RTjpeg_C4, mm6); // z1 psubw_r2r(mm1, mm3); movq_r2m(mm0, *dataptr); movq_r2r(mm7, mm0); /* Odd part */ movq_r2m(mm3, *(dataptr+8)); paddw_r2r(mm5, mm4); // tmp10 movq_m2r(tmp7, mm3); paddw_r2r(mm6, mm0); // tmp32 paddw_r2r(mm2, mm5); // tmp11 psubw_r2r(mm6, mm7); // tmp33 movq_r2m(mm0, *(dataptr+4)); paddw_r2r(mm3, mm2); // tmp12 /* stage 4 */ movq_r2m(mm7, *(dataptr+12)); movq_r2r(mm4, mm1); // copy of tmp10
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -