📄 msvc_dwt_mmx_local.h
字号:
MOVQ [EDX],MM3 ADD EAX,8 ADD EBX,8 ADD EDX,8 SUB ECX,1 JNZ loop_minus1 EMMS // Clear MMX registers for use by FPU } } else assert(0);}/*****************************************************************************//* INLINE simd_irrev_h_synth *//*****************************************************************************/inline void simd_irrev_h_synth(short int *src, short int *dst, int samples, int i_lambda, short int remainder, short int pre_offset) /* The current implementation supports only the CDF 9/7 transform. For an explanation, see the comments appearing with `mmx_irrev_v_synth'. */{ int quads = (samples+3)>>2; if (quads <= 0) return; short int q_lambda[4], q_offset[4]; q_lambda[0]=q_lambda[1]=q_lambda[2]=q_lambda[3] = remainder; q_offset[0]=q_offset[1]=q_offset[2]=q_offset[3] = pre_offset; if (i_lambda == -2) { // Integer part of lifting step factor is -2. // The actual lifting factor here should be -1.586134 __asm { MOV ECX,quads // Set up counter used for looping MOV EAX,src MOV EDX,dst MOVQ MM0,q_lambda MOVQ MM1,q_offsetloop_minus2: MOVQ MM2,[EAX]; PADDSW MM2,[EAX+2] // Add source sample 2 MOVQ MM3,[EDX] PADDSW MM3,MM2 // Here is a -1 contribution PADDSW MM3,MM2 // Here is another -1 contribution PADDSW MM2,MM1 // Add pre-offset for rounding PMULHW MM2,MM0 // Multiply by lambda and discard 16 LSB's PSUBSW MM3,MM2 // Final contribution MOVQ [EDX],MM3 ADD EAX,8 ADD EDX,8 SUB ECX,1 JNZ loop_minus2 EMMS // Clear MMX registers for use by FPU } } else if ((i_lambda == 0) && (remainder < 0)) { // Integer part of lifting step factor is 0 and factor is negative. // The actual lifting factor here should be -0.05298. This is the one // which is sensitive to overflow/underflow in the initial accumulation. q_lambda[0] = q_lambda[1] = q_lambda[2] = q_lambda[3] = q_lambda[0]<<3; q_offset[0]=q_offset[1]=q_offset[2]=q_offset[3] = 4; // A post-offset __asm { MOV ECX,quads // Set up counter used for looping MOV EAX,src MOV EDX,dst MOVQ MM0,q_lambda MOVQ MM1,q_offsetloop_0neg: MOVQ MM2,[EAX]; PMULHW MM2,MM0 // Multiply by lambda and discard 16 LSB's PXOR MM4,MM4 PSUBSW MM4,[EAX+2] // Load negated source samples 2 PMULHW MM4,MM0 // Multiply by lambda and discard 16 LSB's MOVQ MM3,[EDX] PSUBSW MM2,MM4 // Accumulate non-negated scaled sources. PADDSW MM2,MM1 // Add post-offset for rounding PSRAW MM2,3 // Divide by 8 (we've been working with 8x lambda) PSUBSW MM3,MM2 // Final contribution MOVQ [EDX],MM3 ADD EAX,8 ADD EDX,8 SUB ECX,1 JNZ loop_0neg EMMS // Clear MMX registers for use by FPU } } else if (i_lambda == 0) { // Integer part of lifting step factor is 0 and factor is positive. // The actual lifting factor here should be 0.443507 __asm { MOV ECX,quads // Set up counter used for looping MOV EAX,src MOV EDX,dst MOVQ MM0,q_lambda MOVQ MM1,q_offsetloop_0pos: MOVQ MM2,[EAX]; PADDSW MM2,[EAX+2] // Add source sample 2 MOVQ MM3,[EDX] PADDSW MM2,MM1 // Add pre-offset for rounding PMULHW MM2,MM0 // Multiply by lambda and discard 16 LSB's PSUBSW MM3,MM2 // Final contribution MOVQ [EDX],MM3 ADD EAX,8 ADD EDX,8 SUB ECX,1 JNZ loop_0pos EMMS // Clear MMX registers for use by FPU } } else if (i_lambda == 1) { // Integer part of lifting step factor is 1. // The actual lifting factor here should be 0.882911 __asm { MOV ECX,quads // Set up counter used for looping MOV EAX,src MOV EDX,dst MOVQ MM0,q_lambda MOVQ MM1,q_offsetloop_plus1: MOVQ MM2,[EAX]; PADDSW MM2,[EAX+2] // Add source sample 2 MOVQ MM3,[EDX] PSUBSW MM3,MM2 // Here is a +1 contribution PADDSW MM2,MM1 // Add pre-offset for rounding PMULHW MM2,MM0 // Multiply by lambda and discard 16 LSB's PSUBSW MM3,MM2 // Final contribution MOVQ [EDX],MM3 ADD EAX,8 ADD EDX,8 SUB ECX,1 JNZ loop_plus1 EMMS // Clear MMX registers for use by FPU } } else assert(0);}/*****************************************************************************//* INLINE simd_rev_h_synth *//*****************************************************************************/inline void simd_rev_h_synth(short int *src, short int *dst, int samples, int downshift, int i_lambda){ int quads = (samples+3)>>2; if (quads <= 0) return; short int q_offset[4]; q_offset[0] = q_offset[1] = q_offset[2] = q_offset[3] = (short int)((1<<downshift)>>1); if (i_lambda == 1) { __asm { MOV ECX,quads // Set up counter used for looping MOV EAX,src MOV EDX,dst MOVQ MM0,q_offset MOVD MM1,downshiftloop_plus1: MOVQ MM2,MM0 // start with the offset PADDSW MM2,[EAX] // add 1'st source sample PADDSW MM2,[EAX+2] // add 2'nd source sample MOVQ MM3,[EDX] PSRAW MM2,MM1 // shift rigth by the `downshift' value PSUBSW MM3,MM2 // subtract from dest sample MOVQ [EDX],MM3 ADD EAX,8 ADD EDX,8 SUB ECX,1 JNZ loop_plus1 EMMS // Clear MMX registers for use by FPU } } else if (i_lambda == -1) { __asm { MOV ECX,quads // Set up counter used for looping MOV EAX,src MOV EDX,dst MOVQ MM0,q_offset MOVD MM1,downshiftloop_minus1: MOVQ MM2,MM0 // start with the offset PSUBSW MM2,[EAX] // subtract 1'st source sample PSUBSW MM2,[EAX+2] // subtract 2'nd source sample MOVQ MM3,[EDX] PSRAW MM2,MM1 // shift rigth by the `downshift' value PSUBSW MM3,MM2 // subtract from dest sample MOVQ [EDX],MM3 ADD EAX,8 ADD EBX,8 ADD EDX,8 SUB ECX,1 JNZ loop_minus1 EMMS // Clear MMX registers for use by FPU } } else assert(0);}/*****************************************************************************//* INLINE simd_interleave *//*****************************************************************************/inline void simd_interleave(short int *src1, short int *src2, short int *dst, int pairs){ int octets = (pairs+3)>>2; if (octets <= 0) return; __asm { MOV ECX,octets // Set up counter used for looping MOV EAX,src1 MOV EBX,src2 MOV EDX,dstloop_ilv: MOVQ MM0,[EAX] MOVQ MM2,MM0 MOVQ MM1,[EBX] PUNPCKLWD MM2,MM1 MOVQ [EDX],MM2 PUNPCKHWD MM0,MM1 MOVQ [EDX+8],MM0 ADD EAX,8 ADD EBX,8 ADD EDX,16 SUB ECX,1 JNZ loop_ilv EMMS // Clear MMX registers for use by FPU }}/*****************************************************************************//* INLINE simd_upshifted_interleave *//*****************************************************************************/inline void simd_upshifted_interleave(short int *src1, short int *src2, short int *dst, int pairs, int upshift){ int octets = (pairs+3)>>2; if (octets <= 0) return; __asm { MOV ECX,octets // Set up counter used for looping MOV EAX,src1 MOV EBX,src2 MOV EDX,dst MOVD MM3,upshiftloop_shift_ilv: MOVQ MM0,[EAX] PSLLW MM0,MM3 MOVQ MM2,MM0 MOVQ MM1,[EBX] PSLLW MM1,MM3 PUNPCKLWD MM2,MM1 MOVQ [EDX],MM2 PUNPCKHWD MM0,MM1 MOVQ [EDX+8],MM0 ADD EAX,8 ADD EBX,8 ADD EDX,16 SUB ECX,1 JNZ loop_shift_ilv EMMS // Clear MMX registers for use by FPU }}#endif // MSVC_DWT_MMX_LOCAL_H
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -