⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sinf_simd.asm

📁 在ADSP-2126x上编写的优化过的FFT程序(用c和汇编编写)。
💻 ASM
字号:
/*******************************************************************************
*
* Functions:  sinf_simd,cosf_simd,cosf_sinf_simd
*
* Synopsis:  #include "math_simd.h"
*            
* Prototypes: 
*				 	void sinf_simd(float *x,	// pointer to inputs
*										float *y);	// pointer to outputs
*					void cosf_simd(float *x,	// pointer to inputs
*										float *y);	// pointer to outputs
*					void cosf_sinf_simd(float *x,	// pointer to inputs
*											  float *y);// pointer to outputs
* Description:
*					These functions calculate the sine and cosine of 
*					multichannel inputs.  
*					sinf_simd performs the following calculations:
*						y[0]=sinf(x[0]);
*						y[1]=sinf(x[1]);
*					cosf_simd performs the following calculations:
*						y[0]=cosf(x[0]);
*						y[1]=cosf(x[1]);
*					cosf_sinf_simd performs the following calculations:
*						y[0]=cosf(x[0]);
*						y[1]=sinf(x[1]);
*
* Assumptions:
*            All arrays must start on even address boundaries.
*
* Examples:
*				#include "math_simd.h"
*				float x[2]={PI,PI/2};
*				float y[2];
*				sinf_simd(x,y);
*				cosf_simd(x,y);
*				cosf_sinf_simd(x,y);
*
*
* Cycle Counts:
*
*            Function        Cycles*
*            --------        ------
*            sinf_simd			70 
*            cosf_simd			69
*            cosf_sinf_simd	75
*
*                         *cycle counts include the basic overheads in
*                          argument passing and in calling the function
*
* Author:	
*
* Revisions:
*				Modified from SISD to SIMD.  June, 2003
*				Darrel Judd
*				Judd Labs, Inc.
*				801-756-2057
*				drjudd@ieee.org
*******************************************************************************/

#include "lib_glob.h"
#include "mth_glob.h"

// Assembler setup and the static scratch area shared by all three entry points.
// MACHINE_PRECISION, Code_Space_Name and RTL_FILENAME are macros that come from
// the included headers (lib_glob.h / mth_glob.h); their values are not visible here.
.PRECISION=MACHINE_PRECISION;
.segment/dm seg_dmda;
// Register save area. The entry points store i2, r3, r5 and r7 here (slots 0..3;
// a store made while SIMD is on also writes the following slot), and
// restore_state reads slots 0..3 back.
// NOTE(review): this is one fixed buffer, not a stack frame, so a second call
// that starts before the first one finishes would overwrite it - confirm that
// callers never nest or interrupt these routines.
.var SaveStack[10];
// Caller's MODE1, saved on entry and reloaded in restore_state.
.var SaveMode1;
.SEGMENT/CODE	Code_Space_Name;
.FILE 		RTL_FILENAME;

// Entry points exported to C (see the prototypes in the file header).
.GLOBAL	    	_sinf_simd, _cosf_simd,_cosf_sinf_simd;
/*
 * _cosf_sinf_simd:  y[0] = cosf(x[0]);  y[1] = sinf(x[1]);
 *
 * In:   r4 = address of x[0..1],  r8 = address of y[0..1]
 * Out:  falls into compute_modulo with
 *         F8 / S8   = |x[0]| / |x[1]|
 *         F4 / S4   = |x[0]|+pi/2 / |x[1]|+0
 *         F2 / S2   = 1/pi
 *         F12 / S12 = 0.5 / 0.0      (cosine / sine adjustment)
 *         F7 / S7   = +1.0 / sign of x[1]
 *         i_reg     = sine_data+6,   i2 = output address
 *
 * Fix relative to the earlier version: r7 used to be overwritten with the
 * sign flag BEFORE it was stored in SaveStack, so restore_state returned the
 * sign flag to the caller instead of the caller's r7.  i2, r3, r5 and r7 are
 * now saved first, exactly as _cosf_simd and _sinf_simd do.  The separate
 * copy of x into r1 is dropped (F4 still holds x when |x| is taken), so the
 * instruction count is unchanged.
 *
 * The instruction placed directly after each SIMD on/off change is chosen
 * so that it behaves the same in either mode.
 */
_cosf_sinf_simd:
		F12=0.5;		    	// cosine lane: subtracted from N after the modulo
		s12 = 0.0;			// sine lane: no adjustment
		dm(SaveMode1)=mode1;		// remember caller's MODE1 for restore_state
		BIT CLR MODE1 65536;	     	/*Set to 40-bit mode, */

		i_reg=SaveStack;	// setup to save registers
		r0=i2;
		dm(i_reg,1)=r0;	// save i2
		dm(i_reg,1)=r3;	// save r3
		dm(i_reg,1)=r5;	// save r5
		dm(i_reg,1)=r7;	// save the caller's r7 before it becomes the sign flag

		bit set mode1 0x00200000 ;	// Turn on SIMD
		i_reg = r4;	// get inputs
		f4=dm(0,i_reg);	// f4=x[0], s4=x[1]
		f7=1.0;	/*Sign flag is set to 1*/
		f4= pass f4;	// set ALU flags from the sign of each input
		IF LT f7=-f7;		    /*If input was negative, invert sign*/

		bit clr mode1 0x00200000 ;	// Turn off SIMD
		i_reg=sine_data;	// table start: {pi/2, 0} pair comes first
		f7=1.0;	// first lane only: cosine is even, so its sign flag stays +1

		bit set mode1 0x00200000 ;	// Turn on SIMD
		i2=r8; // save output address in i2

		JUMP (PC, compute_modulo) (DB); //Follow sin code from here!
		F8=ABS F4,f2=dm(i_reg,4);	//Use absolute value of input,fetch pi/2 and 0
		F4=F8+F2, F2=mem(i_reg,2);	// fetch 1/pi
		
		
/*
 * _cosf_simd:  y[0] = cosf(x[0]);  y[1] = cosf(x[1]);
 *
 * In:   r4 = address of x[0..1],  r8 = address of y[0..1]
 * Out:  jumps to compute_modulo with
 *         F8  = |x|          F4 = |x| + pi/2      F2 = 1/pi
 *         F12 = 0.5          F7 = +1.0            i_reg = sine_data+6
 *       (each value is present in both SIMD lanes), i2 = output address.
 *
 * The input sign is never tested: the sign flag is simply +1.0 because the
 * argument used from here on is |x|.
 */
_cosf_simd:
		dm(SaveMode1)=mode1;		// remember caller's MODE1 for restore_state
		bit set mode1 0x00200000 ;	// Turn on SIMD
		i_reg = r4;	// get inputs
		f4=dm(0,i_reg);	// store inputs in f4,s4
		i_reg=SaveStack;	// setup to save registers
		r0=i2;	
		dm(i_reg,1)=r0;	// save i2
		i2=r8; // save output address in i2
				
		dm(i_reg,1)=r3;	// save r3;
		dm(i_reg,1)=r5;	// save r5;
		R1=PASS R4,dm(i_reg,1)=R7;	// Read x input value,save r7			 
		F7=1.0;	/*Sign flag is set to 1 (only after r7 has been saved above)*/
		
		i_reg=sine_data+2;	// skip the {pi/2, 0} pair; start at the {pi/2, pi/2} pair
		BIT CLR MODE1 65536;	     	/*Set to 40-bit mode, */
		F8=ABS F1,f2=dm(i_reg,2);	//Use absolute value of input,fetch pi/2
		
		JUMP (PC, compute_modulo) (DB); //Follow sin code from here!
		// The next two instructions are the delay slots of the jump above.
		F4=F8+F2, F2=mem(i_reg,2);	// F4=|x|+pi/2, fetch 1/pi
		F12=0.5;		    	/*Used later after modulo for cos*/
		
		

/*
 * _sinf_simd:  y[0] = sinf(x[0]);  y[1] = sinf(x[1]);
 *
 * In:   r4 = address of x[0..1],  r8 = address of y[0..1]
 * Out:  falls through into compute_modulo with
 *         F8  = |x|          F4 = |x|             F2 = 1/pi
 *         F12 = 0            F7 = sign of x (+1.0 or -1.0)
 *         i_reg = sine_data+6
 *       (per SIMD lane), i2 = output address.
 *
 * Change relative to the earlier version: a stand-alone "F8=ABS F1;" that
 * was immediately recomputed by the following "f8=abs f1, F2=mem(...)" has
 * been removed.  It had no effect on the result; dropping it saves one
 * cycle compared with the count listed in the file header.
 */
_sinf_simd:
		dm(SaveMode1)=mode1;		// remember caller's MODE1 for restore_state
		bit set mode1 0x00200000;	// Turn on SIMD
		i_reg = r4;	// get inputs
		f4=dm(0,i_reg);	// store inputs in f4,s4
		i_reg=SaveStack;	// setup to save registers
		r0=i2;
		dm(i_reg,1)=r0;	// save i2
		i2=r8;	// save output address in i2

		dm(i_reg,1)=r3;	// save r3;
		dm(i_reg,1)=r5;	// save r5;
		R1=PASS R4,dm(i_reg,1)=R7;	// Read x input value,save r7
		F7=1.0;	/*Sign flag is set to 1 (only after r7 has been saved above)*/

		i_reg=sine_data+4;	// sine needs no pi/2 offset: start at the 1/pi pair
		BIT CLR MODE1 65536;	     	/*Set to 40-bit mode, */

		f8=abs f1, F2=mem(i_reg,2);	//Use absolute value of input, fetch 1/pi
		R12=R12-R12;		/*Used later after modulo (no cosine adjustment)*/
		F1=PASS F1, F4=F8;	// test sign of x; F4=|x| for the modulo step
		IF LT F7=-F7;		    /*If input was negative, invert sign*/



/*
 * Shared tail for _sinf_simd, _cosf_simd and _cosf_sinf_simd.
 *
 * Expected on entry (set up by the entry points, per SIMD lane):
 *   F4    = |x| (+ pi/2 for a cosine lane)
 *   F2    = 1/pi
 *   F8    = |x|
 *   F12   = 0.5 for a cosine lane, 0 for a sine lane
 *   F7    = sign flag (+1.0 / -1.0)
 *   i_reg = sine_data+6 (the C1 pair); every mem(i_reg,2) below steps one pair
 *   i2    = address of the output pair
 *
 * Steps:
 *   N  = FIX(F4 * 1/pi)                 flip F7 when N is odd
 *   XN = N - F12
 *   f  = (|x| - XN*C1) - XN*C2          C1+C2 = pi, split in two for accuracy
 *   g  = f*f
 *   R  = g*(R1 + g*(R2 + ... + g*R7))   Horner form, 6 loop passes
 *   result = F7 * (f + f*R)
 */
compute_modulo:	
		F4=F4*F2;	    	    /*Compute fp modulo value*/
		R2=FIX F4;		    /*Convert to integer N (original note: round to nearest; the rounding mode is not set in this file)*/
		BTST R2 BY 0;		    /*Test for odd number*/
		IF NOT SZ F7=-F7;	    /*Invert sign if odd modulo*/
		F4=FLOAT R2;		    /*Return to fp*/
		F4=F4-F12, F2=mem(i_reg,2); /*F4=XN: subtract the cos adjust (0.5 or 0); fetch C1*/

		//compute_f:	
		F12=F2*F4, F2=mem(i_reg,2); /*Compute XN*C1 (uses the C1 already in F2); fetch C2*/
		F2=F2*F4, F12=F8-F12;	    /*Compute |X|-XN*C1, and XN*C2*/
		F8=F12-F2, F4=mem(i_reg,2); /*Compute f=(|X|-XN*C1)-XN*C2; the eps pair fetched here is overwritten on the next line*/
		
		//compute_R:	
		F12=F8*F8, F4=mem(i_reg,2);	/*g=f*f; fetch R7 as the initial sum*/
		LCNTR=6, DO compute_poly UNTIL LCE;	/*6 passes: fetch and add R6..R1*/
		    F4=F12*F4, F2=mem(i_reg,2);	/*Compute sum*g*/
compute_poly:	    F4=F2+F4;			/*Compute sum=sum+next r*/
		F4=F12*F4;			/*Final multiply by g*/
	   F4=F4*F8;		    /*Compute f*R*/
		F12=F4+F8;		    /*Compute Result=f+f*R*/
compute_sign:	
		// FETCH_RETURN is a macro from the included headers (not visible here).
		F0=F12*F7, FETCH_RETURN	    /*Restore sign of result*/

restore_state:	
		F0=RND F0;		// round the result before it is stored
		dm(i2,2)=f0;		// store the result pair through the output pointer in i2
		i_reg=SaveStack;	// rewind to the start of the save area
		MODE1=dm(SaveMode1);	// put back the caller's MODE1
		r0=dm(i_reg,1);i2=r0;	// restore i2 (slot 0)
		r3=dm(i_reg,1);		// slot 1
		r5=dm(i_reg,1);		// slot 2
		r7=dm(i_reg,1);		// slot 3: whatever the entry point stored as r7
		
		// RESTORE_STACK and RESTORE_FRAME are macros from the included headers;
		// they occupy the delay slots of the return.
		RETURN (DB);
		RESTORE_STACK
		RESTORE_FRAME
._sinf_simd.end:
._cosf_simd.end:
._cosf_sinf_simd.end:
.ENDSEG;

.SEGMENT/SPACE	Data_Space_Name;

.PRECISION=MEMORY_PRECISION;

/*
 * Constant table walked with i_reg by the code above.
 *
 * Every constant is stored as an adjacent pair because the code reads the
 * table while SIMD is on: one element of the pair goes to each lane.
 *
 * Starting offsets used by the entry points:
 *   sine_data     _cosf_sinf_simd  {pi/2, 0}   (cosine lane, sine lane)
 *   sine_data+2   _cosf_simd       {pi/2, pi/2}
 *   sine_data+4   _sinf_simd       {1/pi, 1/pi}
 * From sine_data+6 onward the shared code at compute_modulo reads the pairs
 * in order: C1, C2, eps, then R7 down to R1.
 * The eps pair is fetched but its value is overwritten before any use.
 */
.VAR sine_data[26] =	
			1.57079632679489661923,	// pi/2 (cosine lane of cosf_sinf)
			0.0,							// 0    (sine lane of cosf_sinf)
			1.57079632679489661923,	// pi/2
			1.57079632679489661923,	// pi/2 
			0.31830988618379067154,	/*1/PI*/
			0.31830988618379067154,	/*1/PI*/ 
			 3.14160156250000000000,	/*C1, almost PI*/
			 3.14160156250000000000,	/*C1, almost PI*/
			-8.908910206761537356617E-6,	/*C2, PI=C1+C2*/
			-8.908910206761537356617E-6,	/*C2, PI=C1+C2*/
			 9.536743164E-7,		/*eps, sin(eps)=eps (fetched, not used)*/
			 9.536743164E-7,		/*eps, sin(eps)=eps (fetched, not used)*/
			-0.737066277507114174E-12,	/*R7 (initial sum of the polynomial)*/
			-0.737066277507114174E-12,	/*R7*/
			 0.160478446323816900E-9,	/*R6*/
			 0.160478446323816900E-9,	/*R6*/
			-0.250518708834705760E-7,	/*R5*/
			-0.250518708834705760E-7,	/*R5*/
			 0.275573164212926457E-5,	/*R4*/
			 0.275573164212926457E-5,	/*R4*/
			-0.198412698232225068E-3,	/*R3*/
			-0.198412698232225068E-3,	/*R3*/
			 0.833333333327592139E-2,	/*R2*/
			 0.833333333327592139E-2,	/*R2*/
			-0.166666666666659653,		/*R1*/
			-0.166666666666659653;		/*R1*/

.ENDSEG;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -