/* File: sinf_simd.asm  (code-viewer page header "font size:" removed; it is not part of the source) */
/*******************************************************************************
*
* Functions: sinf_simd,cosf_simd,cosf_sinf_simd
*
* Synopsis: #include "math_simd.h"
*
* Prototypes:
* void sinf_simd(float *x, // pointer to inputs
* float *y); // pointer to outputs
* void cosf_simd(float *x, // pointer to inputs
* float *y); // pointer to outputs
* void cosf_sinf_simd(float *x, // pointer to inputs
* float *y);// pointer to outputs
* Description:
* These functions calculate the sine and cosine of
* multichannel inputs.
* sinf_simd performs the following calculations:
* y[0]=sinf(x[0]);
* y[1]=sinf(x[1]);
* cosf_simd performs the following calculations:
* y[0]=cosf(x[0]);
* y[1]=cosf(x[1]);
* cosf_sinf_simd performs the following calculations:
* y[0]=cosf(x[0]);
* y[1]=sinf(x[1]);
*
* Assumptions:
* All arrays must start on even address boundaries.
*
* Examples:
* #include "math_simd.h"
* float x[2]={PI,PI/2};
* float y[2];
* sinf_simd(x,y);
* cosf_simd(x,y);
* cosf_sinf_simd(x,y);
*
*
* Cycle Counts:
*
* Function Cycles*
* -------- ------
* sinf_simd 70
* cosf_simd 69
* cosf_sinf_simd 75
*
* *cycle counts include the basic overheads in
* argument passing and in calling the function
*
* Author:
*
* Revisions:
* Modified from SISD to SIMD. June, 2003
* Darrel Judd
* Judd Labs, Inc.
* 801-756-2057
* drjudd@ieee.org
*******************************************************************************/
#include "lib_glob.h"
#include "mth_glob.h"
// Assembler setup and private scratch storage shared by the three entry points.
// MACHINE_PRECISION, Code_Space_Name and RTL_FILENAME are not defined in this
// file; presumably they come from lib_glob.h / mth_glob.h -- confirm there.
.PRECISION=MACHINE_PRECISION;
.segment/dm seg_dmda;
// Save area written through i_reg on entry and read back in restore_state.
// The code stores i2, r3, r5 and r7 here, in that order.
// NOTE(review): static storage, so the routines do not look reentrant -- confirm
// that no interrupt handler calls them.
.var SaveStack[10];
// Copy of MODE1 taken on entry; reloaded into MODE1 in restore_state.
.var SaveMode1;
.SEGMENT/CODE Code_Space_Name;
.FILE RTL_FILENAME;
// Exported entry points (C names: sinf_simd, cosf_simd, cosf_sinf_simd).
.GLOBAL _sinf_simd, _cosf_simd,_cosf_sinf_simd;
/*
 * _cosf_sinf_simd -- y[0]=cosf(x[0]); y[1]=sinf(x[1]);
 *
 * In:   r4 = pointer to inputs x, r8 = pointer to outputs y.
 * Does: saves MODE1 plus i2, r3, r5, r7 in SaveMode1/SaveStack, loads both
 *       inputs, builds the per-lane sign flag and cos/sin adjust, then jumps
 *       to compute_modulo, which finishes the evaluation and restores state.
 *
 * Lane setup handed to compute_modulo:
 *   first lane  (cosine): F12 = 0.5, F7 = +1.0, F4 = |x| + pi/2
 *   second lane (sine)  : S12 = 0.0, S7 = sign(x), S4 = |x| + 0
 *
 * Fix: F7 is used as the sign flag before the register-save sequence runs,
 * so storing R7 there saved the flag (1.0) instead of the caller's r7, and
 * the shared tail then "restored" 1.0 into r7.  The caller's r7 is now
 * copied into r2 on entry and r2 is what gets stored in the r7 slot.  r2 is
 * not written between that copy and the store; it is overwritten only
 * afterwards, in the delay slots.  This costs one extra instruction compared
 * with the original sequence.
 *
 * The instruction order around the MODE1 bit operations is deliberate (one
 * instruction separates each SIMD on/off from the first instruction that
 * depends on it); do not reorder without checking MODE1 effect latency.
 */
_cosf_sinf_simd:
F12=0.5; //Used later after modulo for cos
s12 = 0.0; //Used later after modulo for sine
r2=r7; // keep caller's r7; F7 becomes the sign flag before the save below
dm(SaveMode1)=mode1; // remember caller's MODE1
bit set mode1 0x00200000 ; // Turn on SIMD
i_reg = r4; // get inputs
f4=dm(0,i_reg); // load inputs into f4,s4
f7=1.0; /*Sign flag is set to 1*/
f4= pass f4; // set flags from the inputs
IF LT f7=-f7; /*If input was negative, invert sign*/
bit clr mode1 0x00200000 ; // Turn off SIMD
i_reg=SaveStack; // setup to save registers
f7=1.0; // set cosine side positive always (sine-lane flag is left as computed)
r0=i2;
dm(i_reg,1)=r0; // save i2
i2=r8; // save output address in i2
dm(i_reg,1)=r3; // save r3;
bit set mode1 0x00200000 ; // Turn on SIMD
dm(i_reg,1)=r5; // save r5;
R1=PASS R4,dm(i_reg,1)=R2; // Read x input value, save caller's r7 (copied to r2 at entry)
i_reg=sine_data; // table start: {pi/2, 0} offsets for the cos/sin lanes
BIT CLR MODE1 65536; /*Set to 40-bit mode, */
JUMP (PC, compute_modulo) (DB); //Follow sin code from here!
// The next two instructions are the delay slots of the JUMP above.
F8=ABS F1,f2=dm(i_reg,4); //Use absolute value of input,fetch pi/2 and 0
F4=F8+F2, F2=mem(i_reg,2); // add per-lane offset, fetch 1/pi
/*
 * _cosf_simd -- y[0]=cosf(x[0]); y[1]=cosf(x[1]);
 *
 * In:   r4 = pointer to inputs x, r8 = pointer to outputs y.
 * Does: saves MODE1 plus i2, r3, r5, r7, loads both inputs, then jumps to
 *       compute_modulo with F4 = |x| + pi/2, F2 = 1/pi, F8 = |x|,
 *       F7 = +1.0 (sign flag) and F12 = 0.5 (cos adjust).
 *       compute_modulo finishes the evaluation and restores the saved state.
 */
_cosf_simd:
dm(SaveMode1)=mode1; // remember caller's MODE1
bit set mode1 0x00200000 ; // Turn on SIMD
i_reg = r4; // get inputs
f4=dm(0,i_reg); // load inputs into f4,s4
i_reg=SaveStack; // setup to save registers
r0=i2;
dm(i_reg,1)=r0; // save i2
i2=r8; // save output address in i2
dm(i_reg,1)=r3; // save r3;
dm(i_reg,1)=r5; // save r5;
R1=PASS R4,dm(i_reg,1)=R7; // Read x input value,save r7 (before F7 is reused below)
F7=1.0; /*Sign flag is set to 1*/
i_reg=sine_data+2; // skip the {pi/2, 0} pair; point at the {pi/2, pi/2} pair
BIT CLR MODE1 65536; /*Set to 40-bit mode, */
F8=ABS F1,f2=dm(i_reg,2); //Use absolute value of input,fetch pi/2
JUMP (PC, compute_modulo) (DB); //Follow sin code from here!
// The next two instructions are the delay slots of the JUMP above.
F4=F8+F2, F2=mem(i_reg,2); // F4 = |x| + pi/2, fetch 1/pi
F12=0.5; /*Used later after modulo for cos*/
/*
 * _sinf_simd -- y[0]=sinf(x[0]); y[1]=sinf(x[1]);
 *
 * In:   r4 = pointer to inputs x, r8 = pointer to outputs y.
 * Does: saves MODE1 plus i2, r3, r5, r7, loads both inputs, then falls
 *       through into compute_modulo with F4 = F8 = |x|, F2 = 1/pi,
 *       F12 = 0 (no cos adjust) and F7 = +1.0 or -1.0 according to the
 *       sign of the input.
 */
_sinf_simd:
dm(SaveMode1)=mode1; // remember caller's MODE1
bit set mode1 0x00200000; // Turn on SIMD
i_reg = r4; // get inputs
f4=dm(0,i_reg); // load inputs into f4,s4
i_reg=SaveStack; // setup to save registers
r0=i2;
dm(i_reg,1)=r0; // save i2
i2=r8; // keep output address in i2
dm(i_reg,1)=r3; // save r3;
dm(i_reg,1)=r5; // save r5;
R1=PASS R4,dm(i_reg,1)=R7; // Read x input value,save r7 (before F7 is reused below)
F7=1.0; /*Sign flag is set to 1*/
i_reg=sine_data+4; // skip both pi/2 pairs; point at the 1/PI pair
BIT CLR MODE1 65536; /*Set to 40-bit mode, */
// NOTE(review): the next instruction is repeated by the one after it; it looks
// redundant unless it is deliberate padding -- confirm before removing.
F8=ABS F1; //Use absolute value of input
f8=abs f1, F2=mem(i_reg,2); // F8 = |x|, fetch 1/pi
R12=R12-R12; /*Used later after modulo*/ // zero: sine needs no cos adjust
F1=PASS F1, F4=F8; // set flags from the signed input; F4 = |x|
IF LT F7=-F7; /*If input was negative, invert sign*/
/*
 * compute_modulo -- shared tail of all three entry points.  Reached by a
 * delayed JUMP from _cosf_sinf_simd and _cosf_simd and by fall-through from
 * _sinf_simd.
 *
 * Expected on entry (set up by the entry points):
 *   F4  = |x| (plus pi/2 for a cosine lane)    F2  = 1/pi
 *   F8  = |x|                                  F12 = 0.5 (cos) or 0 (sin)
 *   F7  = sign flag (+1.0 / -1.0)              i2  = output address
 *   i_reg points at the C1 pair of sine_data (assuming mem(i_reg,2) is a
 *   post-modify access like dm(i_reg,2); the macro is not defined in this
 *   file -- confirm).
 *
 * Steps: range reduction by multiples of pi (C1 + C2 split), polynomial in
 * g = f*f using R7..R1, sign restore, store to the output address, then
 * restore MODE1, i2, r3, r5, r7 and return.
 */
compute_modulo:
F4=F4*F2; /*Compute fp modulo value*/
R2=FIX F4; /*Round nearest fractional portion*/
BTST R2 BY 0; /*Test for odd number*/
IF NOT SZ F7=-F7; /*Invert sign if odd modulo*/
F4=FLOAT R2; /*Return to fp*/
F4=F4-F12, F2=mem(i_reg,2); /*Subtract cos adjust (F12) to form XN in F4; fetch C1*/
//compute_f:
F12=F2*F4, F2=mem(i_reg,2); /*Compute XN*C1; fetch C2*/
F2=F2*F4, F12=F8-F12; /*Compute |X|-XN*C1, and XN*C2*/
F8=F12-F2, F4=mem(i_reg,2); /*Compute f=(|X|-XN*C1)-XN*C2; value fetched here is overwritten by the next fetch before use*/
//compute_R:
F12=F8*F8, F4=mem(i_reg,2); /*g=f*f; fetch first polynomial coefficient*/
// Six passes through the two-instruction loop body, one table fetch per pass.
LCNTR=6, DO compute_poly UNTIL LCE;
F4=F12*F4, F2=mem(i_reg,2); /*Compute sum*g*/
compute_poly: F4=F2+F4; /*Compute sum=sum+next r*/
F4=F12*F4; /*Final multiply by g*/
F4=F4*F8; /*Compute f*R*/
F12=F4+F8; /*Compute Result=f+f*R*/
compute_sign:
// FETCH_RETURN is a macro not defined in this file (presumably lib_glob.h).
F0=F12*F7, FETCH_RETURN /*Restore sign of result*/
restore_state:
F0=RND F0; // round the result
dm(i2,2)=f0; // store result at the output address kept in i2
i_reg=SaveStack; // rewind to the start of the save area
MODE1=dm(SaveMode1); // restore caller's MODE1
r0=dm(i_reg,1);i2=r0; // restore i2
r3=dm(i_reg,1); // restore r3
r5=dm(i_reg,1); // restore r5
r7=dm(i_reg,1); // restore r7
RETURN (DB);
// The two macros below sit in the delay slots of RETURN (DB); they are not
// defined in this file.
RESTORE_STACK
RESTORE_FRAME
._sinf_simd.end:
._cosf_simd.end:
._cosf_sinf_simd.end:
.ENDSEG;
.SEGMENT/SPACE Data_Space_Name;
.PRECISION=MEMORY_PRECISION;
/*
 * Constant table walked through i_reg by the sine/cosine code.
 * Each line below is one pair of consecutive entries.
 * Entry offsets used by the code:
 *   sine_data     _cosf_sinf_simd  (pair pi/2, 0)
 *   sine_data+2   _cosf_simd       (pair pi/2, pi/2)
 *   sine_data+4   _sinf_simd       (starts at the 1/PI pair)
 */
.VAR sine_data[26] =
1.57079632679489661923, 0.0,                                /* [0..1]   pi/2, 0 */
1.57079632679489661923, 1.57079632679489661923,             /* [2..3]   pi/2 */
0.31830988618379067154, 0.31830988618379067154,             /* [4..5]   1/PI */
3.14160156250000000000, 3.14160156250000000000,             /* [6..7]   C1, almost PI */
-8.908910206761537356617E-6, -8.908910206761537356617E-6,   /* [8..9]   C2, PI=C1+C2 */
9.536743164E-7, 9.536743164E-7,                             /* [10..11] eps, sin(eps)=eps */
-0.737066277507114174E-12, -0.737066277507114174E-12,       /* [12..13] R7 */
0.160478446323816900E-9, 0.160478446323816900E-9,           /* [14..15] R6 */
-0.250518708834705760E-7, -0.250518708834705760E-7,         /* [16..17] R5 */
0.275573164212926457E-5, 0.275573164212926457E-5,           /* [18..19] R4 */
-0.198412698232225068E-3, -0.198412698232225068E-3,         /* [20..21] R3 */
0.833333333327592139E-2, 0.833333333327592139E-2,           /* [22..23] R2 */
-0.166666666666659653, -0.166666666666659653;               /* [24..25] R1 */
.ENDSEG;
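/*
 * Code-viewer page residue (not part of the source): keyboard shortcuts.
 *   Copy code          Ctrl + C
 *   Search code        Ctrl + F
 *   Full-screen mode   F11
 *   Switch theme       Ctrl + Shift + D
 *   Show shortcuts     ?
 *   Increase font size Ctrl + =
 *   Decrease font size Ctrl + -
 */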