⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 math.asm

📁 一个不错的用汇编语言编写的FFT算法程序
💻 ASM
📖 第 1 页 / 共 2 页
字号:
;******************************************************************************
; EQU   - equates and compile time constants
; All three values are 32-bit patterns meant to be used as IEEE-754
; single-precision bit images / bit masks.
;******************************************************************************
ML2H    =   0bf318000h          ;; bit image of -0.693359375 (presumably the high
                                ;; part of -ln 2; not referenced in this chunk)
MASKSMH =   0807fffffh          ;; mask for sign and significand
MASKSH  =   080000000h          ;; mask for sign bit


;******************************************************************************
; DATA - segment and definitions
;******************************************************************************
.DATA
Align 8
;******************************************************************************
; Vector (3DNow!) data
; Each DQ packs two 32-bit lanes into one 64-bit MMX operand.
; Comment convention used below:
;   "a, b"  lists the LOW dword first, then the HIGH dword
;   "a | b" lists the HIGH dword first, then the LOW dword
;******************************************************************************
PMOne       DQ      0BF8000003F800000h      ; real4 1.0, -1.0
HalfVal     DQ      03F0000003F000000h      ; real4 0.5, 0.5
HalfMin     DQ      0BF0000003F000000h      ; real4 0.5, -0.5
ones        DQ      03F8000003F800000h      ; real4 1.0, 1.0
twos        DQ      04000000040000000h      ; real4 2.0, 2.0
pinfs       DQ      07f8000007f800000h      ; dword +infinity bit pattern (07F800000h) in both lanes
smh_masks   DQ      0807fffff807fffffh      ; dword MASKSMH, MASKSMH
sign_mask   DQ      07fffffff7fffffffh      ; every bit EXCEPT the sign bit, both lanes (same pattern as mabs)
sh_masks    DQ      08000000080000000h      ; dword MASKSH, MASKSH
two_126s    DQ      00000007E0000007Eh      ; dword 126, 126
negh_mask   DQ      08000000000000000h      ; dword 0, 080000000h (pxor same as pfmul by PMOne)
negh_mask2  DQ      08000000080000000h      ; sign bit in both lanes (same bit pattern as sh_masks)
ooln2s      DQ      03FB8AA3B3FB8AA3Bh      ; 1/ln2 | 1/ln2  (~1.442695 in both lanes)


; SINCOS specific values
pio4ht      DQ      0b97daa22bf490000h      ; -0.000241913 | -0.785156  (the two parts sum to ~ -PI/4)
mo56_42     DQ      0bcc30c31bc924925h      ; -0.0238095 | -0.0178571   (-1/42 | -1/56)
pio4s       DQ      03f490fdb3f490fdbh      ; 0.785398 | 0.785398       (PI/4 | PI/4)
mo30_20     DQ      0bd4ccccdbd088889h      ; -0.05 | -0.0333333        (-1/20 | -1/30)
mo12_6      DQ      0be2aaaabbdaaaaabh      ; -0.166667 | -0.0833333    (-1/6 | -1/12)
mo2s        DQ      0bf000000bf000000h      ; -0.5 | -0.5
iones       DQ      00000000100000001h      ; 1 | 1  (integer 1 in both lanes)


;******************************************************************************
; Scalar (single float) data
; Every entry is one 32-bit value: either an IEEE-754 single-precision bit
; image or an integer mask. Decimal values in the comments are decoded from
; the bit patterns; the stated purpose of a constant is only given where it
; is visible in this chunk, otherwise it is marked as an assumption.
;******************************************************************************

sgn         DD      080000000h              ; mask for sign bit
mabs        DD      07FFFFFFFh              ; mask for absolute value (~sgn)
mant        DD      0007FFFFFh              ; mask for mantissa
expo        DD      07F800000h              ; mask for exponent
one         DD      03F800000h              ; 1.0f
half        DD      03F000000h              ; 0.5f
two         DD      040000000h              ; 2.0
oob         DD      000000000h              ; "out of bounds" value
nan         DD      07fffffffh              ; "Not a number" value
; n0 .. asq1: presumably coefficients and bounds for rational approximations
; used by routines outside this chunk -- TODO confirm against their users.
n0          DD      040A008EFh
n1          DD      03DAA7B3Dh
d0          DD      0412008EFh
qq0         DD      0419D92C8h
qq1         DD      041E6BD60h
qq2         DD      041355DC0h
pp0         DD      0C0D21907h
pp1         DD      0C0B59883h
pp2         DD      0BF52C7EAh
bnd         DD      03F133333h              ; ~0.575
asp0        DD      03F6A4AA5h
asp1        DD      0BF004C2Ch
asq0        DD      040AFB829h
asq1        DD      0C0AF5123h
pio2        DD      03FC90FDBh              ; ~1.5707963  (PI/2)
npio2       DD      0BFC90FDBh              ; ~-1.5707963 (-PI/2)
ooln2       DD      03FB8AA3Bh              ; ~1.442695   (1/ln 2)
upper       DD      042B17218h              ; ~88.7228  (presumably an upper argument bound -- TODO confirm)
lower       DD      0C2AEAC50h              ; ~-87.3365 (presumably a lower argument bound -- TODO confirm)
ln2hi       DD      03F317200h              ; ~0.693145752 (presumably the high part of ln 2)
ln2lo       DD      035BFBE8Eh              ; ~1.4286e-6   (presumably the low part of ln 2)
rt2         DD      03FB504F3h              ; ~1.4142135  (sqrt 2)
edec        DD      000800000h              ; bit 23 set: a 1 in the exponent field
bias        DD      00000007Fh              ; integer 127
; c0 .. c2, p0/p1, q0/q1: presumably polynomial / rational coefficients for
; routines outside this chunk -- TODO confirm.
c2          DD      03E18EFE2h
c1          DD      03E4CAF6Fh
c0          DD      03EAAAABDh
tl2e        DD      04038AA3Bh              ; ~2.88539 (2/ln 2)
maxn        DD      0FF7FFFFFh              ; -FLT_MAX
q1          DD      043BC00B5h
p1          DD      041E77545h
q0          DD      045E451C5h
p0          DD      0451E424Bh
mine        DD      0C2FC0000h              ; -126.0
maxe        DD      043000000h              ; 128.0
max         DD      07F7FFFFFh              ; FLT_MAX
rle10       DD      03ede5bdbh              ; 1/ln10

; SINCOS specific values
fouropi     DD      03fa2f983h              ; 1.27324f (4/PI)
xmax        DD      046c90fdbh              ; 25735.9  (SINCOSMAC range limit for fabs(x))

.CODE


;******************************************************************************
; SINCOSMAC - sin/cos simultaneous computation
; Input:    mm0 - angle in radians (single precision, low dword)
; Output:   mm0 - (sin|cos): sin in the high dword, cos in the low dword
; Uses:     mm0-mm7, eax, ebx, ecx, edx, esi, flags
; Comment:  This macro simultaneously computes sin and cos of the input
;           parameter, and returns the result packed in mm0 as (sin|cos).
;           If the bit image of fabs(x) is not below xmax, the macro skips
;           the computation and returns (0 | 1.0).
;           Method: k = trunc(fabs(x) * 4/PI) selects the octant, the
;           argument is reduced with the two-part constant pio4ht, and
;           both functions are evaluated as nested polynomials in the
;           squared reduced argument. Bits of k and the input sign then
;           pick which result goes in which lane and the lane signs.
;           All labels are LOCAL, so the macro may be expanded any number
;           of times within one label scope.
;           Ultimately, this routine needs higher precision and a more
;           efficient implementation (less inter-register bank traffic).
;******************************************************************************
SINCOSMAC   Macro
        LOCAL       x2, x5, x7, ending
        movd        eax,MM0                 ;; eax = raw bits of x
        movq        MM1,MM0                 ;; (redundant: mm1 is reloaded at x2 before it is read)
        movd        MM3,[mabs]              ;; mm3 = (0 | 07FFFFFFFh)
        mov         ebx,eax
        mov         edx,eax
        pand        MM0,MM3                 ;; mm0 = (0 | fabs(x))
        and         ebx,080000000h          ;; ebx = sign bit of x
        shr         edx,01fh                ;; edx = 1 if x is negative, else 0
        xor         eax,ebx                 ;; eax = bits of fabs(x) (sign bit cleared)
        cmp         eax,[xmax]              ;; non-negative float images order like integers
        movd        MM2,[fouropi]           ;; mm2 = 4/PI (movd does not touch the flags)
        jl          short x2                ;; fabs(x) < xmax: compute normally
        movd        MM0,[one]               ;; out of range: return (0 | 1.0)
        jmp         ending
        Align       16
x2:
        movq        MM1,MM0                 ;; mm1 = (0 | fabs(x))
        pfmul       MM0,MM2                 ;; mm0 = fabs(x) * 4 / PI
        movq        MM3,[pio4ht]            ;; mm3 = two-part -PI/4 (tail | head)
        pf2id       MM0,MM0                 ;; k = trunc(fabs(x) * 4/PI), as integer
        movq        MM7,[mo56_42]
        movd        ecx,MM0                 ;; ecx = k
        pi2fd       MM0,MM0                 ;; mm0.lo = (float)k
        mov         esi,ecx
        movq        MM6,[mo30_20]
        punpckldq   MM0,MM0                 ;; mm0 = (k | k)
        movq        MM5,[ones]              ;; mm5 = (1.0 | 1.0)
        pfmul       MM0,MM3                 ;; mm0 = (k*tail | k*head)
        movq        MM3,[pio4s]             ;; mm3 = (PI/4 | PI/4)
        pfadd       MM1,MM0                 ;; mm1.lo = fabs(x) + k*head
        shr         esi,2                   ;; esi = k >> 2
        punpckhdq   MM0,MM0                 ;; mm0 = (k*tail | k*tail)
        xor         edx,esi                 ;; bit 0 of edx = sign(x) ^ bit 2 of k
        pfadd       MM1,MM0                 ;; mm1.lo = r = fabs(x) - k*PI/4
        test        ecx,1
        punpckldq   MM1,MM1                 ;; mm1 = (r | r)
        jz          short x5                ;; even k: keep r
        pfsubr      MM1,MM3                 ;; odd k: r = PI/4 - r
x5:     movq        MM2,MM5
        shl         edx,01fh                ;; edx = sign mask for the sin result
        punpckldq   MM2,MM1                 ;; mm2 = (r | 1.0)
        pfmul       MM1,MM1                 ;; mm1 = (r*r | r*r)
        mov         esi,ecx
        movq        MM4,[mo12_6]
        shr         esi,1                   ;; esi = k >> 1
        pfmul       MM7,MM1                 ;; (-r*r/42 | -r*r/56)
        xor         ecx,esi                 ;; bit 0 of ecx = bit 0 ^ bit 1 of k
        pfmul       MM6,MM1                 ;; (-r*r/20 | -r*r/30)
        shl         esi,01fh                ;; bit 1 of k moved to the sign position
        pfadd       MM7,MM5                 ;; 1 + previous term
        xor         ebx,esi                 ;; ebx = sign(x) ^ (bit 1 of k), sign position only
        pfmul       MM4,MM1                 ;; (-r*r/6 | -r*r/12)
        pfmul       MM7,MM6
        movq        MM6,[mo2s]
        pfadd       MM7,MM5                 ;; 1 + nested term
        pfmul       MM6,MM1                 ;; (-r*r/2 | -r*r/2)
        pfmul       MM4,MM7
        movd        MM0,edx                 ;; mm0 = (0 | sin sign mask)
        pfadd       MM4,MM5                 ;; hi: sin(r)/r series; lo: inner cos series
        punpckldq   MM6,MM5                 ;; mm6 = (1.0 | -r*r/2)
        psrlq       MM5,32                  ;; mm5 = (0 | 1.0)
        pfmul       MM4,MM6
        punpckldq   MM0,MM0                 ;; mm0 = sin sign mask in both lanes
        movd        MM1,ebx                 ;; mm1 = (0 | ebx): extra flip for the low lane only
        pfadd       MM4,MM5                 ;; mm4 = (sin(r)/r | cos(r))
        test        ecx,1                   ;; swap needed when bit 0 ^ bit 1 of k is set
        pfmul       MM4,MM2                 ;; mm4 = (sin(r) | cos(r))
        jz          short x7
        punpckldq   MM5,MM4                 ;; mm5 = (cos(r) | 1.0)
        punpckhdq   MM4,MM5                 ;; mm4 = (cos(r) | sin(r)): lanes swapped
x7:     pxor        MM4,MM1                 ;; low lane sign ^= ebx
        pxor        MM0,MM4                 ;; both lanes sign ^= edx; mm0 = (sin|cos)
ending:
        EndM



;******************************************************************************
;******************************************************************************
;_TEXT   Segment Public USE32 PAGE 'CODE'


;******************************************************************************
; Routine:  a_atan
; Input:    mm0.lo
; Result:   mm0.lo
; Uses:     mm0-mm7
; Comment:
;   Compute atan(x) using MMX and 3DNow! instructions. Scalar version.
;
;   If the input has an exponent of 0xFF, the result of this routine
;   is undefined. Inputs with an exponent of 0 are treated as true
;   zeroes and return a function value of 0. Result can not overflow.
;
;   atan(x) = sign(x)*atan(abs(x)). If x > 1, atan(x) = pi/2-atan(1/x)
;   atan(x) for -1 <= x <= 1 is approximated by a rational minimax
;   approximation.
;
;   Testing shows that this function has an error of less than 2.27
;   single precision ulps
;
;   input      mm0.low argument x
;   output     mm0.low result atan(x)
;   destroys   mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7
;******************************************************************************
Align   16
;Public  _a_atan

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -