; sincos.asm
comment ~
Copyright (C) 2008 Rouslan Dimitrov
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
~
; Angle constants (radians) and the IEEE-754 single-precision sign mask.
; They are referenced by name in the constant tables defined further down.
PI equ 3.141592653589793f
; 2*pi: one full turn.
TWO_PI equ 6.283185307179586f
; 1/(2*pi): multiplying an angle by this gives the number of full turns.
ONE_OVER_TWO_PI equ 0.159154943091895f
; pi/2: threshold used when folding an angle into the first/fourth quadrant.
HALF_PI equ 1.570796326794897f
; 2147483648 = 80000000h, i.e. only bit 31 (the float sign bit) set.
SIGN_BIT equ 2147483648
; Four consecutive single-precision floats (16 bytes), i.e. the contents of
; one XMM register. Lanes are addressed as .x, .y, .z and .w.
vec4 STRUCT
    x REAL4 ?
    y REAL4 ?
    z REAL4 ?
    w REAL4 ?
vec4 ENDS
; Assembler setup: Pentium Pro class instruction set, flat memory model,
; and the SSE (XMM) instruction set used by every procedure in this file.
.686
.model flat
.xmm
.data
; Read-only constant pool, aligned on a paragraph (16-byte) boundary.
; The packed procedures use the vec4 entries as 16-byte memory operands, so
; the order and sizes below matter: six vec4 rows (96 bytes) plus four
; scalars (16 bytes) keep every following vec4 at a multiple of 16 from the
; start of the segment.
constants segment readonly para
; Series coefficients, highest order first, for evaluation by Horner's rule
; in a^2. Lanes x,y carry the sine series and lanes z,w the cosine series:
;   sine   (x,y): -1/11!,  1/9!, -1/7!,  1/5!, -1/3!, 1   (result is then multiplied by a)
;   cosine (z,w): -1/10!,  1/8!, -1/6!,  1/4!, -1/2!, 1
c5 vec4 {-2.5052108e-8f, -2.5052108e-8f, -2.7557319e-7f, -2.7557319e-7f }
c4 vec4 { 2.7557319e-6f, 2.7557319e-6f, 2.4801587e-5f, 2.4801587e-5f }
c3 vec4 {-1.9841270e-4f, -1.9841270e-4f, -1.3888889e-3f, -1.3888889e-3f }
c2 vec4 { 8.3333333e-3f, 8.3333333e-3f, 4.1666667e-2f, 4.1666667e-2f }
c1 vec4 {-1.6666667e-1f, -1.6666667e-1f, -5.0000000e-1f, -5.0000000e-1f }
c0 vec4 { 1.0000000e+0f, 1.0000000e+0f, 1.0000000e+0f, 1.0000000e+0f }
; Scalar copies of the angle constants, used by the single-angle routine.
c_one_over_two_pi real4 ONE_OVER_TWO_PI
c_pi real4 PI
c_two_pi real4 TWO_PI
c_half_pi real4 HALF_PI
; The same constants broadcast to all four lanes for the packed routine.
c_pi4 vec4 { PI, PI, PI, PI }
c_two_pi4 vec4 { TWO_PI, TWO_PI, TWO_PI, TWO_PI }
c_one_over_two_pi4 vec4 { ONE_OVER_TWO_PI, ONE_OVER_TWO_PI, ONE_OVER_TWO_PI, ONE_OVER_TWO_PI }
c_half_pi4 vec4 { HALF_PI, HALF_PI, HALF_PI, HALF_PI }
; Sign-bit mask in every lane: used to split an angle into sign and magnitude.
c_sign_bit4 dword 4 dup (SIGN_BIT)
; Sign-bit mask in lanes z,w only: used to negate just the cosine lanes.
c_cos_sign_bit4 dword 0, 0, SIGN_BIT, SIGN_BIT
constants ends
.code
; Short aliases for the decorated symbol names the procedures are assembled
; under; the proc/endp lines below use the short names.
; NOTE(review): the decorated names look like MSVC C++ __fastcall functions
; returning vec4 by value and taking a float& (SinCos, SinCos_360) or a
; vec4& (SinCos4, SinCos4_360) -- confirm against the C++ declarations.
sincos equ ?SinCos@@YI?AVvec4@@AAM@Z
sincos_360 equ ?SinCos_360@@YI?AVvec4@@AAM@Z
sincos4 equ ?SinCos4@@YI?AVvec4@@AAV1@@Z
sincos4_360 equ ?SinCos4_360@@YI?AVvec4@@AAV1@@Z
;-----------------------------------------------------------------------
; sincos_360 - sine and cosine of a single angle, without range reduction.
; In:    edx -> real4 angle a
;        ecx -> vec4 result buffer (written with movaps, so 16-byte aligned)
; Out:   [ecx] = { sin, cos, 0, 0 },  eax = ecx
; Clobb: eax, xmm0-xmm3
; The angle is fed to the series as-is, so accuracy depends on the caller
; keeping it where the truncated series is adequate.
;-----------------------------------------------------------------------

; One Horner step on both running sums: multiply by a^2 (xmm1), then add the
; next coefficient (sine term from lane x, cosine term from lane z).
sc360_step macro coef
        mulss   xmm2, xmm1
        mulss   xmm3, xmm1
        addss   xmm2, [coef.x]
        addss   xmm3, [coef.z]
endm

sincos_360 proc near
        movss   xmm0, dword ptr [edx]   ; xmm0 = a
        movss   xmm1, xmm0
        mulss   xmm1, xmm0              ; xmm1 = a^2

        movss   xmm2, [c5.x]            ; xmm2 = sine sum, seeded with top term
        movss   xmm3, [c5.z]            ; xmm3 = cosine sum, seeded with top term
        sc360_step c4
        sc360_step c3
        sc360_step c2
        sc360_step c1
        sc360_step c0                   ; constant term is 1.0 in every lane

        mulss   xmm2, xmm0              ; the sine series carries one extra factor of a
        unpcklps xmm2, xmm3             ; xmm2 = { sin, cos, 0, 0 }
        movaps  [ecx], xmm2
        mov     eax, ecx                ; hand the result pointer back in eax
        ret
sincos_360 endp
;-----------------------------------------------------------------------
; sincos - sine and cosine of a single angle, with range reduction.
; In:    edx -> real4 angle a (radians)
;        ecx -> vec4 result buffer (written with movaps, so 16-byte aligned)
; Out:   [ecx] = { sin(a), cos(a), 0, 0 },  eax = ecx
; Clobb: eax, xmm0-xmm4, xmm7
; Notes: The number of whole turns is obtained with cvtss2si, which rounds
;        according to MXCSR; the reduction assumes the default
;        round-to-nearest mode. Rounding (rather than truncating) is what
;        brings the angle into [-pi; pi]; with truncation the angle could
;        stay as large as 2*pi in magnitude, which breaks the sign handling
;        of the fold below and evaluates the series far outside its
;        accurate range.
;        Angles whose turn count does not fit in a signed 32-bit integer
;        are not reduced correctly.
;-----------------------------------------------------------------------
sincos proc
        ; --- reduce a to [-pi; pi] by removing the nearest multiple of 2*pi
        movss   xmm0, dword ptr [edx]
        movss   xmm1, xmm0
        mulss   xmm1, [c_one_over_two_pi]   ; a / (2*pi)
        cvtss2si eax, xmm1                  ; nearest whole number of turns
        cvtsi2ss xmm1, eax
        mulss   xmm1, [c_two_pi]
        subss   xmm0, xmm1                  ; xmm0 = a reduced to [-pi; pi]

        ; --- fold into [-pi/2; pi/2]: sin(pi - x) = sin(x), cos(pi - x) = -cos(x)
        movss   xmm2, xmm0
        andps   xmm0, [c_sign_bit4]         ; xmm0 = sign(a)
        xorps   xmm2, xmm0                  ; xmm2 = fabs(a)
        movss   xmm1, [c_pi]
        subss   xmm1, xmm2                  ; xmm1 = pi - fabs(a)
        movss   xmm3, xmm2
        cmpnltss xmm3, [c_half_pi]          ; mask: fabs(a) >= pi/2
        movss   xmm7, [c_sign_bit4]
        andps   xmm7, xmm3                  ; xmm7 = sign bit iff the angle was reflected
        movss   xmm4, xmm2
        cmpltss xmm4, [c_half_pi]           ; mask: fabs(a) < pi/2
        andps   xmm3, xmm1
        andps   xmm4, xmm2
        addss   xmm3, xmm4                  ; (fabs(a) >= pi/2) ? pi - fabs(a) : fabs(a)
        orps    xmm0, xmm3                  ; restore the original sign

        ; --- evaluate both series by Horner's rule in a^2
        movss   xmm1, xmm0
        mulss   xmm1, xmm0                  ; xmm1 = a^2
        movss   xmm2, [c5.x]                ; xmm2 = sine sum
        movss   xmm3, [c5.z]                ; xmm3 = cosine sum
        mulss   xmm2, xmm1
        mulss   xmm3, xmm1
        addss   xmm2, [c4.x]
        addss   xmm3, [c4.z]
        mulss   xmm2, xmm1
        mulss   xmm3, xmm1
        addss   xmm2, [c3.x]
        addss   xmm3, [c3.z]
        mulss   xmm2, xmm1
        mulss   xmm3, xmm1
        addss   xmm2, [c2.x]
        addss   xmm3, [c2.z]
        mulss   xmm2, xmm1
        mulss   xmm3, xmm1
        addss   xmm2, [c1.x]
        addss   xmm3, [c1.z]
        mulss   xmm2, xmm1
        mulss   xmm3, xmm1
        addss   xmm2, [c0.x]
        addss   xmm3, [c0.z]
        mulss   xmm2, xmm0                  ; the sine series carries one extra factor of a

        xorps   xmm3, xmm7                  ; negate the cosine if the angle was reflected
        unpcklps xmm2, xmm3                 ; xmm2 = { sin, cos, 0, 0 }
        movaps  [ecx], xmm2
        mov     eax, ecx                    ; hand the result pointer back in eax
        ret
sincos endp
;-----------------------------------------------------------------------
; sincos4_360 - two sines and two cosines in one pass, no range reduction.
; In:    edx -> vec4 of angles (read with movaps, so 16-byte aligned)
;        ecx -> vec4 result buffer (written with movaps, so 16-byte aligned)
; Out:   [ecx] = { sin(in.x), sin(in.y), cos(in.z), cos(in.w) },  eax = ecx
; Clobb: eax, xmm0-xmm3
; Lanes x,y of the coefficient rows hold the sine series and lanes z,w the
; cosine series, so one packed Horner evaluation produces both.
;-----------------------------------------------------------------------
sincos4_360 proc near
        movaps  xmm0, [edx]             ; xmm0 = angles
        movaps  xmm3, [c0]              ; xmm3 = constant term (1.0 in every lane)
        movaps  xmm1, xmm0
        mulps   xmm1, xmm1              ; xmm1 = angle^2 per lane
        movlhps xmm0, xmm3              ; xmm0 = { in.x, in.y, 1, 1 }: final multiplier

        movaps  xmm2, xmm1
        mulps   xmm2, [c5]
        addps   xmm2, [c4]
        mulps   xmm2, xmm1
        addps   xmm2, [c3]
        mulps   xmm2, xmm1
        addps   xmm2, [c2]
        mulps   xmm2, xmm1
        addps   xmm2, [c1]
        mulps   xmm2, xmm1
        addps   xmm2, xmm3

        mulps   xmm2, xmm0              ; sine lanes get their extra factor of the angle
        movaps  [ecx], xmm2
        mov     eax, ecx                ; hand the result pointer back in eax
        ret
sincos4_360 endp
;-----------------------------------------------------------------------
; sincos4 - two sines and two cosines in one pass, with range reduction.
; In:    edx -> vec4 of angles in radians (read with movaps, 16-byte aligned)
;        ecx -> vec4 result buffer (written with movaps, 16-byte aligned)
; Out:   [ecx] = { sin(in.x), sin(in.y), cos(in.z), cos(in.w) },  eax = ecx
; Clobb: eax, mm1, mm2, xmm0-xmm4, xmm7
; Notes: The turn count is rounded with cvtps2pi, which follows MXCSR; the
;        reduction assumes the default round-to-nearest mode.
;        cvtps2pi/cvtpi2ps go through MMX registers, which alias the x87
;        register stack. emms is executed once the MMX registers are no
;        longer needed so that x87 code in the caller keeps working.
;-----------------------------------------------------------------------
sincos4 proc near
        ; --- reduce every lane to [-pi; pi] by removing the nearest multiple of 2*pi
        movaps  xmm0, [edx]
        movaps  xmm1, xmm0
        mulps   xmm1, [c_one_over_two_pi4]  ; angle / (2*pi)
        cvtps2pi mm1, xmm1                  ; lanes x,y -> nearest integers
        cvtpi2ps xmm1, mm1                  ; ...and back to float (lanes z,w untouched)
        movhlps xmm2, xmm1                  ; bring lanes z,w down to the low half
        cvtps2pi mm2, xmm2
        cvtpi2ps xmm2, mm2
        emms                                ; done with MMX: release the x87 register state
        movlhps xmm1, xmm2                  ; xmm1 = rounded turn count in all four lanes
        mulps   xmm1, [c_two_pi4]
        subps   xmm0, xmm1                  ; xmm0 = angles reduced to [-pi; pi]

        ; --- fold into [-pi/2; pi/2]: sin(pi - x) = sin(x), cos(pi - x) = -cos(x)
        movaps  xmm2, xmm0
        andps   xmm0, [c_sign_bit4]         ; xmm0 = sign(a)
        xorps   xmm2, xmm0                  ; xmm2 = fabs(a)
        movaps  xmm1, [c_pi4]
        subps   xmm1, xmm2                  ; xmm1 = pi - fabs(a)
        movaps  xmm3, xmm2
        cmpnltps xmm3, [c_half_pi4]         ; mask: fabs(a) >= pi/2
        movaps  xmm7, [c_cos_sign_bit4]
        andps   xmm7, xmm3                  ; sign bit in the cosine lanes that were reflected
        movaps  xmm4, xmm2
        cmpltps xmm4, [c_half_pi4]          ; mask: fabs(a) < pi/2
        andps   xmm3, xmm1
        andps   xmm4, xmm2
        addps   xmm3, xmm4                  ; (fabs(a) >= pi/2) ? pi - fabs(a) : fabs(a)
        orps    xmm0, xmm3                  ; restore the original sign

        ; --- evaluate both series by Horner's rule in a^2
        movaps  xmm2, xmm0
        mulps   xmm2, xmm0                  ; a^2 per lane
        movaps  xmm1, xmm2
        mulps   xmm2, [c5]
        addps   xmm2, [c4]
        mulps   xmm2, xmm1
        addps   xmm2, [c3]
        mulps   xmm2, xmm1
        addps   xmm2, [c2]
        mulps   xmm2, xmm1
        addps   xmm2, [c1]
        mulps   xmm2, xmm1
        movaps  xmm3, [c0]
        addps   xmm2, xmm3
        movlhps xmm0, xmm3                  ; xmm0 = { a.x, a.y, 1, 1 }
        mulps   xmm2, xmm0                  ; sine lanes get their extra factor of the angle

        xorps   xmm2, xmm7                  ; negate the cosines that were reflected
        movaps  [ecx], xmm2
        mov     eax, ecx                    ; VC expects eax to point to the result,
        ret                                 ; which is ecx due to the calling convention.
sincos4 endp
end
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -