; chipa32.asm
;
; From the Open Watcom 1.6.0 open-source compiler source code
; (assembly source, 398 lines).
;
;*****************************************************************************
;*
;*                            Open Watcom Project
;*
;*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
;*
;*  ========================================================================
;*
;*    This file contains Original Code and/or Modifications of Original
;*    Code as defined in and that are subject to the Sybase Open Watcom
;*    Public License version 1.0 (the 'License'). You may not use this file
;*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
;*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
;*    provided with the Original Code and Modifications, and is also
;*    available at www.sybase.com/developer/opensource.
;*
;*    The Original Code and all software distributed under the License are
;*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
;*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
;*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
;*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
;*    NON-INFRINGEMENT. Please see the License for the specific language
;*    governing rights and limitations under the License.
;*
;*  ========================================================================
;*
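;* Description:  __fpatan_chk - software replacement for the x87 FPATAN
;*               instruction (quadrant-correct arctangent of Y/X). The
;*               divides go through the external __fdiv_fpr routine;
;*               operands with a zero or all-ones exponent, or with the
;*               explicit integer bit clear, are handed to hardware FPATAN.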
;*
;*****************************************************************************


; static char sccs_id[] = "@(#)fpatan32.asm     1.7  12/21/94  08:33:45";
;
; This code is being published by Intel to users of the Pentium(tm)
; processor.  Recipients are authorized to copy, modify, compile, use and
; distribute the code.
;
; Intel makes no warranty of any kind with regard to this code, including
; but not limited to, implied warranties or merchantability and fitness for
; a particular purpose. Intel assumes no responsibility for any errors that
; may appear in this code.
;
; No patent licenses are granted, express or implied.
;
;
include mdef.inc

        .386
        .387


_TEXT   SEGMENT PARA PUBLIC USE32 'CODE'
_TEXT  ENDS

CONST   SEGMENT DWORD PUBLIC USE32 'DATA'
CONST   ENDS

CONST2  SEGMENT DWORD PUBLIC USE32 'DATA'
CONST2  ENDS

DATA32   SEGMENT DWORD PUBLIC USE32 'DATA'
;
; Read-only constants and tables used by __fpatan_chk.
;

; Byte offsets, relative to ESP, of the locals in the STACK_SIZE-byte
; scratch frame that __fpatan_chk allocates with "sub esp, STACK_SIZE".
Y               EQU     0               ; 10-byte (tbyte) copy of operand Y
X               EQU     12              ; 10-byte (tbyte) copy of operand X
PREV_CW         EQU     24              ; caller's FPU control word (fnstcw)
PATCH_CW        EQU     28              ; modified control word (fldcw)
SPILL           EQU     32              ; integer k written by fistp
STACK_SIZE      EQU     36              ; total size of the scratch frame


; 1.0 as an IEEE double (low dword first); loaded with "fld qword ptr".
pos_1   DD   00000000H
        DD   3ff00000H

; -1.0 as an IEEE double. Not referenced by the code in this file.
neg_1   DD   00000000H
        DD   0bff00000H


; Jump table for the final quadrant fix-up.  The index is the 3-bit value
; |sflag|Sx|Sy| built in EDX: sflag = 1 when the operands were swapped
; (|Y| >= |X|), Sx = sign bit of X, Sy = sign bit of Y.
dispatch_table  DD      offset label0
                DD      offset label1
                DD      offset label2
                DD      offset label3
                DD      offset label4
                DD      offset label5
                DD      offset label6
                DD      offset label7
;end dispatch table

; pi as an 80-bit extended value, little-endian:
; significand 0C90FDAA22168C235H, sign/exponent word 4000H.
pi      DB      35H
        DB      0c2H
        DD      0daa22168H
        DD      4000c90fH

; pi/2: same significand as pi, sign/exponent word 3FFFH.
pi_by_2 DB      35H
        DB      0c2H
        DD      0daa22168H
        DD      3fffc90fH

; 16.0 as an IEEE single.
flt_sixteen DD  41800000H

; 1/16 (0.0625) as an IEEE single.
one_by_sixteen  DD 3d800000H


; B1..B6: 80-bit extended coefficients of the polynomial in t = s*s that
; __fpatan_chk evaluates as  poly = s + s*(B1*t + B2*t^2 + ... + B6*t^6).
; Each is laid out as low significand word, middle significand dword, then
; a dword holding the top significand word and the sign/exponent word.
; The signs alternate and the magnitudes are approximately
; 1/3, 1/5, 1/7, 1/9, 1/11, 1/13.
B1      DW      0AAA8H
        DD      0AAAAAAAAH
        DD      0BFFDAAAAH

B2      DW      2D6EH
        DD      0CCCCCCCCH
        DD      3FFCCCCCH

B3      DW      4892H
        DD      249241F9H
        DD      0BFFC9249H

B4      DW      0C592H
        DD      3897CDECH
        DD      3FFBE38EH

B5      DW      5DDDH
        DD      0C17BC162H
        DD      0BFFBBA2DH

B6      DW      4854H
        DD      77C7C78EH
        DD      3FFB9C80H


; arctan(k/16) for k = 0..16, one 80-bit extended value per 16-byte row
; (significand low dword, significand high dword, sign/exponent in the
; low word of the third dword, remaining bytes are zero padding).
; __fpatan_chk indexes it with a byte offset of k*16 ("shl ecx, 4").
; Row 0 is 0.0; row 16 is pi/4.
atan_k_by_16    dd 000000000H, 000000000H, 000000000H, 000000000H
                dd 067EF4E37H, 0FFAADDB9H, 000003FFAH, 000000000H
                dd 0617B6E33H, 0FEADD4D5H, 000003FFBH, 000000000H
                dd 072D81135H, 0BDCBDA5EH, 000003FFCH, 000000000H
                dd 06406EB15H, 0FADBAFC9H, 000003FFCH, 000000000H
                dd 03F5E5E6AH, 09B13B9B8H, 000003FFDH, 000000000H
                dd 026F78474H, 0B7B0CA0FH, 000003FFDH, 000000000H
                dd 0611FE5B6H, 0D327761EH, 000003FFDH, 000000000H
                dd 00DDA7B45H, 0ED63382BH, 000003FFDH, 000000000H
                dd 0D9867E2AH, 0832BF4A6H, 000003FFEH, 000000000H
                dd 0F7F59F9BH, 08F005D5EH, 000003FFEH, 000000000H
                dd 071BDDA20H, 09A2F80E6H, 000003FFEH, 000000000H
                dd 034F70924H, 0A4BC7D19H, 000003FFEH, 000000000H
                dd 0B4D8C080H, 0AEAC4C38H, 000003FFEH, 000000000H
                dd 0C2319E74H, 0B8053E2BH, 000003FFEH, 000000000H
                dd 0AC526641H, 0C0CE85B8H, 000003FFEH, 000000000H
                dd 02168C235H, 0C90FDAA2H, 000003FFEH, 000000000H

DATA32  ENDS

BSS32   SEGMENT DWORD PUBLIC USE32 'BSS'
BSS32   ENDS


EXTRN   __fdiv_fpr:NEAR

DGROUP  GROUP CONST,CONST2,DATA32,BSS32


_TEXT   SEGMENT PARA PUBLIC USE32 'CODE'
        ASSUME CS:_TEXT,DS:DGROUP,ES:DGROUP, SS:nothing
        public __fpatan_chk

        defpe   __fpatan_chk
;-----------------------------------------------------------------------
; __fpatan_chk - software equivalent of the x87 FPATAN instruction.
;
; In:    ST(0) = X, ST(1) = Y   (same operand order as FPATAN)
; Out:   both operands popped; ST(0) = arctangent of Y/X with the
;        quadrant chosen from the signs of X and Y
; Saves: EAX, ECX, EDX (pushed on entry, popped on every exit path);
;        the caller's FPU control word is reloaded before returning
; Stack: 3 pushes + STACK_SIZE bytes of locals (+4 around each call)
;
; Method (software path):
;   u = |X|, v = |Y|; z = smaller/larger, so 0 <= z <= 1
;   k = round(16*z), g = k/16
;   s = (z - g)/(1 + g*z)              ; tan(atan z - atan g)
;   w = atan_k_by_16[k] + s + s*(B1*t + ... + B6*t^6),  t = s*s
;   result = w adjusted by dispatch_table[|sflag|Sx|Sy|]
;
; Operands whose exponent field is 0 or 7FFFh, or whose explicit
; integer bit is clear, are passed to the hardware FPATAN instead.
;
; NOTE(review): defpe is a macro from mdef.inc (not shown here);
; presumably it opens the procedure closed by the ENDP below.
; NOTE(review): the polynomial evaluation keeps eight values on the x87
; register stack at its peak, so it appears to need the whole register
; stack free apart from the two operands - confirm against callers.
;-----------------------------------------------------------------------
        push    eax
        push    ecx
        push    edx
        sub     esp, STACK_SIZE
        fstp    tbyte ptr [esp+X]       ; save X
        fstp    tbyte ptr [esp+Y]       ; save Y

; Bit 31 of the high significand dword is the explicit integer bit;
; "add r, r" shifts it into CF.
        mov     ecx, [esp+Y+4]
        add     ecx, ecx
        jnc     hw_fpatan               ; unnormals (explicit 1 missing)
        mov     eax, [esp+X+4]
        add     eax, eax
        jnc     hw_fpatan               ; unnormals (explicit 1 missing)
; The dword loads below also pick up two bytes past the tbyte; they are
; removed by the 7fffh masks.
        mov     ecx, [esp+Y+8]          ; sign/exponent word of Y
        mov     eax, [esp+X+8]          ; sign/exponent word of X
        and     ecx, 7fffh              ; Ey = exponent Y
        jz      hw_fpatan               ; Ey = 0
        and     eax, 7fffh              ; Ex = exponent X
        jz      hw_fpatan               ; Ex = 0
        cmp     ecx, 7fffh              ; check if Ey = 0x7fffh
        je      hw_fpatan
        cmp     eax, 7fffh              ; check if Ex = 0x7fffh
        je      hw_fpatan

        fld     tbyte ptr [esp+X]       ; reload X
        fabs                            ; |X| = u
        fld     tbyte ptr [esp+Y]       ; reload Y
        fabs                            ; |Y| = v
; x87 stack is now v, u

;  The following instructions turn off exceptions and set the
;  precision control to 80 bits.  The former is necessary to
;  force any traps to be taken at the divide instead of the scaling
;  code.  The latter is necessary in order to get full precision for
;  codes with incoming 32 and 64 bit precision settings.  If
;  it can be guaranteed that before reaching this point, the underflow
;  exception is masked and the precision control is at 80 bits, these
;  instructions can be omitted.
;  The "and" additionally clears control-word bits 10-11 (rounding
;  control), which selects round-to-nearest for the fistp below.
;
        fnstcw  [PREV_CW+esp]           ; save caller's control word
        mov     edx, [PREV_CW+esp]
        or      edx, 033fh              ; mask exceptions, pc=80
        and     edx, 0f3ffh
        mov     [PATCH_CW+esp], edx
        fldcw   [PATCH_CW+esp]          ; mask exceptions & pc=80


        xor     edx, edx                ; initialize sflag = 0
        fcom                            ; compare |Y| (ST0) with |X| (ST1)
        push    eax                     ; fstsw ax would destroy EAX
        fstsw  ax
        sahf                            ; C0 -> CF, so jb means |Y| < |X|
        pop     eax
        jb      order_X_Y_ok
        fxch                            ; |Y| >= |X|: swap so ST0 <= ST1
        inc     edx                     ; sflag = 1
order_X_Y_ok:
; NOTE(review): __fdiv_fpr is external and EAX = 0fh is its selector
; argument; judging by how the result is used at both call sites it
; replaces ST(0), ST(1) with the single quotient ST(0)/ST(1) - confirm
; against the __fdiv_fpr source.
        push    eax
        mov     eax, 0fh
        call    __fdiv_fpr                  ; z = v/u (u/v when sflag = 1)
        pop     eax
        fld     dword ptr flt_sixteen   ; 16.0
        fmul    st, st(1)               ; z*16.0
; Top of stack looks like k, z
        fistp   dword ptr [SPILL+esp]   ; store k as int
        mov     ecx, [SPILL+esp]
        shl     ecx, 4                  ; byte offset of row k in atan_k_by_16
        fild    dword ptr[SPILL+esp]
        fmul    dword ptr one_by_sixteen; 1.0/16.0
; Top of stack looks like g, z
        fld     st(1)                   ; push a copy of z
        fsub    st, st(1)               ; z-g = r
        fxch
; Top of stack looks like g, r, z
        fmulp   st(2), st               ; g*z
; Top of stack looks like r, g*z
        fld     qword ptr pos_1         ; load 1.0
        faddp   st(2), st               ; 1+g*z
; Top of stack looks like r, 1+g*z
        push    eax
        mov     eax, 0fh
        call    __fdiv_fpr                  ; s = r/(1+g*z)
        pop     eax
        fld     st(0)                   ; duplicate s
        fmul    st,st(1)                ; t = s*s
; Top of stack looks like t, s

; From here the integer instructions that collect the sign bits of X and
; Y into EDX are interleaved with the FPU work.  x87 arithmetic does not
; write EFLAGS, so CF set by each "add eax, eax" is still valid at the
; "adc edx, edx" that follows it.
        fld     st(0)
        fmul    st, st(1)
; Top of stack looks like t2, t, s
        fld     st(0)
        fmul    st, st(1)               ; t4 = t2*t2
        fld     tbyte ptr B6
        fld     tbyte ptr B5
; Top of stack looks like B5, B6, t4, t2, t, s
        fxch
        fmul    st, st(2)               ; B6*t4
        fld     tbyte ptr B4
        fxch    st(2)
        fmul    st, st(3)               ; B5*t4
; Top of stack looks like B5t4, B6t4, B4, t4, t2, t, s
        fld     tbyte ptr B3
        fxch    st(2)
        fmul    st, st(5)               ; B6t4*t2 = B6t6
; Top of stack looks like B6t6, B5t4, B3, B4, t4, t2, t, s
        fxch    st(3)
        fmulp   st(4), st               ; B4*t4
        fld     tbyte ptr B2
; Top of stack looks like B2, B5t4, B3, B6t6, B4t4, t2, t, s
        fxch    st(3)
        faddp   st(4), st               ; even = B4t4 + B6t6 (B2t2 added later)
        mov     eax, [esp+X+8]          ; sign/exponent word of X
        fld     tbyte ptr B1
        fxch
        shl     eax, 16                 ; sign bit of X -> bit 31
; Top of stack looks like B5t4, B1, B3, B2, even, t2, t, s
        fmul    st, st(6)               ; B5t4*t = B5t5
        fxch    st(2)
        add     eax, eax                ; CF = Sx
        fmul    st, st(5)               ; B3*t2
; Top of stack looks like B3t2, B1, B5t5, B2, even, t2, t, s
        fxch    st(3)
        adc     edx, edx                ; |sflag|Sx|
        fmulp   st(5), st               ; B2*t2
        fxch    st(2)
        mov     eax, [Y+8+esp]          ; sign/exponent word of Y
        fmul    st, st(5)               ; B3t2*t = B3t3
; Top of stack looks like B3t3, B5t5, B1, even, B2t2, t, s
        fxch    st(2)
        shl     eax, 16                 ; sign bit of Y -> bit 31
        fmulp   st(5), st               ; B1*t
; Top of stack looks like  B5t5, B3t3, even, B2t2, B1t, s
        fxch    st(2)
        faddp   st(3), st               ; even = B2t2 + B4t4 + B6t6
        add     eax, eax                ; CF = Sy
        faddp   st(1), st               ; odd = B3t3 + B5t5
        adc     edx, edx                ; |sflag|Sx|Sy|
; Top of stack looks like  odd, even, B1t, s
        faddp   st(2), st               ; B1t + odd
        faddp   st(1), st               ; B1t + odd + even
        fmul    st,st(1)                ; s*(odd+even)
        faddp   st(1), st               ; poly

        fld     tbyte ptr atan_k_by_16[ecx]     ; arctan[k;16]
        faddp   st(1), st               ; w = poly + arctan(g)

; Quadrant fix-up.  Every labelN below finishes the same way: reload the
; caller's control word, release the frame, restore EDX/ECX/EAX, return
; with the result in ST(0).
        jmp     dword ptr dispatch_table[edx*4]

label0:                                 ; no swap, X >= 0, Y >= 0:  w
        fldcw   [esp+PREV_CW]
        add     esp, STACK_SIZE
        pop     edx
        pop     ecx
        pop     eax
        ret
label1:                                 ; no swap, X >= 0, Y < 0:  -w
        fchs
        fldcw   [esp+PREV_CW]
        add     esp, STACK_SIZE
        pop     edx
        pop     ecx
        pop     eax
        ret
label2:                                 ; no swap, X < 0, Y >= 0
        fld     tbyte ptr pi
        fsubrp  st(1), st               ; pi - w
        fldcw   [esp+PREV_CW]
        add     esp, STACK_SIZE
        pop     edx
        pop     ecx
        pop     eax
        ret
label3:                                 ; no swap, X < 0, Y < 0
        fld     tbyte ptr pi
        fsubrp  st(1), st               ; pi - w
        fchs                            ; - (pi - w)
        fldcw   [esp+PREV_CW]
        add     esp, STACK_SIZE
        pop     edx
        pop     ecx
        pop     eax
        ret
label4:                                 ; swapped, X >= 0, Y >= 0
        fld     tbyte ptr pi_by_2
        fsubrp  st(1), st               ; pi/2 - w
        fldcw   [esp+PREV_CW]
        add     esp, STACK_SIZE
        pop     edx
        pop     ecx
        pop     eax
        ret
label5:                                 ; swapped, X >= 0, Y < 0
        fld     tbyte ptr pi_by_2
        fsubrp  st(1), st               ; pi/2 - w
        fchs                            ; - (pi/2 - w)
        fldcw   [esp+PREV_CW]
        add     esp, STACK_SIZE
        pop     edx
        pop     ecx
        pop     eax
        ret
label6:                                 ; swapped, X < 0, Y >= 0
        fld     tbyte ptr pi_by_2
        faddp   st(1), st               ; pi/2 + w
        fldcw   [esp+PREV_CW]
        add     esp, STACK_SIZE
        pop     edx
        pop     ecx
        pop     eax
        ret
label7:                                 ; swapped, X < 0, Y < 0
        fld     tbyte ptr pi_by_2
        faddp   st(1), st               ; pi/2 + w
        fchs                            ; -(pi/2+w)
        fldcw   [esp+PREV_CW]
        add     esp, STACK_SIZE
        pop     edx
        pop     ecx
        pop     eax
        ret


; Hardware fallback for operands rejected by the checks at the top.
; Every jump here happens before the control word is modified, so there
; is nothing to restore with fldcw.
hw_fpatan:
        fld     tbyte ptr [esp+Y]       ; reload Y
        fld     tbyte ptr [esp+X]       ; reload X
        fpatan
        add     esp, STACK_SIZE
        pop     edx
        pop     ecx
        pop     eax
        ret
__fpatan_chk       ENDP

_TEXT  ENDS
        END

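; Code-viewer page residue (not part of the program) - keyboard shortcuts:
;
;   Copy code:           Ctrl + C
;   Search code:         Ctrl + F
;   Full-screen mode:    F11
;   Increase font size:  Ctrl + =
;   Decrease font size:  Ctrl + -
;   Show shortcuts:      ?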