chipr32.asm

来自「开放源码的编译器open watcom 1.6.0版的源代码」· 汇编 代码 · 共 852 行 · 第 1/3 页

ASM
852
字号
;*****************************************************************************
;*
;*                            Open Watcom Project
;*
;*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
;*
;*  ========================================================================
;*
;*    This file contains Original Code and/or Modifications of Original
;*    Code as defined in and that are subject to the Sybase Open Watcom
;*    Public License version 1.0 (the 'License'). You may not use this file
;*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
;*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
;*    provided with the Original Code and Modifications, and is also
;*    available at www.sybase.com/developer/opensource.
;*
;*    The Original Code and all software distributed under the License are
;*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
;*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
;*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
;*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
;*    NON-INFRINGEMENT. Please see the License for the specific language
;*    governing rights and limitations under the License.
;*
;*  ========================================================================
;*
;* Description:  Software patch for the floating-point remainder (FPREM)
;*               on Pentium processors, 32-bit version; derived from the
;*               fprem32.asm code published by Intel.
;*
;*****************************************************************************


; static char sccs_id[] = "@(#)fprem32.asm      1.5  12/22/94  12:48:07";
;
; This code is being published by Intel to users of the Pentium(tm)
; processor.  Recipients are authorized to copy, modify, compile, use and
; distribute the code.
;
; Intel makes no warranty of any kind with regard to this code, including
; but not limited to, implied warranties or merchantability and fitness for
; a particular purpose. Intel assumes no responsibility for any errors that
; may appear in this code.
;
; No patent licenses are granted, express or implied.
;
;
include mdef.inc

        .386
        .387

;
;  PRELIMINARY VERSION of the software patch for the floating
;  point remainder.
;


;
;  CHECKSW - debugging aid.
;
;  When DEBUG is defined, expands to two no-wait stores that snapshot the
;  FPU control word into fpcw and the FPU status word into fpsw.  When
;  DEBUG is not defined the macro expands to nothing.
;
CHECKSW MACRO
ifdef   DEBUG
        fnstcw  word ptr [fpcw]         ; snapshot control word
        fnstsw  word ptr [fpsw]         ; snapshot status word
endif
ENDM


DATA32  SEGMENT DWORD USE32 PUBLIC 'DATA'

;
;  Stack variables for remainder routines.
;
;  The EQUs below are byte offsets into a scratch frame kept on the stack.
;  Each floating-point slot is FLT_SIZE bytes apart; the values stored in
;  them are read and written as tbyte (10-byte) operands by fprem_common.
;  The three trailing slots are 4 bytes each.
;

; Distance between consecutive floating-point slots in the frame.
FLT_SIZE        EQU     12
; Denominator operand.
DENOM           EQU     0
; Copy of the denominator, saved before fprem_common rescales DENOM.
DENOM_SAVE      EQU     DENOM + FLT_SIZE
; Numerator operand; also receives the intermediate/final remainder.
NUMER           EQU     DENOM_SAVE + FLT_SIZE
; Caller's FPU control word (stored with fnstcw in fprem_common).
PREV_CW         EQU     NUMER + FLT_SIZE
; Temporary control word loaded while the software loop runs.
PATCH_CW        EQU     PREV_CW + 4
; Status-word slot; its use is not visible in this part of the file.
FPREM_SW        EQU     PATCH_CW + 4
; Total size of the frame described above.
STACK_SIZE      EQU     FPREM_SW + 4
; Size of a near return address.
RET_SIZE        EQU     4
; Size of one pushed 32-bit register.
PUSH_SIZE       EQU     4

; Bytes that sit between esp and the frame while inside fprem_common:
; the return address plus the three registers it pushes (eax, ebx, ecx).
MAIN_FUDGE      EQU     RET_SIZE + PUSH_SIZE + PUSH_SIZE + PUSH_SIZE

; Frame offsets as seen from esp inside fprem_common (after its pushes).
MAIN_DENOM              EQU     DENOM + MAIN_FUDGE
MAIN_DENOM_SAVE         EQU     DENOM_SAVE + MAIN_FUDGE
MAIN_NUMER              EQU     NUMER + MAIN_FUDGE
MAIN_PREV_CW            EQU     PREV_CW + MAIN_FUDGE
MAIN_PATCH_CW           EQU     PATCH_CW + MAIN_FUDGE
MAIN_FPREM_SW           EQU     FPREM_SW + MAIN_FUDGE

; Mask of the three mantissa bits that must all be one for the denominator
; to match the bit pattern fprem_common screens for.
ONESMASK        EQU     700h

; 16-entry lookup indexed by a 4-bit field of the denominator mantissa.
; Entries are non-zero only at indices 1, 4, 7, 10 and 13; a zero entry
; means the hardware instruction can be used directly.
fprem_risc_table        DB      0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0
; The 8-byte constants below are stored byte-by-byte, least significant
; byte first.  Values quoted assume they are read as IEEE-754 doubles
; (half is read as a qword by fprem_common; the others are not used in
; the visible part of the file - confirm against their users).
; 3FEE000000000000h = 0.9375 (15/16)
fprem_scale             DB      0, 0, 0, 0, 0, 0, 0eeh, 03fh
; 43F0000000000000h = 2**64
one_shl_64              DB      0, 0, 0, 0, 0, 0, 0f0h, 043h
; 3BF0000000000000h = 2**-64
one_shr_64              DB      0, 0, 0, 0, 0, 0, 0f0h, 03bh
; 3FF0000000000000h = 1.0
one                     DB      0, 0, 0, 0, 0, 0, 0f0h, 03fh
; 3FE0000000000000h = 0.5
half                    DB      0, 0, 0, 0, 0, 0, 0e0h, 03fh
; 10-byte extended-precision value (exponent field 7FFEh, mantissa
; FFFF000000000000h); loaded as a tbyte by fprem_common ahead of an fprem
; whose only purpose is to leave C2 set.
big_number              DB      0, 0, 0, 0, 0, 0, 0ffh, 0ffh, 0feh, 07fh

; Debug-only snapshot variables written by the CHECKSW macro.
ifdef   DEBUG
        public  fpcw
        public  fpsw
fpcw    dw      0
fpsw    dw      0
endif

; Field layout of a 28-byte FPU environment image (field sizes sum to
; ENV_SIZE).  NOTE(review): presumably mirrors the 32-bit protected-mode
; FNSTENV/FLDENV layout - confirm against the code that uses it, which is
; not in the visible part of the file.
FPU_STATE       STRUC
        CONTROL_WORD    DW      ?
        reserved_1      DW      ?
        STATUS_WORD     DD      ?
        TAG_WORD        DW      ?
        reserved_3      DW      ?
        IP_OFFSET       DD      ?
        CS_SLCT         DW      ?
        OPCODE          DW      ?
        DATA_OFFSET     DD      ?
        OPERAND_SLCT    DW      ?
        reserved_4      DW      ?
FPU_STATE       ENDS

; Size in bytes of one FPU_STATE image.
ENV_SIZE        EQU     28


DATA32 ENDS

; Empty segment declarations.  They introduce the segment names (with
; their alignment, USE32 attribute, combine type and class) so the names
; can be used in the GROUP directive below and in the ASSUME that follows
; the opening of the code segment.  No code or data is emitted here.
_TEXT  SEGMENT DWORD USE32 PUBLIC 'CODE'
_TEXT  ENDS

; DATA32 was already opened and populated above; this re-declaration adds
; nothing to it.
DATA32  SEGMENT DWORD USE32 PUBLIC 'DATA'
DATA32  ENDS

CONST32 SEGMENT DWORD USE32 PUBLIC 'CONST'
CONST32 ENDS

BSS32   SEGMENT DWORD USE32 PUBLIC 'BSS'
BSS32   ENDS

; Collect the constant, BSS and data segments into one group; DGROUP is
; what ds and es are assumed to address in the code that follows.
DGROUP  GROUP CONST32, BSS32, DATA32



CODE32  SEGMENT   DWORD USE32 PUBLIC 'CODE'

        assume cs:_TEXT, ds:DGROUP, es:DGROUP, ss:nothing


fprem_common    PROC    NEAR

        push    eax
        push    ebx
        push    ecx
        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
        xor     eax, ONESMASK           ; invert bits that have to be one
        test    eax, ONESMASK           ; check bits that have to be one
        jnz     remainder_hardware_ok
        shr     eax, 11
        and     eax, 0fh
        cmp     byte ptr fprem_risc_table[eax], 0     ; check for (1,4,7,a,d)
        jz      remainder_hardware_ok

; The denominator has the bit pattern. Weed out the funny cases like NaNs
; before applying the software version. Our caller guarantees that the
; denominator is not a denormal. Here we check for:
;       denominator     inf, NaN, unnormal
;       numerator       inf, NaN, unnormal, denormal

        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
        and     eax, 07fff0000h         ; mask the exponent only
        cmp     eax, 07fff0000h         ; check for INF or NaN
        je      remainder_hardware_ok
        mov     eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
        and     eax, 07fff0000h         ; mask the exponent only
        jz      remainder_hardware_ok   ; jif numerator denormal
        cmp     eax, 07fff0000h         ; check for INF or NaN
        je      remainder_hardware_ok
        mov     eax, [esp + MAIN_NUMER + 4]     ; high mantissa bits - numerator
        add     eax, eax                ; set carry if explicit bit set
        jnz     remainder_hardware_ok   ; jmp if numerator is unnormal
        mov     eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator
        add     eax, eax                ; set carry if explicit bit set
        jnz     remainder_hardware_ok   ; jmp if denominator is unnormal

rem_patch:
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
        and     eax, 07fffh              ; clear sy
        add     eax, 63                  ; evaluate ey + 63
        mov     ebx, [MAIN_NUMER+8+esp]  ; sign and exponent of x (numerator)
        and     ebx, 07fffh              ; clear sx
        sub     ebx, eax                 ; evaluate the exponent difference (ex - ey)
        ja      rem_large               ; if ex > ey + 63, case of large arguments
rem_patch_loop:
        mov     eax, [MAIN_DENOM+8+esp]  ; sign and exponent of y (denominator)
        and     eax, 07fffh             ; clear sy
        add     eax, 10                 ; evaluate ey + 10
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        and     ebx, 07fffh             ; clear sx
        sub     ebx, eax                ; evaluate the exponent difference (ex - ey)
        js      remainder_hardware_ok   ; safe if ey + 10 > ex
        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        and     ebx, 07fffh             ; clear sx
        mov     ecx, ebx
        sub     ebx, eax
        and     ebx, 07h
        or      ebx, 04h
        sub     ecx, ebx
        mov     ebx, eax
        and     ebx, 08000h             ; keep sy
        or      ecx, ebx                ; merge the sign of y
        mov     dword ptr [MAIN_DENOM+8+esp], ecx
        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the shifted denominator
        mov     dword ptr [MAIN_DENOM+8+esp], eax       ; restore the initial denominator
        fxch
        fprem                           ; this rem is safe
        fstp    tbyte ptr [MAIN_NUMER+esp]      ; update the numerator
        fstp    st(0)                   ; pop the stack
        jmp rem_patch_loop
rem_large:
        test    edx, 02h                ; is denominator already saved
        jnz     already_saved
        fld     tbyte ptr[esp + MAIN_DENOM]
        fstp    tbyte ptr[esp + MAIN_DENOM_SAVE]        ; save denominator
already_saved:
        ; Save user's precision control and institute 80.  The fp ops in
        ; rem_large_loop must not round to user's precision (if it is less
        ; than 80) because the hardware would not have done so.  We are
        ; aping the hardware here, which is all extended.

        fnstcw  [esp+MAIN_PREV_CW]      ; save caller's control word
        mov     eax, dword ptr[esp + MAIN_PREV_CW]
        or      eax, 033fh              ; mask exceptions, pc=80
        mov     [esp + MAIN_PATCH_CW], eax
        fldcw   [esp + MAIN_PATCH_CW]

        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
        and     eax, 07fffh             ; clear sy
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        and     ebx, 07fffh             ; clear sx
        sub     ebx, eax                ; evaluate the exponent difference
        and     ebx, 03fh
        or      ebx, 020h
        add     ebx, 1
        mov     ecx, ebx
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        and     ebx, 07fffh             ; clear sx
        and     eax, 08000h             ; keep sy
        or      ebx, eax                ; merge the sign of y
        mov     dword ptr[MAIN_DENOM+8+esp], ebx        ; make ey equal to ex (scaled denominator)
        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the scaled denominator
        fabs
        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
        fabs
rem_large_loop:
        fcom
        fstsw  ax
        and     eax, 00100h
        jnz     rem_no_sub
        fsub    st, st(1)
rem_no_sub:
        fxch
        fmul    qword ptr half
        fxch
        sub     ecx, 1                  ; decrement the loop counter
        jnz     rem_large_loop
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
        fstp    tbyte ptr[esp + MAIN_NUMER]     ; save result
        fstp    st                      ; toss modified denom
        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
        fld     tbyte ptr[big_number]   ; force C2 to be set
        fprem
        fstp    st
        fld     tbyte ptr[esp + MAIN_NUMER]     ; restore saved result

        fldcw   [esp + MAIN_PREV_CW]    ; restore caller's control word
        and     ebx, 08000h             ; keep sx
        jz      rem_done
        fchs
        jmp     rem_done
remainder_hardware_ok:
        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the denominator

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?