📄 mrmuldv.any

📁 miracl-大数运算库,大家使用有什么问题请多多提意见
💻 ANY
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/*
 *  MIRACL - various implementations of routines muldiv, muldvm, muldvd
 *           muldvd2 and imuldiv
 *  mrmuldv.c
 *
 *  THIS FILE CONTAINS MANY VERSIONS OF THESE ROUTINES
 *  COPY THIS FILE TO MRMULDV.C AND DELETE THOSE PARTS IRRELEVANT TO
 *  YOUR REQUIREMENTS. 
 *
 *  NOTE: - This file and its contents are not needed 
 *  if MR_NOASM is defined in mirdef.h
 * 
 *  muldiv() calculates (a*b+c)/m and (a*b+c)%m as quickly as possible. Should 
 *  ideally be written in assembly language of target machine for speed 
 *  The problem is to avoid overflow in the calculation of the intermediate 
 *  product a*b+c. 
 *
 *  If using a floating-point underlying type, and rounding can be 
 *  controlled, it makes sense to pre-calculate
 *  the inverse of the modulus m, and multiply instead of divide
 *  In this situation a function imuldiv() is also needed.
 *
 *  muldvm() and muldvd() routines are necessary to support full-width number 
 *  base working. They are not needed if MR_NOFULLWIDTH is defined in mirdef.h.
 *
 *  muldvm - returns (a*base+c)/m and remainder
 *  muldvd - returns (a*b+c)/base and remainder 
 *
 *  NOTE: New to version 4.2, new routine muldvd2() is required.
 *  See C version below for specification
 *  Versions of this are easily developed from existing muldvd() programs
 *
 *  In most applications muldvd2() will be the time critical routine.
 *
 *  Note that full-width base working may not be possible for all processors. 
 *  For example it cannot be used on a VAX, or RS/6000 with mr_utypes defined  
 *  as ints. This is because the instruction set does not support 
 *  unsigned multiply and divide instructions. In such cases ALWAYS use a 
 *  maximum base of MAXBASE in mirsys(), rather than 0.
 *
 *  Since parameter passing and returning is time-consuming, these routines 
 *  should be generated 'inline', if compiler allows it. Parameter passing
 *  by register will also be faster than via the stack. For even faster 
 *  operation, use in-line assembly to speed up the inner loops of routines 
 *  pmul(), sdiv(), multiply() and divide(). See these routines for details 
 *  of Microsoft/Borland C inline 80x86 assembly, which gives a substantial speed-up.
 *
 *  NOTE: All other things being equal, versions of MIRACL with 32-bit mr_utypes
 *  will run 3-4 times faster than versions with 16-bit mr_utypes, even for medium
 *  precision arithmetic, such as used in Public Key systems. 
 *  
 *  Note that a portable C version of 'muldiv' may not possible with some 
 *  32-bit compilers if ints and longs are both 32-bits and there is no 
 *  64-bit type. Fortunately these days there usually is such a type - called 
 *  perhaps long long, or maybe __int64. See also the Blakely-Sloan 
 *  method below. In any case the portable versions may be used if mr_utypes 
 *  are defined as shorts, usually 16 bits. This would amount however to 
 *  using the 32-bit processor in a 16 bit mode and would be very inefficient 
 *  - up to 4 times slower. See mirdef.haf 
 *
 *  First the standard portable versions, for use when there is a double 
 *  length type capable of holding the product of two mr_utype types.
 *  For example 32 and 16 bits types respectively. 
 *  Note that if MR_NOASM is defined in mirdef.h, these routines are 
 *  implemented in mrcore.c, and do not need to be extracted from here.
 * 
 *  This is followed by various other assembly language implementations for 
 *  popular processors, computers and compilers.
 *


**************************************************************

/* Standard C version of mrmuldv.c */

#include <stdio.h>
#include "miracl.h"

mr_small muldiv(mr_small a,mr_small b,mr_small c,mr_small m,mr_small *rp)
{
    mr_small q;
    mr_large ldres,dble=(mr_large)a*b+c;
    q=(mr_small)MR_LROUND(dble/m);
    *rp=(mr_small)(dble-(mr_large)q*m);
    return q;
}

#ifdef MR_FP_ROUNDING

mr_small imuldiv(mr_small a,mr_small b,mr_small c,mr_small m,mr_large im,mr_small *rp)
{
    mr_small q;
    mr_large ldres,dble=(mr_large)a*b+c;
    q=(mr_small)MR_LROUND(dble*im);
    *rp=(mr_small)(dble-(mr_large)q*m);
    return q;
}

#endif


#ifndef MR_NOFULLWIDTH

mr_small muldvm(mr_small a,mr_small c,mr_small m,mr_small *rp)
{
    mr_small q;
    union doubleword dble;
    dble.h[MR_BOT]=c;
    dble.h[MR_TOP]=a;
    q=(mr_small)(dble.d/m);
    *rp=(mr_small)(dble.d-(mr_large)q*m);
    return q;
}

mr_small muldvd(mr_small a,mr_small b,mr_small c,mr_small *rp)
{
    union doubleword dble; 
    dble.d=(mr_large)a*b+c;
    *rp=dble.h[MR_BOT];
    return dble.h[MR_TOP];
}

void muldvd2(mr_small a,mr_small b,mr_small *c,mr_small *rp)
{
    union doubleword dble; 
    dble.d=(mr_large)a*b+*c+*rp;
    *rp=dble.h[MR_BOT];
    *c=dble.h[MR_TOP];
}

#endif


****************************************************************

//
// Version of muldiv() for use with underlying type a double
// and using the FP co-processor on a Pentium, and the gcc compiler.
// In this case MR_NOFULLWIDTH is defined.
// This is much better than compiling the above, but fprem and fdiv 
// are still very slow. 
//

        .file   "mrmuldv.s"
.text
.globl _muldiv
_muldiv:

        pushl %ebx
       
        fldl 8(%esp)
        fmull 16(%esp)
        movl 40(%esp),%ebx
        faddl 24(%esp)
        fldl 32(%esp)
        fld %st(1)

// NOTE: If rounding control is possible, set rounding to "chop"
//       and replace lines below with these
//       In this case #define MR_FP_ROUNDING will be defined in mirdef.h
//
//        fdiv %st(1),%st
//        fistpq 8(%esp)
//        fildq 8(%esp)
//        fmul %st,%st(1)
//        fxch %st(2)
//        fsubp %st,%st(1)
//        fstpl (%ebx)

        fprem
        fstl (%ebx)
        fsubrp %st,%st(2)
        fdivrp %st,%st(1)

        popl %ebx
        ret

//
// If MR_FP_ROUNDING is defined, this function will be needed for Pentium
//
.globl _imuldiv
_imuldiv:

        pushl %ebx
       
        fldl 8(%esp)
        fmull 16(%esp)
        movl 52(%esp),%ebx
        faddl 24(%esp)
        fldl 32(%esp)
        fld %st(1)

        fldt 40(%esp)
        fmulp %st,%st(1) 
        fistpq 8(%esp)
        fildq 8(%esp)
        fmul %st,%st(1)
        fxch %st(2)
        fsubp %st,%st(1)
        fstpl (%ebx)

        popl %ebx
        ret


************************************************************************

/*
 *  Borland C++ 32-bit compiler (BCC32) version of the above. 
 *  Uses inline assembly feature. Suitable for Win32 Apps
 *  Also compatible with Microsoft Visual C++ 32-bit compiler
 *  BUT change TBYTE to QWORD
 */
#include "mirdef.h"

#define ASM _asm

double muldiv(double a,double b,double c,double m,double *rp)
{
        ASM fld   QWORD PTR a
        ASM fmul  QWORD PTR b
        ASM mov   ebx,DWORD PTR rp
        ASM fadd  QWORD PTR c
        ASM fld   QWORD PTR m
        ASM fld   st(1)

#ifdef MR_FP_ROUNDING
        ASM fdiv  st,st(1)
        ASM fistp QWORD PTR [ebx]
        ASM fild  QWORD PTR [ebx]
        ASM fmul  st(1),st
        ASM fxch  st(2)
        ASM fsubrp st(1),st
        ASM fstp   QWORD PTR [ebx]
#else 
        ASM fprem       
        ASM fst   QWORD PTR [ebx]
        ASM fsubp st(2),st
        ASM fdivp st(1),st
#endif
}

#ifdef MR_FP_ROUNDING

double imuldiv(double a,double b,double c,double m,long double im,double *rp)
{
        ASM fld   QWORD PTR a
        ASM fmul  QWORD PTR b
        ASM fld   QWORD PTR m
        ASM fxch  st(1) 
        ASM fadd  QWORD PTR c
        ASM mov   ebx,DWORD PTR rp
        ASM fxch  st(1) 
        ASM fld   st(1)

        ASM fld   TBYTE PTR im   /* QWORD for Microsoft */
        ASM fmulp st(1),st
        ASM fistp QWORD PTR [ebx]
        ASM fild  QWORD PTR [ebx]
        ASM fmul  st(1),st
        ASM fxch  st(2)
        ASM fsubrp st(1),st
        ASM fstp   QWORD PTR [ebx]
}

#endif


*********************************************************************

;
;   VAX11 version for Dec C compiler
;   with 32 bit int using 64-bit quadword
;   for the intermediate product. 
;
;   Use with mirdef.h32 - but define MR_NOFULLWIDTH 
;   Use mirsys(...,MAXBASE) instead of mirsys(...,0) in your programs. 
;   
;   Why ...(MIRACL-2) instead of ...(MIRACL-1) ? That's a negative
;   number for division by mr_base!
;
;   The problem is that the emul and ediv instructions work only
;   for signed types
;
    .entry muldiv,0
    subl   #4,sp
    emul   4(ap),8(ap),12(ap),r0   ;a*b+c
    ediv   16(ap),r0,r0,@20(ap)    ;quo. in r0, rem. in *rp
    ret
    .end
;
;   Fullwidth base working not possible on VAX, so no muldvm() or muldvd()
;
;


**********************************************************************


#
#    Version of muldiv.c for IBM RS/6000
#    This processor has no unsigned multiply/divide
#    so full-width base not possible, so no muldvm() or muldvd()
#
#    Use with mirdef.h32 but define MR_NOFULLWIDTH definition.
#    Use mirsys(...,MAXBASE) instead of mirsys(...,0) in your programs. 
#
#    Note this version was developed from very inadequate RS/6000
#    documentation. It may not be optimal, and it may not always work
#    (although it works fine for me!)
#    

                                   
        .file   "mrmuldv.s"
        .globl  .muldiv[PR]
        .csect  .muldiv[PR]

# parameters are passed in registers 3,4,5,6 and 7
# the mq register holds the low 32-bits for mul/div

        mul     3,4,3          # q=a*b
        mfmq    4              # get low part from mq
        a       4,5,4          # add in c
        aze     3,3            # add carry to high part
        mtmq    4              # move low part to mq
        div     3,3,6          # q=(a*b+c)/m
        mfmq    4              # get remainder 
        st      4,0(7)         # store remainder

# quotient is returned in register 3

        brl

************************************************************************

/* Here's another portable method which might be considered for processors
 * like the VAX and RS6000. The idea is due to Peter Montgomery.  */

    #include "mirdef.h"

    typedef unsigned mr_utype uint;

    uint muldiv(a,b,c,m,rp)
    uint a,b,c,m,*rp;
    {
        int q,r;
        q=(int)(0.5+((double)a*(double)b+(double)c)/(double)m);
        r=(int)(((uint)a*(uint)b+(uint)c)-(uint)m*(uint)q);
        if (r < 0)
        {
            r+=m;
            q--;
        }
        *rp=r;
        return q;
    }

**********************************************************************


;
;  IBM-PC versions - small memory model only
;  Easily modified for other memory models
;
;  For large code models (e.g. medium)
;
;           change    _TEXT    to    mrmuldv_TEXT  (in three places)
;           change    NEAR     to    FAR
;           change    [bp+4]   to    [bp+6]
;           change    [bp+6]   to    [bp+8]
;           change    [bp+8]   to    [bp+10]
;           change    [bp+10]  to    [bp+12]
;           change    [bp+12]  to    [bp+14]
;
;  For large data models, see Turbo C version below for required modification
;
;  Microsoft C compiler V4.0+
;  Written for MS macro-assembler
;
        ASSUME CS:_TEXT
_TEXT   SEGMENT BYTE PUBLIC 'CODE'

        PUBLIC _muldiv
_muldiv PROC NEAR
        push bp                 ;standard C linkage
        mov  bp,sp

        mov  ax,[bp+4]          ;get a
        mul  WORD PTR [bp+6]    ;multiply by b
        add  ax,[bp+8]          ;add c to low word
        adc  dx,0h              ;add carry to high word
        div  WORD PTR [bp+10]   ;divide by m
        mov  bx,[bp+12]         ;get address for remainder
        mov  [bx],dx            ;store remainder

        pop  bp                 ;standard C return
        ret                     ;quotient in ax

_muldiv endP

        PUBLIC _muldvm
_muldvm PROC NEAR
        push bp                 ;standard C linkage
        mov  bp,sp

        mov  dx,[bp+4]          ;get a
        mov  ax,[bp+6]          ;add in c
        div  WORD PTR [bp+8]    ;divide by m
        mov  bx,[bp+10]         ;get address for remainder
        mov  [bx],dx            ;store remainder

        pop  bp                 ;standard C return
        ret                     ;quotient in ax

_muldvm endP

        PUBLIC _muldvd
_muldvd PROC NEAR
        push bp                 ;standard C linkage
        mov  bp,sp

        mov  ax,[bp+4]          ;get a
        mul  WORD PTR [bp+6]    ;multiply by b
        add  ax,[bp+8]          ;add c to low word
        adc  dx,0h              ;add carry to high word
        mov  bx,[bp+10]         ;get address for remainder
        mov  [bx],ax            ;store remainder
        mov  ax,dx
        pop  bp                 ;standard C return
        ret                     ;quotient in ax

_muldvd endP

        PUBLIC _muldvd2
_muldvd2 PROC NEAR
        push bp                 ;standard C linkage
        mov  bp,sp
        push si

        mov  ax,[bp+4]          ;get a
        mul  WORD PTR [bp+6]    ;multiply by b
        mov  bx,[bp+8]          ;get address for c
        add  ax,[bx]            ;add c
        adc  dx,0h              ;add carry to high word

        mov  si,[bp+10]         ;get address for remainder
        add  ax,[si]            ;add rp
        adc  dx,0h              ;add carry to high word

        mov  [si],ax            ;store remainder
12 3 4 5 下一页
💿 文件大小 1432 K
👤 上传用户 foxfsk
📂 所属分类数据结构
🏷️ 相关标签

#miracl #运算 #家
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -