; File: mathsm.h
;/*
; * Mathematics assembler macros
; * Copyright (C) ARM Limited 1998-1999. All rights reserved.
; */
;==========================================================================
; ARM Maths routines
;==========================================================================
;
; Version : 0.15 Dated: 6/3/97
;
;==========================================================================
; This source file contains ARM algorithms for standard maths operations.
; Each is implemented in the form of a MACRO so that it can be inlined.
;
; CONTENTS:
;
; INTEGER:
; c = a + abs(b) 32+32=32
; Signed-saturated addition 32+32=32
; MULTIPLY 32x32=64, 64x64=64, 64x64=128
; DIVIDE 32/16=16, 32/32=32, 64/32=32, 64/64=64
; SQUARE ROOT sqr(32)=16
; CUBE ROOT cbr(32)=11
;
; FIXED POINT:
; DIVIDE 32/32=32
; SINE sin(32)=32
; COSINE cos(32)=32
;
INCLUDE regchekm.h
; The algorithms assume that ARM architecture 3M or 4 is available - with the
; long multiply instructions. If not, LONG_MULT (set below) is false and the
; _SMULL and _UMULL macros expand to replacement sequences built from MUL
; instead of using SMULL and UMULL. In that case extra registers are needed.
; These are named mul_temp_0, mul_temp_1 and mul_temp_2 and must be free.
; They must be defined in an external file BEFORE including this file.
;
; LONG_MULT is derived from the assembler's {ARCHITECTURE} string: it is
; false only when that string is exactly "3".
; NOTE(review): other architecture names without long multiply (for example
; an "xM" variant) would still yield TRUE here - confirm against the set of
; targets this file is built for.
GBLL LONG_MULT
LONG_MULT SETL :LNOT:({ARCHITECTURE} = "3") ; TRUE if ARM architecture 3M or 4 (or later) is available
GBLA k ; general counter
IF :LNOT:LONG_MULT
; the replacement multiply sequences need extra registers, so check that
; mul_temp_0..mul_temp_2 have been defined before this point
ASSERT mul_temp_0>=0
ASSERT mul_temp_1>=0
ASSERT mul_temp_2>=0
ENDIF
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; BASIC MACROS ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; USplit16 - separate a 32 bit value into two unsigned 16 bit values
;
;   $resl   receives bits 15..0 of $src, zero extended
;   $resh   receives bits 31..16 of $src, zero extended (LSR)
;   $src    the 32 bit value to split
;   $label  optional label, attached to the first instruction
;
; $resh is written by the first instruction and read back by the second,
; so it must differ from $src (which is still needed) and from $resl
; (which would otherwise overwrite it).  Both conditions are checked with
; DISTINCT; previously only the $resh/$src pair was checked even though
; the $resl/$resh requirement was documented.
; $resl may be the same register as $src.
        MACRO
$label  USplit16 $resl, $resh, $src
        DISTINCT $resh, $src
        DISTINCT $resl, $resh
$label  MOV     $resh, $src, LSR #16            ; resh = top half of src
        BIC     $resl, $src, $resh, LSL #16     ; resl = src with top half cleared
        MEND
; SSplit16 - separate a 32 bit value into two 16 bit values,
; high part signed, low part unsigned
;
;   $resl   receives bits 15..0 of $src, zero extended
;   $resh   receives bits 31..16 of $src, sign extended (ASR)
;   $src    the 32 bit value to split
;   $label  optional label, attached to the first instruction
;
; $resh is written by the first instruction and read back by the second,
; so it must differ from $src (which is still needed) and from $resl
; (which would otherwise overwrite it).  Both conditions are checked with
; DISTINCT; previously only the $resh/$src pair was checked even though
; the $resl/$resh requirement was documented.
; $resl may be the same register as $src.
        MACRO
$label  SSplit16 $resl, $resh, $src
        DISTINCT $resh, $src
        DISTINCT $resl, $resh
$label  MOV     $resh, $src, ASR #16            ; resh = signed top half of src
        BIC     $resl, $src, $resh, LSL #16     ; LSL #16 drops the sign copies, leaving
                                                ; only the top half to be cleared
        MEND
; UAdd16 - unsigned add of a 32 bit value, shifted up by 16 bits, into a
; 64 bit value:
;
;   ($resh:$resl) = ($resh:$resl) + ($x << 16)      $x taken as unsigned
;
;   $resl, $resh  low and high words of the 64 bit value (updated in place)
;   $x            32 bit value to add
;
; $resl is overwritten before $x is read for the second time, so $x must
; not be the same register as $resl.  The ADDS changes the flags.
        MACRO
        UAdd16  $resl, $resh, $x
        ADDS    $resl, $resl, $x, LSL #16       ; low word += x << 16, carry out
        ADC     $resh, $resh, $x, LSR #16       ; high word += x >> 16 (zero filled) + carry
        MEND
; SAdd16 - signed add of a 32 bit value, shifted up by 16 bits, into a
; 64 bit value:
;
;   ($resh:$resl) = ($resh:$resl) + ($x << 16)      $x taken as signed
;
;   $resl, $resh  low and high words of the 64 bit value (updated in place)
;   $x            32 bit value to add
;
; $resl is overwritten before $x is read for the second time, so $x must
; not be the same register as $resl.  The ADDS changes the flags.
        MACRO
        SAdd16  $resl, $resh, $x
        ADDS    $resl, $resl, $x, LSL #16       ; low word += x << 16, carry out
        ADC     $resh, $resh, $x, ASR #16       ; high word += x >> 16 (sign filled) + carry
        MEND
; _SMULL - signed 32x32=64 multiply
;
;   $dh.$dl = $x * $y
;
; $dl, $dh and $x must be distinct (checked with DISTINCT).  $y may be the
; same register as $dl, $dh or $x.
;
; With LONG_MULT set this is a single SMULL.  Otherwise the product is
; assembled from four MULs on 16 bit halves; that path overwrites
; mul_temp_0, mul_temp_1 and mul_temp_2 and changes the flags (through
; the ADDS inside SAdd16).
        MACRO
        _SMULL  $dl, $dh, $x, $y
        DISTINCT $dl, $dh, $x
        IF      LONG_MULT
        SMULL   $dl, $dh, $x, $y
        ELSE
        ; no hardware multiplier
        ; $y is split first because it is allowed to alias $dl, $dh or $x
        SSplit16 mul_temp_0, mul_temp_1, $y     ; temp_0 = low y, temp_1 = high y (signed)
        SSplit16 mul_temp_2, $dh, $x            ; temp_2 = low x, dh = high x (signed)
        MUL     $dl, mul_temp_0, mul_temp_2     ; low x * low y
        MUL     mul_temp_0, $dh, mul_temp_0     ; high x * low y
        MUL     $dh, mul_temp_1, $dh            ; high y * high x
        MUL     mul_temp_1, mul_temp_2, mul_temp_1 ; low x * high y
        SAdd16  $dl, $dh, mul_temp_0            ; add one middle value
        SAdd16  $dl, $dh, mul_temp_1            ; add other middle value
        ENDIF
        MEND
; _UMULL - unsigned 32x32=64 multiply
;
;   $dh.$dl = $x * $y
;
; $dl, $dh and $x must be distinct (checked with DISTINCT).  $y may be the
; same register as $dl, $dh or $x.
;
; With LONG_MULT set this is a single UMULL.  Otherwise the product is
; assembled from four MULs on 16 bit halves; that path overwrites
; mul_temp_0, mul_temp_1 and mul_temp_2 and changes the flags (through
; the ADDS inside UAdd16).
        MACRO
        _UMULL  $dl, $dh, $x, $y
        DISTINCT $dl, $dh, $x
        IF      LONG_MULT
        ; we have a hardware multiplier
        UMULL   $dl, $dh, $x, $y
        ELSE
        ; no hardware multiplier
        ; $y is split first because it is allowed to alias $dl, $dh or $x
        USplit16 mul_temp_0, mul_temp_1, $y     ; temp_0 = low y, temp_1 = high y
        USplit16 mul_temp_2, $dh, $x            ; temp_2 = low x, dh = high x
        MUL     $dl, mul_temp_0, mul_temp_2     ; low x * low y
        MUL     mul_temp_0, $dh, mul_temp_0     ; high x * low y
        MUL     $dh, mul_temp_1, $dh            ; high y * high x
        MUL     mul_temp_1, mul_temp_2, mul_temp_1 ; low x * high y
        UAdd16  $dl, $dh, mul_temp_0            ; add one middle value
        UAdd16  $dl, $dh, mul_temp_1            ; add other middle value
        ENDIF
        MEND
;---------------------------------------------------------------
; ADDABS:  c = a + abs(b)
; 2 cycles
;---------------------------------------------------------------
; Inputs : $a, $b   integers
; Output : $c       a + abs(b)
;
; How it works:
;   b >= 0 : the EORS leaves c = b with carry = 0,
;            so the ADC gives c = a + b
;   b <  0 : the EORS leaves c = NOT(b) with carry = 1,
;            so the ADC gives c = a + NOT(b) + 1 = a - b
;            (NOT(b) + 1 is the 2's complement of b)
;
; Register rules (checked with DISTINCT):
;   $c must differ from $a
;   $c must differ from $b
;   $a and $b may be the same register
;
; The flags are changed by the EORS.
;---------------------------------------------------------------
        MACRO
        ADDABS  $c, $a, $b
        DISTINCT $c, $a
        DISTINCT $c, $b
        EORS    $c, $b, $b, ASR #32             ; c = b or NOT(b); carry = sign of b
        ADC     $c, $a, $c                      ; c = a + c + carry
        MEND
;---------------------------------------------------------------
; SIGNSAT:  c = Signed-Saturated(a + b)
; 2 cycles + 1 register constant
;---------------------------------------------------------------
; Inputs : $a, $b      integers
;          $constant   register holding 0x80000000
; Output : $c          saturated sum
;
; How it works:
;   no overflow       : c = a + b; the conditional EORVS is not executed
;   positive overflow : the wrapped sum has its top bit set, so
;                       c ASR #31 = 0xffffffff and the EORVS writes
;                       0x80000000 ^ 0xffffffff = 0x7fffffff
;   negative overflow : the wrapped sum has its top bit clear, so
;                       c ASR #31 = 0 and the EORVS writes
;                       0x80000000 ^ 0 = 0x80000000
;
; Register rules:
;   $c must differ from $constant (checked with DISTINCT)
;   $a, $b and $constant may overlap in any way
;   $a, $b and $c may overlap in any way
;
; The flags are changed by the ADDS.
;---------------------------------------------------------------
        MACRO
        SIGNSAT $c, $a, $b, $constant
        DISTINCT $c, $constant
        ADDS    $c, $a, $b                      ; plain sum, V set on signed overflow
        EORVS   $c, $constant, $c, ASR #31      ; on overflow replace by 0x7fffffff / 0x80000000
        MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;
; INTEGER MULTIPLICATION MACROS ;
; ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; UMUL_32x32_64 - unsigned 32x32 = 64 multiply
;
;   $al = low  32 bits of $b * $c
;   $ah = high 32 bits of $b * $c
;
; Simply expands to _UMULL, so $al, $ah and $b must be distinct registers.
        MACRO
        UMUL_32x32_64 $al, $ah, $b, $c
        _UMULL  $al, $ah, $b, $c
        MEND
; SMUL_32x32_64 - signed 32x32 = 64 multiply
;
;   $al = low  32 bits of $b * $c
;   $ah = high 32 bits of $b * $c
;
; Simply expands to _SMULL, so $al, $ah and $b must be distinct registers.
        MACRO
        SMUL_32x32_64 $al, $ah, $b, $c
        _SMULL  $al, $ah, $b, $c
        MEND
; MUL_64x64_64 - multiply 64x64 = 64 (same code for signed and unsigned,
; since only the low 64 bits of the product are kept)
;
;   $ah.$al = $bh.$bl * $ch.$cl
;
; Register rules, all checked with DISTINCT:
;   - $al, $ah, $bl, $bh must be distinct registers
;   - $cl, $ch must be distinct from each other AND from $al, $ah:
;     both are read again by the MLAs after _UMULL has already written
;     $al and $ah, so aliasing them would give a silently wrong product
;     (this was previously not checked)
; Nothing in this macro writes $bl, $bh, $cl or $ch, so the c pair may
; share registers with the b pair.
        MACRO
        MUL_64x64_64 $al, $ah, $bl, $bh, $cl, $ch
        DISTINCT $al, $ah, $bl, $bh
        DISTINCT $al, $ah, $cl, $ch
        _UMULL  $al, $ah, $bl, $cl              ; ah.al  = bl * cl (full 64 bits)
        MLA     $ah, $bl, $ch, $ah              ; ah    += low 32 bits of bl * ch
        MLA     $ah, $bh, $cl, $ah              ; ah    += low 32 bits of bh * cl
        MEND
; UMUL_64x64_128 - unsigned 64x64 = 128 multiply
;
;   ($a3,$a2,$a1,$a0) = b * c     where b = ($bh,$bl), c = ($ch,$cl)
;
;   $a0..$a3   result, least significant word first
;   $tl, $th   pair of temporary registers, overwritten
;
; The two outer products are formed first, then each cross product is
; added in one word up from the bottom with the carry rippled into $a3.
; The b and c registers are read after result and temporary registers have
; been written, so they must not overlap $a0..$a3, $tl or $th; the only
; checks made are those inside _UMULL.  The flags are changed.
        MACRO
        UMUL_64x64_128 $a0, $a1, $a2, $a3, $bl, $bh, $cl, $ch, $tl, $th
        _UMULL  $a0, $a1, $bl, $cl              ; bl * cl
        _UMULL  $a2, $a3, $bh, $ch              ; bh * ch
        _UMULL  $tl, $th, $bl, $ch              ; first cross product: bl * ch
        ADDS    $a1, $a1, $tl
        ADCS    $a2, $a2, $th
        ADC     $a3, $a3, #0                    ; ripple the carry to the top word
        _UMULL  $tl, $th, $bh, $cl              ; second cross product: bh * cl
        ADDS    $a1, $a1, $tl
        ADCS    $a2, $a2, $th
        ADC     $a3, $a3, #0                    ; ripple the carry to the top word
        MEND
; the following code sequence provides an alternative definition for
; unsigned 64x64=128 that is one instruction shorter, but only works
; out quicker in terms of cycles on an ARM9 or StrongARM
;
; the original is quicker on an ARM7TDMI, since UMLAL takes an extra
; cycle there; this alternative should therefore only be used on
; an ARM9 or a StrongARM processor
; MACRO
; UMUL_64x64_128 $a0, $a1, $a2, $a3, $bl, $bh, $cl, $ch, $t
; MOV $t,#0
; UMULL $a0,$a1,$bl,$cl
; UMULL $a2,$a3,$bl,$ch
; ADDS $a1,$a1,$a2
; ADC $a2,$t,#0
; UMLAL $a1,$a2,$bh,$cl
; ADDS $a2,$a2,$a3
; ADC $a3,$t,#0
; UMLAL $a2,$a3,$bh,$ch
; MEND
; SMUL_64x64_128 - signed 64x64 = 128 multiply
;
;   ($a3,$a2,$a1,$a0) = b * c     where b = ($bh,$bl), c = ($ch,$cl)
;
;   $a0..$a3   result, least significant word first
;   $tl, $th   pair of temporary registers, overwritten
;
; Same scheme as the unsigned version, except that the high words are
; signed.  Each cross product mixes an unsigned low word with a signed
; high word but is formed with the signed _SMULL; when the low word has
; bit 31 set, the high word of the other operand is added to $th to
; compensate for the low word having been treated as negative.  The cross
; product is then added in with its sign extended into $a3.
; The b and c registers are read after result and temporary registers have
; been written, so they must not overlap $a0..$a3, $tl or $th; the only
; checks made are those inside _UMULL/_SMULL.  The flags are changed.
        MACRO
        SMUL_64x64_128 $a0, $a1, $a2, $a3, $bl, $bh, $cl, $ch, $tl, $th
        _UMULL  $a0, $a1, $bl, $cl              ; bl * cl
        _SMULL  $a2, $a3, $bh, $ch              ; bh * ch
        _SMULL  $tl, $th, $bl, $ch              ; first cross product: bl * ch
        TST     $bl, #0x80000000                ; was bl treated as negative?
        ADDNE   $th, $th, $ch                   ; if so add ch back into the high word
        ADDS    $a1, $a1, $tl
        ADCS    $a2, $a2, $th
        ADC     $a3, $a3, $th, ASR #31          ; carry + sign bit
        _SMULL  $tl, $th, $bh, $cl              ; second cross product: bh * cl
        TST     $cl, #0x80000000                ; was cl treated as negative?
        ADDNE   $th, $th, $bh                   ; if so add bh back into the high word
        ADDS    $a1, $a1, $tl
        ADCS    $a2, $a2, $th
        ADC     $a3, $a3, $th, ASR #31          ; carry + sign bit
        MEND
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;
; INTEGER DIVISION MACROS ;
; ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; 32/16 DIVISION - 2 cycles per answer bit ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; unsigned 32/16 = 16 with remainder 16
;
; n=numerator d=denominator
; q=quotient r=remainder
;
; ie n/d = q + r/d or n=q*d+r and 0<=r<d
; it is assumed that n<(d<<16) for this to work (else q will overflow)
; in particular, divide by 0 is not possible!
; n and d must be distinct
; q and r must be distinct
; can have {q,r} = {n,d}
MACRO
UDIV_32d16_16r16 $q, $r, $n, $d, $flag
DISTINCT $q, $r
DISTINCT $d, $n
IF "$flag"<>"-"
RSB $d, $d, #0 ; negate divisor (if not already)
ENDIF
CMN $n, $d, LSL #8 ; is n<(d<<8) ?
MOV $d, $d, LSL #15 ; get denominator up high
MOVLO $n, $n, LSL #7 ; if (n<(d<<8)) skip 8 stages
BLO %FT01 ; skip first 8 stages
ADDS $n, $d, $n ; will d go?
SUBLO $n, $n, $d ; if not then add it back on
ADCS $n, $d, $n, LSL #1 ; add answer bit, shift up, try
SUBLO $n, $n, $d ; if it didn't go then add on d
ADCS $n, $d, $n, LSL #1
SUBLO $n, $n, $d
ADCS $n, $d, $n, LSL #1
SUBLO $n, $n, $d
ADCS $n, $d, $n, LSL #1
SUBLO $n, $n, $d
ADCS $n, $d, $n, LSL #1
SUBLO $n, $n, $d
ADCS $n, $d, $n, LSL #1
SUBLO $n, $n, $d
ADCS $n, $d, $n, LSL #1
SUBLO $n, $n, $d
01
ADCS $n, $d, $n, LSL #1
SUBLO $n, $n, $d
ADCS $n, $d, $n, LSL #1
SUBLO $n, $n, $d
ADCS $n, $d, $n, LSL #1
SUBLO $n, $n, $d
ADCS $n, $d, $n, LSL #1
SUBLO $n, $n, $d
ADCS $n, $d, $n, LSL #1
SUBLO $n, $n, $d
ADCS $n, $d, $n, LSL #1
SUBLO $n, $n, $d
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -