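| File: setox.s
| (The source viewer's "Font size:" control label followed here; it is page
| residue, not part of the program.)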
| Last three rows of a table of 16-byte entries.  The table's label and
| its earlier rows lie above this excerpt -- presumably this is the end of
| EXPTBL, which the code below indexes in 16-byte steps; confirm against
| the full file.
        .long   0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
        .long   0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
        .long   0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A

| Local names for scratch slots.  Every use below addresses them off %a6.
| L_SCR2 and FP_SCR1..FP_SCR4 are defined outside this excerpt.
        .set    ADJFLAG,L_SCR2
        .set    SCALE,FP_SCR1
        .set    ADJSCALE,FP_SCR2
        .set    SC,FP_SCR3
        .set    ONEBYSC,FP_SCR4

        | xref  t_frcinx
        |xref   t_extdnrm
        |xref   t_unfl
        |xref   t_ovfl

|-----------------------------------------------------------------------
| Conventions shared by the four entry points below, as far as the code
| in this excerpt shows:
|   %a0      points at the input X; it is read as an extended-precision
|            operand (fmovex (%a0)) and its first longword is inspected
|            for sign and biased exponent.
|   %d1      is copied into the FPCR just before the last arithmetic
|            operation of every path (the original comments call this
|            restoring the user FPCR).
|   %a6      base register for the scratch slots named above and L_SCR1.
|   %fp0     holds the result when control leaves.
|   Scratch: %d0, %a1, %fp1.  %fp2/%fp3 are pushed and popped around use.
| Every path leaves by branching to t_frcinx, t_unfl, t_ovfl or
| t_extdnrm, none of which is defined in this excerpt.
|-----------------------------------------------------------------------

        .global setoxd
setoxd:
|--entry point for EXP(X), X is denormalized
|--Returns 1 + sign(X)*2^(-126), with the add done under the FPCR from %d1.
        movel   (%a0),%d0
        andil   #0x80000000,%d0         | keep only the sign of X
        oril    #0x00800000,%d0         | ...sign(X)*2^(-126) as a single
        movel   %d0,-(%sp)              | park it on the stack as an operand
        fmoves  #0x3F800000,%fp0        | fp0 = 1.0
        fmovel  %d1,%fpcr
        fadds   (%sp)+,%fp0             | add and pop in one step
        bra     t_frcinx

        .global setox
setox:
|--entry point for EXP(X), here X is finite, non-zero, and not NaN's

|--Step 1.
|--Classify |X| by its biased exponent and leading significand bits.
        movel   (%a0),%d0               | ...load part of input X
        andil   #0x7FFF0000,%d0         | ...biased expo. of X
        cmpil   #0x3FBE0000,%d0         | ...2^(-65)
        bges    EXPC1                   | ...normal case
        bra     EXPSM

EXPC1:
|--The case |X| >= 2^(-65)
|--(also entered from EM1BIG below for large positive X)
        movew   4(%a0),%d0              | ...expo. and partial sig. of |X|
        cmpil   #0x400CB167,%d0         | ...16380 log2 trunc. 16 bits
        blts    EXPMAIN                 | ...normal case
        bra     EXPBIG

EXPMAIN:
|--Step 2.
|--This is the normal branch:  2^(-65) <= |X| < 16380 log2.
        fmovex  (%a0),%fp0              | ...load input from (a0)

        fmovex  %fp0,%fp1               | keep X in fp1
        fmuls   #0x42B8AA3B,%fp0        | ...64/log2 * X
        fmovemx %fp2-%fp2/%fp3,-(%a7)   | ...save fp2 (the list also pushes fp3)
        movel   #0,ADJFLAG(%a6)         | no extra scaling step on this path
        fmovel  %fp0,%d0                | ...N = int( X * 64/log2 )
        lea     EXPTBL,%a1
        fmovel  %d0,%fp0                | ...convert to floating-format

        movel   %d0,L_SCR1(%a6)         | ...save N temporarily
        andil   #0x3F,%d0               | ...D0 is J = N mod 64
        lsll    #4,%d0                  | J*16: table entries are 16 bytes
        addal   %d0,%a1                 | ...address of 2^(J/64)
        movel   L_SCR1(%a6),%d0
        asrl    #6,%d0                  | ...D0 is M
        addiw   #0x3FFF,%d0             | ...biased expo. of 2^(M)
        movew   L2,L_SCR1(%a6)          | ...prefetch L2, no need in CB

EXPCONT1:
|--Step 3.
|--fp2/fp3 are saved on the stack. fp0 is N, fp1 is X,
|--a1 points to 2^(J/64), D0 is biased expo. of 2^(M)
        fmovex  %fp0,%fp2
        fmuls   #0xBC317218,%fp0        | ...N * L1, L1 = lead(-log2/64)
        fmulx   L2,%fp2                 | ...N * L2, L1+L2 = -log2/64
        faddx   %fp1,%fp0               | ...X + N*L1
        faddx   %fp2,%fp0               | ...fp0 is R, reduced arg.
|       MOVE.W          #$3FA5,EXPA3    ...load EXPA3 in cache

|--Step 4.
|--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
|--     R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
|--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
|--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]

        fmovex  %fp0,%fp1
        fmulx   %fp1,%fp1               | ...fp1 IS S = R*R

        fmoves  #0x3AB60B70,%fp2        | ...fp2 IS A5
|       MOVE.W          #0,2(%a1)       ...load 2^(J/64) in cache

        fmulx   %fp1,%fp2               | ...fp2 IS S*A5
        fmovex  %fp1,%fp3
        fmuls   #0x3C088895,%fp3        | ...fp3 IS S*A4

        faddd   EXPA3,%fp2              | ...fp2 IS A3+S*A5
        faddd   EXPA2,%fp3              | ...fp3 IS A2+S*A4

        fmulx   %fp1,%fp2               | ...fp2 IS S*(A3+S*A5)
|--Build SCALE by hand between FPU operations: sign/exponent word from
|--D0, then a significand of 0x80000000 00000000.
        movew   %d0,SCALE(%a6)          | ...SCALE is 2^(M) in extended
        clrw    SCALE+2(%a6)
        movel   #0x80000000,SCALE+4(%a6)
        clrl    SCALE+8(%a6)

        fmulx   %fp1,%fp3               | ...fp3 IS S*(A2+S*A4)

        fadds   #0x3F000000,%fp2        | ...fp2 IS A1+S*(A3+S*A5)
        fmulx   %fp0,%fp3               | ...fp3 IS R*S*(A2+S*A4)

        fmulx   %fp1,%fp2               | ...fp2 IS S*(A1+S*(A3+S*A5))
        faddx   %fp3,%fp0               | ...fp0 IS R+R*S*(A2+S*A4),
|                                       ...fp3 released

        fmovex  (%a1)+,%fp1             | ...fp1 is lead. pt. of 2^(J/64)
        faddx   %fp2,%fp0               | ...fp0 is EXP(R) - 1
|                                       ...fp2 released

|--Step 5
|--final reconstruction process
|--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )

        fmulx   %fp1,%fp0               | ...2^(J/64)*(Exp(R)-1)
        fmovemx (%a7)+,%fp2-%fp2/%fp3   | ...fp2 restored (fp3 as well)
        fadds   (%a1),%fp0              | ...accurate 2^(J/64)
                                        | (single stored after the extended
                                        |  value; a1 was post-incremented)

        faddx   %fp1,%fp0               | ...2^(J/64) + 2^(J/64)*...
        movel   ADJFLAG(%a6),%d0

|--Step 6
|--ADJFLAG is 0 on the EXPMAIN path and 1 on the EXPBIG path.
        tstl    %d0
        beqs    NORMAL
ADJUST:
        fmulx   ADJSCALE(%a6),%fp0      | extra factor 2^(M1), EXPBIG path only
NORMAL:
        fmovel  %d1,%FPCR               | ...restore user FPCR
        fmulx   SCALE(%a6),%fp0         | ...multiply 2^(M)
        bra     t_frcinx

EXPSM:
|--Step 7
|--|X| < 2^(-65): the result is 1+X, added under the FPCR from %d1.
        fmovemx (%a0),%fp0-%fp0         | ...in case X is denormalized
        fmovel  %d1,%FPCR
        fadds   #0x3F800000,%fp0        | ...1+X in user mode
        bra     t_frcinx

EXPBIG:
|--Step 8
        cmpil   #0x400CB27C,%d0         | ...16480 log2
        bgts    EXP2BIG
|--Steps 8.2 -- 8.6
|--Same reduction as Step 2, but the power of two is split into two
|--factors 2^(M1) and 2^(M) that are applied separately in Step 6.
        fmovex  (%a0),%fp0              | ...load input from (a0)

        fmovex  %fp0,%fp1
        fmuls   #0x42B8AA3B,%fp0        | ...64/log2 * X
        fmovemx %fp2-%fp2/%fp3,-(%a7)   | ...save fp2 (the list also pushes fp3)
        movel   #1,ADJFLAG(%a6)         | request the ADJUST multiply in Step 6
        fmovel  %fp0,%d0                | ...N = int( X * 64/log2 )
        lea     EXPTBL,%a1
        fmovel  %d0,%fp0                | ...convert to floating-format
        movel   %d0,L_SCR1(%a6)         | ...save N temporarily
        andil   #0x3F,%d0               | ...D0 is J = N mod 64
        lsll    #4,%d0
        addal   %d0,%a1                 | ...address of 2^(J/64)
        movel   L_SCR1(%a6),%d0
        asrl    #6,%d0                  | ...D0 is K
        movel   %d0,L_SCR1(%a6)         | ...save K temporarily
        asrl    #1,%d0                  | ...D0 is M1
        subl    %d0,L_SCR1(%a6)         | L_SCR1 now holds M = K - M1
        addiw   #0x3FFF,%d0             | ...biased expo. of 2^(M1)
        movew   %d0,ADJSCALE(%a6)       | ...ADJSCALE := 2^(M1)
        clrw    ADJSCALE+2(%a6)
        movel   #0x80000000,ADJSCALE+4(%a6)
        clrl    ADJSCALE+8(%a6)
        movel   L_SCR1(%a6),%d0         | ...D0 is M
        addiw   #0x3FFF,%d0             | ...biased expo. of 2^(M)
        bra     EXPCONT1                | ...go back to Step 3

EXP2BIG:
|--Step 9
|--|X| is beyond the 16480 log2 bound: hand off by the sign of X.
        fmovel  %d1,%FPCR
        movel   (%a0),%d0               | capture the sign before clearing it
        bclrb   #sign_bit,(%a0)         | ...setox always returns positive
                                        | (this modifies the operand in memory)
        cmpil   #0,%d0                  | re-derive flags from the saved copy
        blt     t_unfl                  | X negative
        bra     t_ovfl                  | X positive

        .global setoxm1d
setoxm1d:
|--entry point for EXPM1(X), here X is denormalized
|--Step 0.
        bra     t_extdnrm


        .global setoxm1
setoxm1:
|--entry point for EXPM1(X), here X is finite, non-zero, non-NaN

|--Step 1.
|--Step 1.1
        movel   (%a0),%d0               | ...load part of input X
        andil   #0x7FFF0000,%d0         | ...biased expo. of X
        cmpil   #0x3FFD0000,%d0         | ...1/4
        bges    EM1CON1                 | ...|X| >= 1/4
        bra     EM1SM

EM1CON1:
|--Step 1.3
|--The case |X| >= 1/4
        movew   4(%a0),%d0              | ...expo. and partial sig. of |X|
        cmpil   #0x4004C215,%d0         | ...70log2 rounded up to 16 bits
        bles    EM1MAIN                 | ...1/4 <= |X| <= 70log2
        bra     EM1BIG

EM1MAIN:
|--Step 2.
|--This is the case:    1/4 <= |X| <= 70 log2.
        fmovex  (%a0),%fp0              | ...load input from (a0)

        fmovex  %fp0,%fp1
        fmuls   #0x42B8AA3B,%fp0        | ...64/log2 * X
        fmovemx %fp2-%fp2/%fp3,-(%a7)   | ...save fp2 (the list also pushes fp3)
|       MOVE.W  #$3F81,EM1A4            ...prefetch in CB mode
        fmovel  %fp0,%d0                | ...N = int( X * 64/log2 )
        lea     EXPTBL,%a1
        fmovel  %d0,%fp0                | ...convert to floating-format

        movel   %d0,L_SCR1(%a6)         | ...save N temporarily
        andil   #0x3F,%d0               | ...D0 is J = N mod 64
        lsll    #4,%d0
        addal   %d0,%a1                 | ...address of 2^(J/64)
        movel   L_SCR1(%a6),%d0
        asrl    #6,%d0                  | ...D0 is M
        movel   %d0,L_SCR1(%a6)         | ...save a copy of M
|       MOVE.W  #$3FDC,L2               ...prefetch L2 in CB mode

|--Step 3.
|--fp2/fp3 are saved on the stack. fp0 is N, fp1 is X,
|--a1 points to 2^(J/64), D0 and L_SCR1(a6) both contain M
        fmovex  %fp0,%fp2
        fmuls   #0xBC317218,%fp0        | ...N * L1, L1 = lead(-log2/64)
        fmulx   L2,%fp2                 | ...N * L2, L1+L2 = -log2/64
        faddx   %fp1,%fp0               | ...X + N*L1
        faddx   %fp2,%fp0               | ...fp0 is R, reduced arg.
|       MOVE.W  #$3FC5,EM1A2            ...load EM1A2 in cache
        addiw   #0x3FFF,%d0             | ...D0 is biased expo. of 2^M

|--Step 4.
|--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
|--     R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
|--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
|--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]

        fmovex  %fp0,%fp1
        fmulx   %fp1,%fp1               | ...fp1 IS S = R*R

        fmoves  #0x3950097B,%fp2        | ...fp2 IS a6
|       MOVE.W  #0,2(%a1)               ...load 2^(J/64) in cache

        fmulx   %fp1,%fp2               | ...fp2 IS S*A6
        fmovex  %fp1,%fp3
        fmuls   #0x3AB60B6A,%fp3        | ...fp3 IS S*A5

        faddd   EM1A4,%fp2              | ...fp2 IS A4+S*A6
        faddd   EM1A3,%fp3              | ...fp3 IS A3+S*A5
|--Build SC = 2^(M) by hand between FPU operations.
        movew   %d0,SC(%a6)             | ...SC is 2^(M) in extended
        clrw    SC+2(%a6)
        movel   #0x80000000,SC+4(%a6)
        clrl    SC+8(%a6)

        fmulx   %fp1,%fp2               | ...fp2 IS S*(A4+S*A6)
        movel   L_SCR1(%a6),%d0         | ...D0 is M
        negw    %d0                     | ...D0 is -M
        fmulx   %fp1,%fp3               | ...fp3 IS S*(A3+S*A5)
        addiw   #0x3FFF,%d0             | ...biased expo. of 2^(-M)
        faddd   EM1A2,%fp2              | ...fp2 IS A2+S*(A4+S*A6)
        fadds   #0x3F000000,%fp3        | ...fp3 IS A1+S*(A3+S*A5)

        fmulx   %fp1,%fp2               | ...fp2 IS S*(A2+S*(A4+S*A6))
|--Build ONEBYSC = -2^(-M) the same way, with the sign bit set.
        oriw    #0x8000,%d0             | ...signed/expo. of -2^(-M)
        movew   %d0,ONEBYSC(%a6)        | ...OnebySc is -2^(-M)
        clrw    ONEBYSC+2(%a6)
        movel   #0x80000000,ONEBYSC+4(%a6)
        clrl    ONEBYSC+8(%a6)
        fmulx   %fp3,%fp1               | ...fp1 IS S*(A1+S*(A3+S*A5))
|                                       ...fp3 released

        fmulx   %fp0,%fp2               | ...fp2 IS R*S*(A2+S*(A4+S*A6))
        faddx   %fp1,%fp0               | ...fp0 IS R+S*(A1+S*(A3+S*A5))
|                                       ...fp1 released

        faddx   %fp2,%fp0               | ...fp0 IS EXP(R)-1
|                                       ...fp2 released
        fmovemx (%a7)+,%fp2-%fp2/%fp3   | ...fp2 restored (fp3 as well)

|--Step 5
|--Compute 2^(J/64)*p

        fmulx   (%a1),%fp0              | ...2^(J/64)*(Exp(R)-1)

|--Step 6
|--The three cases below add the same terms (p, T, t, OnebySc) and differ
|--only in the order of the additions, selected by M.
|--T is the extended value at (a1); t is the single at 12(a1).
|--Step 6.1
        movel   L_SCR1(%a6),%d0         | ...retrieve M
        cmpil   #63,%d0
        bles    MLE63
|--Step 6.2     M >= 64
        fmoves  12(%a1),%fp1            | ...fp1 is t
        faddx   ONEBYSC(%a6),%fp1       | ...fp1 is t+OnebySc
        faddx   %fp1,%fp0               | ...p+(t+OnebySc), fp1 released
        faddx   (%a1),%fp0              | ...T+(p+(t+OnebySc))
        bras    EM1SCALE
MLE63:
|--Step 6.3     M <= 63
        cmpil   #-3,%d0
        bges    MGEN3
MLTN3:
|--Step 6.4     M <= -4
        fadds   12(%a1),%fp0            | ...p+t
        faddx   (%a1),%fp0              | ...T+(p+t)
        faddx   ONEBYSC(%a6),%fp0       | ...OnebySc + (T+(p+t))
        bras    EM1SCALE
MGEN3:
|--Step 6.5     -3 <= M <= 63
        fmovex  (%a1)+,%fp1             | ...fp1 is T
        fadds   (%a1),%fp0              | ...fp0 is p+t
        faddx   ONEBYSC(%a6),%fp1       | ...fp1 is T+OnebySc
        faddx   %fp1,%fp0               | ...(T+OnebySc)+(p+t)

EM1SCALE:
|--Step 6.6
        fmovel  %d1,%FPCR
        fmulx   SC(%a6),%fp0            | scale by 2^(M)

        bra     t_frcinx

EM1SM:
|--Step 7       |X| < 1/4.
|--D0 still holds the masked biased exponent from Step 1.1.
        cmpil   #0x3FBE0000,%d0         | ...2^(-65)
        bges    EM1POLY

EM1TINY:
|--Step 8       |X| < 2^(-65)
|--NOTE(review): the original comment on the compare reads 2^(-16312), but
|--exponent field 0x0033 with the 0x3FFF bias used throughout this file is
|--2^(-16332).  The code is left as is; confirm which bound is intended.
        cmpil   #0x00330000,%d0         | ...2^(-16312)
        blts    EM12TINY
|--Step 8.2
|--Result is X + (-2^(-16382)), added under the FPCR from %d1.
        movel   #0x80010000,SC(%a6)     | ...SC is -2^(-16382)
        movel   #0x80000000,SC+4(%a6)
        clrl    SC+8(%a6)
        fmovex  (%a0),%fp0
        fmovel  %d1,%FPCR
        faddx   SC(%a6),%fp0

        bra     t_frcinx

EM12TINY:
|--Step 8.3
|--Scale X by TWO140, add the same SC constant as Step 8.2, then scale by
|--TWON140 under the FPCR from %d1.  TWO140 and TWON140 are read as doubles
|--and are defined outside this excerpt.
        fmovex  (%a0),%fp0
        fmuld   TWO140,%fp0
        movel   #0x80010000,SC(%a6)
        movel   #0x80000000,SC+4(%a6)
        clrl    SC+8(%a6)
        faddx   SC(%a6),%fp0
        fmovel  %d1,%FPCR
        fmuld   TWON140,%fp0

        bra     t_frcinx

EM1POLY:
|--Step 9       exp(X)-1 by a simple polynomial
|--Two interleaved Horner chains in S = X*X: fp1 carries the even-numbered
|--coefficients (B12, B10, ... B2), fp2 the odd-numbered ones (B11 ... B3).
        fmovex  (%a0),%fp0              | ...fp0 is X
        fmulx   %fp0,%fp0               | ...fp0 is S := X*X
        fmovemx %fp2-%fp2/%fp3,-(%a7)   | ...save fp2 (the list also pushes fp3)
        fmoves  #0x2F30CAA8,%fp1        | ...fp1 is B12
        fmulx   %fp0,%fp1               | ...fp1 is S*B12
        fmoves  #0x310F8290,%fp2        | ...fp2 is B11
        fadds   #0x32D73220,%fp1        | ...fp1 is B10+S*B12

        fmulx   %fp0,%fp2               | ...fp2 is S*B11
        fmulx   %fp0,%fp1               | ...fp1 is S*(B10 + ...

        fadds   #0x3493F281,%fp2        | ...fp2 is B9+S*...
        faddd   EM1B8,%fp1              | ...fp1 is B8+S*...

        fmulx   %fp0,%fp2               | ...fp2 is S*(B9+...
        fmulx   %fp0,%fp1               | ...fp1 is S*(B8+...

        faddd   EM1B7,%fp2              | ...fp2 is B7+S*...
        faddd   EM1B6,%fp1              | ...fp1 is B6+S*...

        fmulx   %fp0,%fp2               | ...fp2 is S*(B7+...
        fmulx   %fp0,%fp1               | ...fp1 is S*(B6+...

        faddd   EM1B5,%fp2              | ...fp2 is B5+S*...
        faddd   EM1B4,%fp1              | ...fp1 is B4+S*...

        fmulx   %fp0,%fp2               | ...fp2 is S*(B5+...
        fmulx   %fp0,%fp1               | ...fp1 is S*(B4+...

        faddd   EM1B3,%fp2              | ...fp2 is B3+S*...
        faddx   EM1B2,%fp1              | ...fp1 is B2+S*...

        fmulx   %fp0,%fp2               | ...fp2 is S*(B3+...
        fmulx   %fp0,%fp1               | ...fp1 is S*(B2+...

        fmulx   %fp0,%fp2               | ...fp2 is S*S*(B3+...)
        fmulx   (%a0),%fp1              | ...fp1 is X*S*(B2...

        fmuls   #0x3F000000,%fp0        | ...fp0 is S*B1
        faddx   %fp2,%fp1               | ...fp1 is Q
|                                       ...fp2 released

        fmovemx (%a7)+,%fp2-%fp2/%fp3   | ...fp2 restored (fp3 as well)

        faddx   %fp1,%fp0               | ...fp0 is S*B1+Q
|                                       ...fp1 released

        fmovel  %d1,%FPCR
        faddx   (%a0),%fp0              | final add of X under the FPCR from %d1

        bra     t_frcinx

EM1BIG:
|--Step 10      |X| > 70 log2
|--Positive X is handed to the EXP(X) path at EXPC1; negative X yields
|-- -1 + 2^(-126).
        movel   (%a0),%d0
        cmpil   #0,%d0
        bgt     EXPC1
|--Step 10.2
        fmoves  #0xBF800000,%fp0        | ...fp0 is -1
        fmovel  %d1,%FPCR
        fadds   #0x00800000,%fp0        | ...-1 + 2^(-126)

        bra     t_frcinx

        |end
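| Source-viewer page residue (not part of the program): keyboard shortcuts
|   Copy code             Ctrl + C
|   Search code           Ctrl + F
|   Full-screen mode      F11
|   Toggle theme          Ctrl + Shift + D
|   Show shortcuts        ?
|   Increase font size    Ctrl + =
|   Decrease font size    Ctrl + -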