⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 e_acosl.s

📁 glibc 2.9,最新版的C语言库函数
💻 S
📖 第 1 页 / 共 4 页
字号:
// ---------------------------------------------------------------------------
// acosl (IA-64) -- excerpt.
//
// Layout of this excerpt:
//   1. poly_coeffs  : constants for the odd polynomial in R (C_3..C_17) plus
//                     pi/2 and pi, each stored as a (low, high) double pair.
//   2. Register and floating-point register aliases used by the code.
//   3. Entry of acosl: the argument s is read from f8 and the result is
//      written back to f8.  The entry code dispatches on |s|:
//        |s| <  2^{-4}      -> SMALL_S  (not part of this excerpt)
//        |s| >= sqrt(2)/2   -> LARGE_S  (only its first bundles are here)
//        otherwise          -> table-driven main path, which falls through
//                              and returns via br.ret.
//
// T_table, SMALL_S and the remainder of LARGE_S are defined outside this
// excerpt.
//
// NOTE: several integer aliases deliberately share one physical register
// (R_TMP3/R_TMP03 = r27, R_TMP5/R_SGNMASK = r23, R_TMP6/R_EXP0 = r22,
// R_TMP7/R_DBL_S = r21); keep their live ranges in mind when reordering.
// ---------------------------------------------------------------------------

LOCAL_OBJECT_START(poly_coeffs)
       // C_3   (16 bytes, read with ldfe)
data8 0xaaaaaaaaaaaaaaab, 0x0000000000003ffc
       // C_5   (16 bytes, read with ldfe)
data8 0x999999999999999a, 0x0000000000003ffb
       // C_7, C_9
data8 0x3fa6db6db6db6db7, 0x3f9f1c71c71c71c8
       // pi/2 (low, high)
data8 0x3C91A62633145C07, 0x3FF921FB54442D18
       // C_11, C_13
data8 0x3f96e8ba2e8ba2e9, 0x3f91c4ec4ec4ec4e
       // C_15, C_17
data8 0x3f8c99999999999a, 0x3f87a87878787223
       // pi (low, high)
data8 0x3CA1A62633145C07, 0x400921FB54442D18
LOCAL_OBJECT_END(poly_coeffs)


// ---- integer register aliases ---------------------------------------------
R_DBL_S        = r21
R_EXP0         = r22
R_EXP          = r15
R_SGNMASK      = r23
R_TMP          = r24
R_TMP2         = r25
R_INDEX        = r26
R_TMP3         = r27
R_TMP03        = r27
R_TMP4         = r28
R_TMP5         = r23
R_TMP6         = r22
R_TMP7         = r21
R_T            = r29
R_BIAS         = r20

// ---- floating-point register aliases --------------------------------------
F_T            = f6
F_1S2          = f7
F_1S2_S        = f9
F_INV_1T2      = f10
F_SQRT_1T2     = f11
F_S2T2         = f12
F_X            = f13
F_D            = f14
F_2M64         = f15

F_CS2          = f32
F_CS3          = f33
F_CS4          = f34
F_CS5          = f35
F_CS6          = f36
F_CS7          = f37
F_CS8          = f38
F_CS9          = f39
F_S23          = f40
F_S45          = f41
F_S67          = f42
F_S89          = f43
F_S25          = f44
F_S69          = f45
F_S29          = f46
F_X2           = f47
F_X4           = f48
F_TSQRT        = f49
F_DTX          = f50
F_R            = f51
F_R2           = f52
F_R3           = f53
F_R4           = f54

F_C3           = f55
F_C5           = f56
F_C7           = f57
F_C9           = f58
F_P79          = f59
F_P35          = f60
F_P39          = f61

F_ATHI         = f62
F_ATLO         = f63

F_T1           = f64
F_Y            = f65
F_Y2           = f66
F_ANDMASK      = f67
F_ORMASK       = f68
F_S            = f69
F_05           = f70
F_SQRT_1S2     = f71
F_DS           = f72
F_Z            = f73
F_1T2          = f74
F_DZ           = f75
F_ZE           = f76
F_YZ           = f77
F_Y1S2         = f78
F_Y1S2X        = f79
F_1X           = f80
F_ST           = f81
F_1T2_ST       = f82
F_TSS          = f83
F_Y1S2X2       = f84
F_DZ_TERM      = f85
F_DTS          = f86
F_DS2X         = f87
F_T2           = f88
F_ZY1S2S       = f89
F_Y1S2_1X      = f90
F_TS           = f91

F_PI2_LO       = f92
F_PI2_HI       = f93
F_S19          = f94
F_INV1T2_2     = f95
F_CORR         = f96
F_DZ0          = f97

F_C11          = f98
F_C13          = f99
F_C15          = f100
F_C17          = f101
F_P1113        = f102
F_P1517        = f103
F_P1117        = f104
F_P317         = f105
F_R8           = f106
F_HI           = f107
F_1S2_HI       = f108
F_DS2          = f109
F_Y2_2         = f110
//F_S2         = f111
//F_S_DS2      = f112
F_S_1S2S       = f113
F_XL           = f114
F_2M128        = f115
F_1AS          = f116
F_AS           = f117


.section .text
GLOBAL_LIBM_ENTRY(acosl)

// ---- entry: classify |s| and build the table argument t -------------------
{.mfi
       // get exponent, mantissa (rounded to double precision) of s
       getf.d R_DBL_S = f8
       // 1-s^2
       fnma.s1 F_1S2 = f8, f8, f1
       // r2 = pointer to T_table
       addl r2 = @ltoff(T_table), gp
}
{.mfi
       // sign mask
       mov R_SGNMASK = 0x20000
       nop.f 0
       // bias-63-1
       mov R_TMP03 = 0xffff-64;;
}
{.mfi
       // get exponent of s
       getf.exp R_EXP = f8
       nop.f 0
       // R_TMP4 = 2^45
       shl R_TMP4 = R_SGNMASK, 45-17
}
{.mlx
       // load bias-4
       mov R_TMP = 0xffff-4
       // load RU(sqrt(2)/2) to integer register (in double format, shifted left by 1)
       movl R_TMP2 = 0x7fcd413cccfe779a;;
}
{.mfi
       // load 2^{-64} in FP register
       setf.exp F_2M64 = R_TMP03
       nop.f 0
       // index = (0x7-exponent)|b1 b2.. b6
       extr.u R_INDEX = R_DBL_S, 46, 9
}
{.mfi
       // get t = sign|exponent|b1 b2.. b6 1 x.. x
       or R_T = R_DBL_S, R_TMP4
       nop.f 0
       // R_TMP4 = 2^45-1
       sub R_TMP4 = R_TMP4, r0, 1;;
}
{.mfi
       // get t = sign|exponent|b1 b2.. b6 1 0.. 0
       andcm R_T = R_T, R_TMP4
       nop.f 0
       // eliminate sign from R_DBL_S (shift left by 1)
       shl R_TMP3 = R_DBL_S, 1
}
{.mfi
       // R_BIAS = 3*2^6
       mov R_BIAS = 0xc0
       nop.f 0
       // eliminate sign from R_EXP
       andcm R_EXP0 = R_EXP, R_SGNMASK;;
}
{.mfi
       // load start address for T_table
       ld8 r2 = [r2]
       nop.f 0
       // p8 = 1 if |s|>=sqrt(2)/2
       cmp.geu p8, p0 = R_TMP3, R_TMP2
}
{.mlx
       // p7 = 1 if |s|<2^{-4} (exponent of s<bias-4)
       cmp.lt p7, p0 = R_EXP0, R_TMP
       // sqrt coefficient cs8 = -33*13/128
       movl R_TMP2 = 0xc0568000;;
}
{.mbb
       // load t in FP register
       setf.d F_T = R_T
       // if |s|<2^{-4}, take alternate path
 (p7)  br.cond.spnt SMALL_S
       // if |s|>=sqrt(2)/2, take alternate path
 (p8)  br.cond.sptk LARGE_S
}

// ---- main path: 2^{-4} <= |s| < sqrt(2)/2 ---------------------------------
{.mlx
       // index = (4-exponent)|b1 b2.. b6
       sub R_INDEX = R_INDEX, R_BIAS
       // sqrt coefficient cs9 = 55*13/128
       movl R_TMP = 0x40b2c000;;
}
{.mfi
       // sqrt coefficient cs8 = -33*13/128
       setf.s F_CS8 = R_TMP2
       nop.f 0
       // shift R_INDEX by 5
       shl R_INDEX = R_INDEX, 5
}
{.mfi
       // sqrt coefficient cs3 = 0.5 (set exponent = bias-1)
       mov R_TMP4 = 0xffff - 1
       nop.f 0
       // sqrt coefficient cs6 = -21/16 (upper 16 bits of the single-precision pattern)
       mov R_TMP6 = 0xbfa8;;
}
{.mlx
       // table index
       add r2 = r2, R_INDEX
       // sqrt coefficient cs7 = 33/16
       movl R_TMP2 = 0x40040000;;
}
{.mmi
       // load cs9 = 55*13/128
       setf.s F_CS9 = R_TMP
       // sqrt coefficient cs5 = 7/8
       mov R_TMP3 = 0x3f60
       // sqrt coefficient cs6 = -21/16
       shl R_TMP6 = R_TMP6, 16;;
}
{.mmi
       // load significand of 1/(1-t^2)
       ldf8 F_INV_1T2 = [r2], 8
       // sqrt coefficient cs7 = 33/16
       setf.s F_CS7 = R_TMP2
       // sqrt coefficient cs4 = -5/8
       mov R_TMP5 = 0xbf20;;
}
{.mmi
       // load significand of sqrt(1-t^2)
       ldf8 F_SQRT_1T2 = [r2], 8
       // sqrt coefficient cs6 = -21/16
       setf.s F_CS6 = R_TMP6
       // sqrt coefficient cs5 = 7/8
       shl R_TMP3 = R_TMP3, 16;;
}
{.mmi
       // sqrt coefficient cs3 = 0.5 (set exponent = bias-1)
       setf.exp F_CS3 = R_TMP4
       // r3 = pointer to polynomial coefficients
       addl r3 = @ltoff(poly_coeffs), gp
       // sqrt coefficient cs4 = -5/8
       shl R_TMP5 = R_TMP5, 16;;
}
{.mfi
       // sqrt coefficient cs5 = 7/8
       setf.s F_CS5 = R_TMP3
       // d = s-t
       fms.s1 F_D = f8, f1, F_T
       // set p6 = 1 if s<0, p11 = 1 if s>=0
       cmp.ge p6, p11 = R_EXP, R_DBL_S
}
{.mfi
       // r3 = load start address to polynomial coefficients
       ld8 r3 = [r3]
       // s+t
       fma.s1 F_S2T2 = f8, f1, F_T
       nop.i 0;;
}
{.mfi
       // sqrt coefficient cs4 = -5/8
       setf.s F_CS4 = R_TMP5
       // s^2-t^2
       fma.s1 F_S2T2 = F_S2T2, F_D, f0
       nop.i 0;;
}
{.mfi
       // load C_3
       ldfe F_C3 = [r3], 16
       // 0.5/(1-t^2) = 2^{-64}*(2^63/(1-t^2))
       fma.s1 F_INV_1T2 = F_INV_1T2, F_2M64, f0
       nop.i 0;;
}
{.mfi
       // load C_5
       ldfe F_C5 = [r3], 16
       // set correct exponent for sqrt(1-t^2)
       fma.s1 F_SQRT_1T2 = F_SQRT_1T2, F_2M64, f0
       nop.i 0;;
}
{.mfi
       // load C_7, C_9
       ldfpd F_C7, F_C9 = [r3], 16
       // x = -(s^2-t^2)/(1-t^2)/2
       fnma.s1 F_X = F_INV_1T2, F_S2T2, f0
       nop.i 0;;
}
{.mmf
       // load asin(t)_high, asin(t)_low
       ldfpd F_ATHI, F_ATLO = [r2]
       // load pi/2
       ldfpd F_PI2_LO, F_PI2_HI = [r3]
       // t*sqrt(1-t^2)
       fma.s1 F_TSQRT = F_T, F_SQRT_1T2, f0;;
}
{.mfi
       nop.m 0
       // cs9*x+cs8
       fma.s1 F_S89 = F_CS9, F_X, F_CS8
       nop.i 0
}
{.mfi
       nop.m 0
       // cs7*x+cs6
       fma.s1 F_S67 = F_CS7, F_X, F_CS6
       nop.i 0;;
}
{.mfi
       nop.m 0
       // cs5*x+cs4
       fma.s1 F_S45 = F_CS5, F_X, F_CS4
       nop.i 0
}
{.mfi
       nop.m 0
       // x*x
       fma.s1 F_X2 = F_X, F_X, f0
       nop.i 0;;
}
{.mfi
       nop.m 0
       // (s-t)-t*x
       fnma.s1 F_DTX = F_T, F_X, F_D
       nop.i 0
}
{.mfi
       nop.m 0
       // cs3*x+cs2 (cs2 = -0.5 = -cs3)
       fms.s1 F_S23 = F_CS3, F_X, F_CS3
       nop.i 0;;
}
{.mfi
       nop.m 0
       // if sign is negative, negate table values: asin(t)_low
 (p6)  fnma.s1 F_ATLO = F_ATLO, f1, f0
       nop.i 0
}
{.mfi
       nop.m 0
       // if sign is negative, negate table values: asin(t)_high
 (p6)  fnma.s1 F_ATHI = F_ATHI, f1, f0
       nop.i 0;;
}
{.mfi
       nop.m 0
       // cs9*x^3+cs8*x^2+cs7*x+cs6
       fma.s1 F_S69 = F_S89, F_X2, F_S67
       nop.i 0
}
{.mfi
       nop.m 0
       // x^4
       fma.s1 F_X4 = F_X2, F_X2, f0
       nop.i 0;;
}
{.mfi
       nop.m 0
       // t*sqrt(1-t^2)*x^2
       fma.s1 F_TSQRT = F_TSQRT, F_X2, f0
       nop.i 0
}
{.mfi
       nop.m 0
       // cs5*x^3+cs4*x^2+cs3*x+cs2
       fma.s1 F_S25 = F_S45, F_X2, F_S23
       nop.i 0;;
}
{.mfi
       nop.m 0
       // ((s-t)-t*x)*sqrt(1-t^2)
       fma.s1 F_DTX = F_DTX, F_SQRT_1T2, f0
       nop.i 0;;
}
{.mfi
       nop.m 0
       // (pi/2)_high - asin(t)_high
       fnma.s1 F_ATHI = F_ATHI, f1, F_PI2_HI
       nop.i 0
}
{.mfi
       nop.m 0
       // asin(t)_low - (pi/2)_low
       fnma.s1 F_ATLO = F_PI2_LO, f1, F_ATLO
       nop.i 0;;
}
{.mfi
       nop.m 0
       // PS29 = cs9*x^7+..+cs5*x^3+cs4*x^2+cs3*x+cs2
       fma.s1 F_S29 = F_S69, F_X4, F_S25
       nop.i 0;;
}
{.mfi
       nop.m 0
       // R = ((s-t)-t*x)*sqrt(1-t^2)-t*sqrt(1-t^2)*x^2*PS29
       fnma.s1 F_R = F_S29, F_TSQRT, F_DTX
       nop.i 0;;
}
{.mfi
       nop.m 0
       // R^2
       fma.s1 F_R2 = F_R, F_R, f0
       nop.i 0;;
}
{.mfi
       nop.m 0
       // c7+c9*R^2
       fma.s1 F_P79 = F_C9, F_R2, F_C7
       nop.i 0
}
{.mfi
       nop.m 0
       // c3+c5*R^2
       fma.s1 F_P35 = F_C5, F_R2, F_C3
       nop.i 0;;
}
{.mfi
       nop.m 0
       // R^4
       fma.s1 F_R4 = F_R2, F_R2, f0
       nop.i 0;;
}
{.mfi
       nop.m 0
       // R^3
       fma.s1 F_R3 = F_R2, F_R, f0
       nop.i 0;;
}
{.mfi
       nop.m 0
       // c3+c5*R^2+c7*R^4+c9*R^6
       fma.s1 F_P39 = F_P79, F_R4, F_P35
       nop.i 0;;
}
{.mfi
       nop.m 0
       // (asin(t)_low-(pi/2)_low)+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
       fma.s1 F_P39 = F_P39, F_R3, F_ATLO
       nop.i 0;;
}
{.mfi
       nop.m 0
       // R+(asin(t)_low-(pi/2)_low)+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
       fma.s1 F_P39 = F_P39, f1, F_R
       nop.i 0;;
}
{.mfb
       nop.m 0
       // result = ((pi/2)_high-asin(t)_high)
       //          - (R+(asin(t)_low-(pi/2)_low)+R^3*(c3+c5*R^2+c7*R^4+c9*R^6))
       // (.s0: this final operation uses status field 0, unlike the .s1 steps above)
       fnma.s0 f8 = F_P39, f1, F_ATHI
       // return
       br.ret.sptk b0;;
}

// ---- |s| >= sqrt(2)/2 (continues beyond this excerpt) ---------------------
LARGE_S:
{.mfi
       // bias-1
       mov R_TMP3 = 0xffff - 1
       // y ~ 1/sqrt(1-s^2)
       frsqrta.s1 F_Y, p7 = F_1S2
       // c9 = 55*13*17/128
       mov R_TMP4 = 0x10af7b
}
{.mlx
       // c8 = -33*13*15/128
       mov R_TMP5 = 0x184923
       movl R_TMP2 = 0xff00000000000000;;
}
{.mfi
       // set p6 = 1 if s<0, p11 = 1 if s>0
       cmp.ge p6, p11 = R_EXP, R_DBL_S
       // 1-s^2
       fnma.s1 F_1S2 = f8, f8, f1
       // set p9 = 1
       cmp.eq p9, p0 = r0, r0;;
}
{.mfi
       // load 0.5
       setf.exp F_05 = R_TMP3
       // (1-s^2) rounded to single precision
       fnma.s.s1 F_1S2_S = f8, f8, f1
       // c9 = 55*13*17/128
       shl R_TMP4 = R_TMP4, 10
}
{.mlx
       // AND mask for getting t ~ sqrt(1-s^2)
       setf.sig F_ANDMASK = R_TMP2
       // OR mask
       movl R_TMP2 = 0x0100000000000000;;
}
.pred.rel "mutex", p6, p11
{.mfi
       nop.m 0
       // 1-|s| (s<0: 1+s)
 (p6)  fma.s1 F_1AS = f8, f1, f1
       nop.i 0
}
{.mfi
       nop.m 0
       // 1-|s| (s>0: 1-s)
 (p11) fnma.s1 F_1AS = f8, f1, f1
       nop.i 0;;
}
{.mfi
       // c9 = 55*13*17/128
       setf.s F_CS9 = R_TMP4
       // |s|
 (p6)  fnma.s1 F_AS = f8, f1, f0
       // c8 = -33*13*15/128
       shl R_TMP5 = R_TMP5, 11
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -