📄 erf_utils.h
字号:
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_34)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_33)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_32)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_31)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_30)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_29)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_28)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_27)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_26)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_25)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_24)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_23)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_22)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_21)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_20)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_19)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_18)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_17)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_16)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_15)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_14)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_13)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_12)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_11)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_10)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_09)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_08)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_07)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_06)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_05)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_04)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_03)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_02)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_01)); \ _tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_00)); \ _tresult = spu_mul(_tresult, _xabs); \ _tresult = spu_mul(_tresult, spu_splats((float)TWO_OVER_SQRT_PI)); \} /* * Continued Fractions Approximation of Erfc() * ( ) * 1 ( 1 v 2v 3v ) * erfc(x) = ------------------------- * ( --- --- --- --- ... ) * sqrt(pi) * x * exp(x^2) ( 1+ 1+ 1+ 1+ ) * ( ) * Continued Fractions * 1 * v = ----- * 2*x^2 * * We are using a backward recurrence calculation to estimate the continued fraction. * * p = a p + b q * m,n m m+1,n m m+1,n * * q = p * m,n m+1,n * * With, * * p = a ; q = 1 * n,n n n,n * * * a = 0, b = 1, * 0 0 * * a = 1, b = n/2x^2 * n n * * * F = p / q * 0,n 0,n 0,n * * Ref: "Computing the Incomplete Gamma Function to Arbitrary Precision", * by Serge Winitzki, Department of Physics, Ludwig-Maximilians University, Munich, Germany. * */#define CONTFRAC_ERFCF4(_xabs, _xsqu, _presult) { \ vec_float4 v; \ vec_float4 p, q, plast, qlast; \ vec_float4 factor; \ vec_float4 inv_xsqu; \ inv_xsqu = _recipf4(_xsqu); \ v = spu_mul(inv_xsqu, onehalff); \ p = spu_splats(3.025f); q = onef; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(40.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(39.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(38.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(37.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(36.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(35.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(34.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(33.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(32.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(31.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(30.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(29.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(28.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(27.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(26.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(25.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(24.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(23.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(22.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(21.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(20.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(19.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(18.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(17.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(16.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(15.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(14.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(13.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(12.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(11.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(10.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 9.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 8.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 7.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 6.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 5.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 4.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 3.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 2.0f)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 1.0f)), plast); q = plast; plast = p; qlast = q; \ p = qlast; q = plast; \ factor = spu_mul(spu_splats((float)SQRT_PI), spu_mul(_xabs, _expf4(_xsqu))); \ _presult = _divf4(p, spu_mul(factor, q)); \}#define CONTFRAC_ERFC(_xabs, _xsqu, _presult) { \ vec_double2 v; \ vec_double2 p, q, plast, qlast; \ vec_double2 factor; \ vec_double2 inv_xsqu; \ inv_xsqu = _recipd2(_xsqu); \ v = spu_mul(inv_xsqu, onehalfd); \ p = spu_splats(3.025); q = oned; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(40.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(39.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(38.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(37.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(36.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(35.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(34.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(33.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(32.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(31.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(30.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(29.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(28.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(27.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(26.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(25.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(24.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(23.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(22.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(21.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(20.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(19.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(18.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(17.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(16.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(15.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(14.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(13.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(12.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(11.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats(10.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 9.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 8.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 7.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 6.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 5.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 4.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 3.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 2.0)), plast); q = plast; plast = p; qlast = q; \ p = spu_madd(qlast, spu_mul(v, spu_splats( 1.0)), plast); q = plast; plast = p; qlast = q; \ p = qlast; q = plast; \ factor = spu_mul(spu_splats(SQRT_PI), spu_mul(_xabs, _expd2(_xsqu))); \ _presult = _divd2(p, spu_mul(factor, q)); \}#endif /* _ERF_UTILS_H_ */#endif /* __SPU__ */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -