📄 statfunctions.java

📁 这是一个matlab的java实现。里面有许多内容。请大家慢慢捉摸。
💻 JAVA
字号:
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).   http://www.cs.umass.edu/~mccallum/mallet   This software is provided under the terms of the Common Public License,   version 1.0, as published by http://www.opensource.org.  For further   information, see the file `LICENSE' included with this distribution. *//**    @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */package edu.umass.cs.mallet.base.util;import edu.umass.cs.mallet.base.util.MalletLogger;import java.util.logging.*;// Obtained from http://www.stat.vt.edu/~sundar/java/code/StatFunctions.html// August 2002/** * @(#)StatFunctions.java * * DAMAGE (c) 2000 by Sundar Dorai-Raj  * * @author Sundar Dorai-Raj  * * Email: sdoraira@vt.edu  * * This program is free software; you can redistribute it and/or  * * modify it under the terms of the GNU General Public License   * * as published by the Free Software Foundation; either version 2   * * of the License, or (at your option) any later version,   * * provided that any use properly credits the author.   * * This program is distributed in the hope that it will be useful,  * * but WITHOUT ANY WARRANTY; without even the implied warranty of  * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the  * * GNU General Public License for more details at http://www.gnu.org * * */import java.io.*;public final class StatFunctions {	private static Logger logger = MalletLogger.getLogger(StatFunctions.class.getName());  public static double cov(Univariate x,Univariate y) {    double sumxy=0;    int i,n=(x.size()>=y.size() ? x.size():y.size());    try {      for(i=0;i<x.size();i++)        sumxy+=(x.elementAt(i)-x.mean())*(y.elementAt(i)-y.mean());    }    catch (ArrayIndexOutOfBoundsException e) {      logger.info ("size of x != size of y");      e.printStackTrace();    }    return(sumxy/(n-1));  }  public static double corr(Univariate x,Univariate y) {    double cov=cov(x,y);    return(cov/(x.stdev()*y.stdev()));  }  public static double[] ols(Univariate x,Univariate y) {     double[] coef=new double[2];     int i,n=(x.size()<=y.size() ? x.size():y.size());     double sxy=0.0,sxx=0.0;     double xbar=x.mean(),ybar=y.mean(),xi,yi;     for(i=0;i<n;i++) {       xi=x.elementAt(i);       yi=y.elementAt(i);       sxy+=(xi-xbar)*(yi-ybar);       sxx+=(xi-xbar)*(xi-xbar);     }     coef[0]=sxy/sxx;     coef[1]=ybar-coef[0]*xbar;     return(coef);  }  public static double qnorm(double p,boolean upper) {    /* Reference:       J. D. Beasley and S. G. Springer        Algorithm AS 111: "The Percentage Points of the Normal Distribution"       Applied Statistics    */    if(p<0 || p>1)      throw new IllegalArgumentException("Illegal argument "+p+" for qnorm(p).");    double split=0.42,           a0=  2.50662823884,           a1=-18.61500062529,           a2= 41.39119773534,           a3=-25.44106049637,           b1= -8.47351093090,           b2= 23.08336743743,           b3=-21.06224101826,           b4=  3.13082909833,           c0= -2.78718931138,           c1= -2.29796479134,           c2=  4.85014127135,           c3=  2.32121276858,           d1=  3.54388924762,           d2=  1.63706781897,           q=p-0.5;    double r,ppnd;    if(Math.abs(q)<=split) {      r=q*q;      ppnd=q*(((a3*r+a2)*r+a1)*r+a0)/((((b4*r+b3)*r+b2)*r+b1)*r+1);    }    else {      r=p;      if(q>0) r=1-p;      if(r>0) {        r=Math.sqrt(-Math.log(r));        ppnd=(((c3*r+c2)*r+c1)*r+c0)/((d2*r+d1)*r+1);        if(q<0) ppnd=-ppnd;      }      else {        ppnd=0;      }    }    if(upper) ppnd=1-ppnd;    return(ppnd);  }  public static double qnorm(double p,boolean upper,double mu,double sigma2) {    return(qnorm(p,upper)*Math.sqrt(sigma2)+mu);  }  public static double pnorm(double z,boolean upper) {    /* Reference:       I. D. Hill        Algorithm AS 66: "The Normal Integral"       Applied Statistics    */    double ltone=7.0,           utzero=18.66,           con=1.28,           a1 = 0.398942280444,           a2 = 0.399903438504,           a3 = 5.75885480458,           a4 =29.8213557808,           a5 = 2.62433121679,           a6 =48.6959930692,           a7 = 5.92885724438,           b1 = 0.398942280385,           b2 = 3.8052e-8,           b3 = 1.00000615302,           b4 = 3.98064794e-4,           b5 = 1.986153813664,           b6 = 0.151679116635,           b7 = 5.29330324926,           b8 = 4.8385912808,           b9 =15.1508972451,           b10= 0.742380924027,           b11=30.789933034,           b12= 3.99019417011;    double y,alnorm;    if(z<0) {      upper=!upper;      z=-z;    }    if(z<=ltone || upper && z<=utzero) {      y=0.5*z*z;      if(z>con) {        alnorm=b1*Math.exp(-y)/(z-b2+b3/(z+b4+b5/(z-b6+b7/(z+b8-b9/(z+b10+b11/(z+b12))))));      }      else {        alnorm=0.5-z*(a1-a2*y/(y+a3-a4/(y+a5+a6/(y+a7))));      }    }    else {      alnorm=0;    }    if(!upper) alnorm=1-alnorm;    return(alnorm);  }  public static double pnorm(double x,boolean upper,double mu,double sigma2) {    return(pnorm((x-mu)/Math.sqrt(sigma2),upper));  }  public static double qt(double p,double ndf,boolean lower_tail) {    // Algorithm 396: Student's t-quantiles by    // G.W. Hill CACM 13(10), 619-620, October 1970    if(p<=0 || p>=1 || ndf<1)       throw new IllegalArgumentException("Invalid p or df in call to qt(double,double,boolean).");    double eps=1e-12;    double M_PI_2=1.570796326794896619231321691640; // pi/2    boolean neg;    double P,q,prob,a,b,c,d,y,x;    if((lower_tail && p > 0.5) || (!lower_tail && p < 0.5)) {       neg = false;       P = 2 * (lower_tail ? (1 - p) : p);     }     else {       neg = true;       P = 2 * (lower_tail ? p : (1 - p));     }     if(Math.abs(ndf - 2) < eps) {   /* df ~= 2 */       q = Math.sqrt(2 / (P * (2 - P)) - 2);     }     else if (ndf < 1 + eps) {   /* df ~= 1 */       prob = P * M_PI_2;       q = Math.cos(prob)/Math.sin(prob);     }     else {      /*-- usual case;  including, e.g.,  df = 1.1 */       a = 1 / (ndf - 0.5);       b = 48 / (a * a);       c = ((20700 * a / b - 98) * a - 16) * a + 96.36;       d = ((94.5 / (b + c) - 3) / b + 1) * Math.sqrt(a * M_PI_2) * ndf;       y = Math.pow(d * P, 2 / ndf);       if (y > 0.05 + a) {         /* Asymptotic inverse expansion about normal */         x = qnorm(0.5 * P,false);         y = x * x;         if (ndf < 5)           c += 0.3 * (ndf - 4.5) * (x + 0.6);         c = (((0.05 * d * x - 5) * x - 7) * x - 2) * x + b + c;         y = (((((0.4 * y + 6.3) * y + 36) * y + 94.5) / c - y - 3) / b + 1) * x;         y = a * y * y;         if (y > 0.002)/* FIXME: This cutoff is machine-precision dependent*/           y = Math.exp(y) - 1;         else { /* Taylor of    e^y -1 : */           y = (0.5 * y + 1) * y;         }       }       else {         y = ((1 / (((ndf + 6) / (ndf * y) - 0.089 * d - 0.822)             * (ndf + 2) * 3) + 0.5 / (ndf + 4))             * y - 1) * (ndf + 1) / (ndf + 2) + 1 / y;       }       q = Math.sqrt(ndf * y);     }     if(neg) q = -q;     return q;  }  public static double pt(double t,double df) {    // ALGORITHM AS 3  APPL. STATIST. (1968) VOL.17, P.189    // Computes P(T<t)    double a,b,idf,im2,ioe,s,c,ks,fk,k;    double g1=0.3183098862;// =1/pi;    if(df<1)      throw new IllegalArgumentException("Illegal argument df for pt(t,df).");    idf=df;    a=t/Math.sqrt(idf);    b=idf/(idf+t*t);    im2=df-2;    ioe=idf%2;    s=1;    c=1;    idf=1;    ks=2+ioe;    fk=ks;    if(im2>=2) {      for(k=ks;k<=im2;k+=2) {        c=c*b*(fk-1)/fk;        s+=c;        if(s!=idf) {          idf=s;          fk+=2;        }      }    }    if(ioe!=1)      return 0.5+0.5*a*Math.sqrt(b)*s;    if(df==1) s=0;    return 0.5+(a*b*s+Math.atan(a))*g1;  }  public double pchisq(double q,double df) {    // Posten, H. (1989) American Statistician 43 p. 261-265    double df2=df*.5;    double q2=q*.5;    int n=5,k;    double tk,CFL,CFU,prob;    if(q<=0 || df<=0)      throw new IllegalArgumentException("Illegal argument "+q+" or "+df+" for qnorm(p).");    if(q<df) {     tk=q2*(1-n-df2)/(df2+2*n-1+n*q2/(df2+2*n));     for(k=n-1;k>1;k--)       tk=q2*(1-k-df2)/(df2+2*k-1+k*q2/(df2+2*k+tk));     CFL=1-q2/(df2+1+q2/(df2+2+tk));     prob=Math.exp(df2*Math.log(q2)-q2-Maths.logGamma(df2+1)-Math.log(CFL));    }    else {      tk=(n-df2)/(q2+n);      for(k=n-1;k>1;k--)        tk=(k-df2)/(q2+k/(1+tk));      CFU=1+(1-df2)/(q2+1/(1+tk));      prob=1-Math.exp((df2-1)*Math.log(q2)-q2-Maths.logGamma(df2)-Math.log(CFU));    }    return prob;  }    public static double betainv(double x,double p,double q) {    // ALGORITHM AS 63 APPL. STATIST. VOL.32, NO.1    // Computes P(Beta>x)    double beta=Maths.logBeta(p,q),acu=1E-14;    double cx,psq,pp,qq,x2,term,ai,betain,ns,rx,temp;    boolean indx;    if(p<=0 || q<=0) return(-1.0);    if(x<=0 || x>=1) return(-1.0);    psq=p+q;    cx=1-x;    if(p<psq*x) {      x2=cx;      cx=x;      pp=q;      qq=p;      indx=true;    }    else {      x2=x;      pp=p;      qq=q;      indx=false;    }    term=1;    ai=1;    betain=1;    ns=qq+cx*psq;    rx=x2/cx;    temp=qq-ai;    if(ns==0) rx=x2;    while(temp>acu && temp>acu*betain) {      term=term*temp*rx/(pp+ai);      betain=betain+term;      temp=Math.abs(term);      if(temp>acu && temp>acu*betain) {        ai++;        ns--;        if(ns>=0) {          temp=qq-ai;          if(ns==0) rx=x2;        }        else {          temp=psq;          psq+=1;        }      }    }    betain*=Math.exp(pp*Math.log(x2)+(qq-1)*Math.log(cx)-beta)/pp;    if(indx) betain=1-betain;    return(betain);  }  public static double pf(double x,double df1,double df2) {    // ALGORITHM AS 63 APPL. STATIST. VOL.32, NO.1    // Computes P(F>x)    return(betainv(df1*x/(df1*x+df2),0.5*df1,0.5*df2));  }  }
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -