📄 chisquaredcalculator.java
字号:
{
if(i==0) {
if(reverse) {
product = (1.0f - fws[i]);
}
else
{
product = fws[i];
}
}
else
{
if(reverse) {
product *= (1.0d - fws[i]);
}
else
{
product *= fws[i];
}
}
}
double log = (double)Math.log(product);
chi = -2.0f * log;
return chi;
}
/**
 * Computes the chi value over the complements of the supplied f(w) values.
 * <p>
 * This is a convenience wrapper that delegates to the two-argument
 * {@code calculateChi} with the reverse flag set, which evaluates:
 * </p>
 * <pre>
 * -2 ln ∏(1 - f(w))
 * </pre>
 *
 * @param fws the f(w) values to combine
 * @return The reverse chi computation as a double
 * @see ChiSquaredCalculator#calculateChi(double[])
 */
public double calculateReverseChi(double[] fws) {
    // "Reverse" means every factor is taken as (1 - f(w)) instead of f(w).
    final boolean useComplement = true;
    return calculateChi(fws, useComplement);
}
/**
 * Calculates the inverse chi square for the given chi distribution.
 * <p>
 * Following Gary Robinson's writings, this is defined as:
 * </p>
 * <pre>
 * H = C<sup>-1</sup>( -2 ln( ∏ f(w) )<sup>y</sup>, 2ny )
 * </pre>
 * Where:
 * <ul>
 * <li>C<sup>-1</sup> is the inverse chi squared function</li>
 * <li><em>y</em> is the ESF (effective size factor) as defined in the JasenEngineConfiguration</li>
 * <li><em>n</em> is the number of tokens</li>
 * </ul>
 * <p>
 * The degrees of freedom are taken as {@code 2 * n}. When that value scaled
 * by the ESF is below the configured {@code ftt} value, the few-tokens
 * evaluation is used; otherwise the many-tokens evaluation is used.
 * </p>
 *
 * @param fChi The chi distribution calculated from calculateChi()
 * @param n The number of tokens
 * @return The inverse chi squared probability
 */
public double calculateInverseChiSquare(double fChi, int n) {
    // NOTE(review): "ftt" is presumably the few-tokens threshold; confirm against JasenEngineConfiguration.
    final int fewTokensCutoff = JasenEngineConfiguration.getInstance().getFtt();
    final double esf = JasenEngineConfiguration.getInstance().getEsf();

    final int degreesOfFreedom = n * 2;
    final double effectiveDf = degreesOfFreedom * esf;

    return (effectiveDf < fewTokensCutoff)
            ? chi2pFewTokens(fChi, degreesOfFreedom, esf)
            : chi2pManyTokens(fChi, degreesOfFreedom, esf);
}
/**
 * Inverse chi-square evaluation intended for small values of
 * {@code iChiDF * fESF}.
 * <p>
 * Original author's note: this is more efficient and more accurate than
 * {@code chi2pManyTokens} for small values of {@code iChiDF * fESF}, but it
 * cannot handle values above some (unrecorded) limit. It works up to at
 * least {@code iChiDF * fESF == 100.0}, though {@code chi2pManyTokens} is
 * significantly faster at that point.
 * </p>
 * <p>
 * The result is the sum of a series over the whole part of the effective
 * size plus one extra term weighted by its fractional part.
 * </p>
 *
 * @param fChi the chi value
 * @param iChiDF the degrees of freedom; halved (integer division) to obtain the actual size
 * @param fESF the effective size factor
 * @return the sum of the whole-part series and the fractional term
 */
private double chi2pFewTokens(double fChi, int iChiDF, double fESF) {
    final double adjustedProduct = Math.exp(fESF * (-fChi) / 2.0d);
    final double effectiveSize = (iChiDF / 2) * fESF;
    final int wholeTerms = (int) effectiveSize;

    // Loop-invariant base of every power term below.
    final double negLogProduct = -Math.log(adjustedProduct);

    double seriesSum = 0.0d;
    int k = 0;
    while (k < wholeTerms) {
        seriesSum += Math.pow(negLogProduct, k) / factorial(k);
        k++;
    }
    final double wholePart = adjustedProduct * seriesSum;

    // Extra term covering the fractional remainder of the effective size.
    final double fraction = effectiveSize - wholeTerms;
    final double fractionalPart =
            adjustedProduct * fraction * Math.pow(negLogProduct, effectiveSize) / factorial(wholeTerms);

    return wholePart + fractionalPart;
}
/**
 * Inverse chi-square evaluation intended for large values of
 * {@code iChiDF * fESF}.
 * <p>
 * Gary Robinson quote: use instead of {@code chi2pFewTokens} for large
 * values of iDF*fESF. Suggested cutoff is 25.0, but certainly should cutoff
 * by 100.0. Except for the code involving fESF, and some renaming of
 * variables, this is almost exactly the same as Tim Peters' SpamBayes chi
 * function.
 * </p>
 *
 * @param fChi the chi value
 * @param iChiDF the degrees of freedom
 * @param fESF the effective size factor; scaled down locally when the adjusted M exceeds 700
 * @return the accumulated series sum, capped at 1.0
 */
private double chi2pManyTokens(double fChi, int iChiDF, double fESF) {
    // NOTE(review): presumably chosen so that Math.exp(-m) does not underflow to zero; confirm.
    final double maxAllowableM = 700.0d;

    double esf = fESF;
    double adjustedDf = adjustDF(iChiDF, fChi, esf);
    double m = adjustFM(iChiDF, fChi, esf);
    if (m > maxAllowableM) {
        // Shrink the effective size factor proportionally and recompute both adjusted values.
        esf = esf * (maxAllowableM / m);
        adjustedDf = adjustDF(iChiDF, fChi, esf);
        m = adjustFM(iChiDF, fChi, esf);
    }

    double term = Math.exp(-m);
    double sum = term;
    final int termLimit = (int) (adjustedDf / 2.0d);
    for (int i = 1; i < termLimit; i++) {
        term *= m / i;
        sum += term;
    }

    /*
     * Gary Robinson quote:
     * With small chi and large df, accumulated roundoff error, plus error in
     * the platform exp(), can cause this to spill a few ULP above 1.0. For
     * example, chi2p(100, 300) on my box has sum == 1.0 + 2.0**-52 at this
     * point. Returning a value even a teensy bit over 1.0 is no good.
     */
    return Math.min(sum, 1.0d);
}
/**
 * Scales the degrees of freedom by the effective size factor.
 * <p>
 * Half of {@code iDF} is multiplied by {@code fESF}; 0.5 is added and the
 * value truncated to an int (round-half-up for non-negative values), with a
 * floor of 1. The result is that whole number doubled.
 * </p>
 *
 * @param iDF the unadjusted degrees of freedom
 * @param fChi not used by this computation; retained so the signature matches existing callers
 * @param fESF the effective size factor
 * @return the adjusted degrees of freedom, always an even value of at least 2.0
 */
private double adjustDF(int iDF, double fChi, double fESF) {
    final double halfDf = iDF / 2.0d;
    final int adjustedHalfDf = Math.max(1, (int) (fESF * halfDf + 0.5d));
    // Multiply in double precision: an int times a float literal (2.0f) is
    // evaluated as float and silently loses precision above 2^24.
    return adjustedHalfDf * 2.0d;
}
/**
 * Computes the adjusted "M" value (half of the rescaled chi) used by the
 * many-tokens evaluation.
 * <p>
 * Half of {@code iDF} is scaled by {@code fESF}, 0.5 is added and the value
 * truncated to an int with a floor of 1. The ratio of that whole number to
 * the unscaled half-DF is applied to {@code fChi}, and half of the rescaled
 * chi is returned.
 * </p>
 *
 * @param iDF the unadjusted degrees of freedom
 * @param fChi the chi value to rescale
 * @param fESF the effective size factor
 * @return half of the rescaled chi value
 */
private double adjustFM(double iDF, double fChi, double fESF) {
    final double halfDf = iDF / 2.0d;
    final int scaledHalfDf = Math.max(1, (int) (fESF * halfDf + 0.5d));
    // Proportion by which the whole-number half-DF differs from the original half-DF.
    final double proportion = scaledHalfDf / halfDf;
    return (fChi * proportion) / 2.0d;
}
/**
 * Computes {@code x!} as a double.
 *
 * @param x the value whose factorial is required
 * @return 0.0 when {@code x} is negative, 1.0 when {@code x} is 0 or 1,
 *         otherwise the product {@code x * (x - 1) * ... * 2}
 */
private double factorial(int x) {
    if (x < 0) {
        return 0.0d;
    }
    double result = 1.0d;
    // Multiply from x downwards; factors 1 and below contribute nothing.
    for (int factor = x; factor > 1; factor--) {
        result *= factor;
    }
    return result;
}
/**
 * Test harness only.
 * <p>
 * Runs the chi and inverse chi-square calculations over a fixed sample of
 * f(w) values and prints the chi value, the inverse chi-square result and
 * the sample size to standard output. Any exception is printed as a stack
 * trace.
 * </p>
 *
 * @param args ignored
 */
public static void main(String[] args) {
    try {
        final ChiSquaredCalculator calculator = new ChiSquaredCalculator();

        // Alternative sample inputs, kept for manual experimentation:
        //double[] fws = new double[] {0.999978615d,0.999978615d,0.999978615d,0.999978615d,0.999978615d,0.999978615d,0.999978615d,0.999978615d,0.999978615d,0.805431773d,0.705176409d,0.544620517d,0.502084982d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.443618441d,0.437046715d};
        //double[] fws = new double[] {0.999978615d,0.999978615d,0.999978615d,0.999978615d,0.977073559d,0.920371405d,0.791100215d,0.761284306d,0.688686764d,0.665913441d,0.544620517d,0.505132727d,0.500000000d,0.500000000d,0.500000000d,0.485391187d,0.440922722d,0.343761476d,0.309635202d,0.287031402d,0.285771402d,0.239399599d,0.187002702d,0.187002702d,0.098080644d,0.000021385d,0.000021385d,0.000021385d};
        // double[] fws = new double[] {0.485391187d,0.440922722d,0.343761476d,0.309635202d,0.287031402d,0.285771402d,0.239399599d,0.187002702d,0.187002702d,0.098080644d,0.000021385d,0.000021385d,0.000021385d,0.485391187d,0.440922722d,0.343761476d,0.309635202d,0.287031402d,0.285771402d,0.239399599d,0.187002702d,0.187002702d,0.098080644d,0.000021385d,0.000021385d,0.000021385d};
        // double[] fws = new double[] {0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d};
        //double[] fws = new double[] {0.9999f,0.9999f};
        final double[] fws = new double[] {0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d};

        final double chi = calculator.calculateChi(fws);
        System.out.println("CHI: " + chi);

        final int tokenCount = fws.length;
        final double inverse = calculator.calculateInverseChiSquare(chi, tokenCount);
        System.out.println("INV: " + inverse);
        System.out.println("N: " + tokenCount);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -