⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 chisquaredcalculator.java

📁 spam source codejasen-0.9jASEN - java Anti Spam ENgine.zip 如标题所示
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
        {
            if(i==0) {
                if(reverse) {
                    product = (1.0f - fws[i]);
                }
                else
                {
                    product = fws[i];
                }
            }
            else
            {
                if(reverse) {
                    product *= (1.0d - fws[i]);
                }
                else
                {
                    product *= fws[i];
                }
            }
        }

        double log = (double)Math.log(product);
        chi = -2.0f * log;

        return chi;
    }

    /**
     * Computes the chi value over the complements of the supplied f(w) values.
     * <p>
     * This is the same computation as calculateChi, applied to 1 - f(w):
     * </p>
     * <pre>
     * 	 -2 ln <span style="font-size:16pt">&#x220F;</span>(1 - f(w))
     * </pre>
     *
     * @param fws The f(w) values to combine
     * @return The reverse chi computation as a double
     * @see ChiSquaredCalculator#calculateChi(double[])
     */
    public double calculateReverseChi(double[] fws) {
        // The boolean flag selects the (1 - f(w)) form of the product.
        final boolean useComplement = true;
        final double reverseChi = calculateChi(fws, useComplement);
        return reverseChi;
    }

    /**
     * Calculates the inverse chi square for the given chi distribution.
     * <p>
     * 	Taken from Gary Robinson's writings, this is defined as:
     * </p>
     * 	<pre style="font-family:times new roman; font-size:14pt">
     * 	<em>H</em> = <em>C<sup style="font-size:10pt">-1</sup></em>(<span style="font-size:11pt"> -2 ln </span>( <span style="font-size:14pt">&#x220F;</span> <span style="font-size:11pt">f(w)</span> )<sup><em>y</em></sup>, <span style="font-size:11pt">2<em>ny</em></span>)
     * 	</pre>
     * Where:
     * <ul>
     * 	<li>C<sup style="font-size:10pt">-1</sup> is the inverse chi squared function</li>
     * 	<li><em>y</em> is the ESF (effective size factor) as defined in the JasenEngineConfiguration</li>
     * 	<li><em>n</em> is the number of tokens</li>
     * </ul>
     * <p>
     * The degrees of freedom are taken as 2<em>n</em>. When 2<em>n</em> multiplied by the ESF is
     * below the "ftt" value read from the JasenEngineConfiguration, the few-tokens
     * computation is used; otherwise the many-tokens computation is used.
     * </p>
     *
     * @param fChi The chi distribution calculated from calculateChi()
     * @param n The number of tokens
     * @return The inverse chi squared probability
     */
    public double calculateInverseChiSquare(double fChi, int n) {
        final int fewTokenThreshold = JasenEngineConfiguration.getInstance().getFtt();
        final double effectiveSizeFactor = JasenEngineConfiguration.getInstance().getEsf();
        final int degreesOfFreedom = 2 * n;

        // Effective degrees of freedom below the threshold select the few-tokens series.
        final boolean fewTokens = (degreesOfFreedom * effectiveSizeFactor) < fewTokenThreshold;

        return fewTokens
                ? chi2pFewTokens(fChi, degreesOfFreedom, effectiveSizeFactor)
                : chi2pManyTokens(fChi, degreesOfFreedom, effectiveSizeFactor);
    }

    /**
     * Evaluates the inverse chi square with the series intended for small
     * values of iChiDF*fESF.
     * <p>
     * With <em>m</em> = fESF * fChi / 2, <em>P</em> = exp(-<em>m</em>) and
     * <em>s</em> = (iChiDF / 2) * fESF, the result is:
     * </p>
     * <pre>
     *   P * sum(m^i / i!, i = 0 .. floor(s) - 1)
     *     + P * (s - floor(s)) * m^s / floor(s)!
     * </pre>
     * <p>
     * Note from the original implementation: this is more efficient than
     * chi2pManyTokens for small values of iChiDF*fESF, and is more accurate.
     * However it can't handle arbitrarily large values of iChiDF*fESF. It works
     * up to at least iChiDF*fESF==100.0, though chi2pManyTokens is significantly
     * faster at that point.
     * </p>
     *
     * @param fChi The chi value to evaluate
     * @param iChiDF The degrees of freedom (twice the number of tokens)
     * @param fESF The effective size factor
     * @return The computed probability; 0.0 when exp(-m) underflows to zero
     */
    private double chi2pFewTokens(double fChi, int iChiDF, double fESF) {
        // m is -ln(P). It is used directly rather than recovered through
        // -log(exp(-m)), which turns into +Infinity once exp() underflows.
        double fNegLogProduct = (fESF * fChi) / 2.0;
        double fAdjustedProduct = Math.exp(-fNegLogProduct);

        // Every term is scaled by P, so the result tends to 0 as P does. Returning
        // here avoids 0 * Infinity = NaN for very large (or infinite) chi values.
        if (fAdjustedProduct == 0.0) {
            return 0.0;
        }

        int iActualSize = iChiDF / 2;
        double fEffectiveSize = iActualSize * fESF;
        int iWholeSize = (int) fEffectiveSize;

        double fSum = 0.0;
        // Holds i! for the current i, so the factorial is not recomputed per term.
        double fFactorial = 1.0;

        for (int i = 0; i < iWholeSize; i++) {
            fSum += Math.pow(fNegLogProduct, i) / fFactorial;
            fFactorial *= (i + 1);
        }

        // At this point fFactorial == iWholeSize! (for iWholeSize >= 0).
        double fFirstTerm = fAdjustedProduct * fSum;
        // Correction for the fractional part of the effective size.
        double fSecondTerm = fAdjustedProduct * (fEffectiveSize - iWholeSize)
                * Math.pow(fNegLogProduct, fEffectiveSize) / fFactorial;

        return fFirstTerm + fSecondTerm;
    }

    /**
     * Evaluates the inverse chi square with the series intended for large
     * values of iChiDF*fESF.
     * <p>
     * Gary Robinson quote: Use instead of _chi2pFewTokens for large values of
     * iDF*fESF. Suggested cutoff is 25.0, but certainly should cutoff by 100.0.
     * Except for the code involving fESF, and some renaming of variables, this
     * is almost exactly the same as Tim Peters' SpamBayes chi function.
     * </p>
     *
     * @param fChi The chi value to evaluate
     * @param iChiDF The degrees of freedom
     * @param fESF The effective size factor
     * @return The summed series, capped at 1.0
     */
    private double chi2pManyTokens(double fChi, int iChiDF, double fESF) {
        final double maxAllowableM = 700.0;

        double adjustedDf = adjustDF(iChiDF, fChi, fESF);
        double m = adjustFM(iChiDF, fChi, fESF);

        // When m exceeds the limit, shrink the effective size factor
        // proportionally and derive both adjusted values again.
        if (m > maxAllowableM) {
            fESF *= maxAllowableM / m;
            adjustedDf = adjustDF(iChiDF, fChi, fESF);
            m = adjustFM(iChiDF, fChi, fESF);
        }

        // Term 0 of the series is exp(-m); each following term is the
        // previous one multiplied by m / k.
        double term = Math.exp(-m);
        double total = term;

        final int termCount = (int) (adjustedDf / 2.0d);
        for (int k = 1; k < termCount; k++) {
            term *= m / k;
            total += term;
        }

        /*
         *  Gary Robinson quote:
         *  With small chi and large df, accumulated roundoff error, plus error in
         *  the platform exp(), can cause this to spill a few ULP above 1.0.  For
         *  example, chi2p(100, 300) on my box has sum == 1.0 + 2.0**-52 at this
         *  point.  Returning a value even a teensy bit over 1.0 is no good.
         */
        return Math.min(total, 1.0);
    }

    /**
     * Scales the degrees of freedom by the effective size factor.
     * <p>
     * Half of {@code iDF} is multiplied by {@code fESF}, rounded by adding 0.5
     * and truncating to an int, clamped to a minimum of 1, and then doubled.
     * </p>
     *
     * @param iDF The unadjusted degrees of freedom
     * @param fChi Not used by this computation; retained so the signature stays unchanged
     * @param fESF The effective size factor
     * @return The adjusted degrees of freedom (always an even value of at least 2)
     */
    private double adjustDF(int iDF, double fChi, double fESF) {
        double fHalfDF = iDF / 2.0;
        int iAdjustedHalfDF = Math.max(1, (int) (fESF * fHalfDF + 0.5d));

        // Multiply in double precision: an int * float product is evaluated as
        // float and loses exactness for values above 2^24.
        return iAdjustedHalfDF * 2.0;
    }


    /**
     * Derives the series parameter (half of the adjusted chi value).
     * <p>
     * Half of {@code iDF} is multiplied by {@code fESF}, rounded by adding 0.5
     * and truncating to an int, and clamped to a minimum of 1. {@code fChi} is
     * then scaled by the ratio of that rounded value to the unadjusted half
     * and divided by two.
     * </p>
     *
     * @param iDF The unadjusted degrees of freedom
     * @param fChi The chi value to scale
     * @param fESF The effective size factor
     * @return Half of the scaled chi value
     */
    private double adjustFM(double iDF, double fChi, double fESF) {
        final double halfDf = iDF / 2.0;
        final int roundedHalfDf = Math.max(1, (int) (fESF * halfDf + 0.5d));

        // Ratio between the rounded, ESF-scaled half and the original half.
        final double proportion = roundedHalfDf / halfDf;

        return (fChi * proportion) / 2.0;
    }


    /**
     * Computes x! as a double.
     *
     * @param x The value whose factorial is required
     * @return 0.0 when x is negative, 1.0 when x is 0 or 1, otherwise the
     *         product x * (x - 1) * ... * 2
     */
    private double factorial(int x) {
        if (x < 0) {
            return 0.0;
        }

        double product = 1.0;
        // Multiply from x downwards to 2.
        for (int factor = x; factor > 1; factor--) {
            product *= factor;
        }
        return product;
    }

    /**
     * Manual test harness only. Computes the chi value and the inverse chi
     * square for a fixed set of f(w) values and prints both, followed by the
     * number of values, to standard output.
     *
     * @param args Command line arguments (not used)
     */
    public static void main(String[] args) {
        try
        {
            final ChiSquaredCalculator calculator = new ChiSquaredCalculator();

            // Alternative input vectors, kept for manual experimentation:
            //double[] fws = new double[] {0.999978615d,0.999978615d,0.999978615d,0.999978615d,0.999978615d,0.999978615d,0.999978615d,0.999978615d,0.999978615d,0.805431773d,0.705176409d,0.544620517d,0.502084982d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.5d,0.443618441d,0.437046715d};
            //double[] fws = new double[] {0.999978615d,0.999978615d,0.999978615d,0.999978615d,0.977073559d,0.920371405d,0.791100215d,0.761284306d,0.688686764d,0.665913441d,0.544620517d,0.505132727d,0.500000000d,0.500000000d,0.500000000d,0.485391187d,0.440922722d,0.343761476d,0.309635202d,0.287031402d,0.285771402d,0.239399599d,0.187002702d,0.187002702d,0.098080644d,0.000021385d,0.000021385d,0.000021385d};
            //double[] fws = new double[] {0.485391187d,0.440922722d,0.343761476d,0.309635202d,0.287031402d,0.285771402d,0.239399599d,0.187002702d,0.187002702d,0.098080644d,0.000021385d,0.000021385d,0.000021385d,0.485391187d,0.440922722d,0.343761476d,0.309635202d,0.287031402d,0.285771402d,0.239399599d,0.187002702d,0.187002702d,0.098080644d,0.000021385d,0.000021385d,0.000021385d};
            //double[] fws = new double[] {0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d};
            //double[] fws = new double[] {0.9999f,0.9999f};

            // Active input vector.
            final double[] probabilities = new double[] {0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d,0.3d};
            final int tokenCount = probabilities.length;

            final double chiValue = calculator.calculateChi(probabilities);
            System.out.println ("CHI: " + chiValue);

            final double inverseChi = calculator.calculateInverseChiSquare(chiValue, tokenCount);
            System.out.println ("INV: " + inverseChi);

            System.out.println ("N: " + tokenCount);
        }
        catch (Exception e)
        {
            e.printStackTrace ();
        }
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -