📄 bntools.java
/**
 * JBNC - Bayesian Network Classifiers Toolbox <p>
 *
 * Latest release available at http://sourceforge.net/projects/jbnc/ <p>
 *
 * Copyright (C) 1999-2003 Jarek Sacha <p>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version. <p>
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details. <p>
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307, USA. <br>
 * http://www.fsf.org/licenses/gpl.txt
 */
package jbnc.util;

import BayesianNetworks.BayesNet;
import BayesianNetworks.ProbabilityFunction;
import BayesianNetworks.ProbabilityVariable;
import InferenceGraphs.InferenceGraph;
import InferenceGraphs.InferenceGraphNode;
import jbnc.dataset.AttributeSpecs;
import jbnc.dataset.AttributeType;
import jbnc.dataset.Dataset;
import jbnc.dataset.DatasetInt;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Vector;

/**
 * Utilities for Bayesian networks.
 *
 * @author Jarek Sacha
 * @since June 1, 1999
 */
public final class BNTools {

  /**
   * Some small value larger than zero, used in place of alphaK when Dirichlet
   * priors are not requested.
   */
//  final public static double beta_ijk = 1e-30;
  public final static double beta_ijk = 1e-3;

  /**
   * Returns the dimension of a Bayesian network. <p>
   *
   * <b>Dimension of a Bayesian network:</b> <i>Let X be a set of random
   * variables and B be a Bayesian network defined over X. The dimension of
   * this network, Dim(B), is the number of free parameters required to
   * completely specify the joint probability distribution of X.</i> <br>
   *
   * E. Castillo, J. M. Gutierrez and A. S. Hadi, <i>Expert Systems and
   * Probabilistic Network Models</i>, Springer, 1997, p. 486.
   *
   * @param net the Bayesian network.
   * @return the network dimension Dim(B).
   * @throws Exception when a probability function of the network is malformed.
   */
  public static int getNetworkDimension(BayesNet net) throws Exception {
    int dim = 0;
    ProbabilityVariable[] vars = net.get_probability_variables();
    for (int i = 0; i < vars.length; ++i) {
      int r_i = vars[i].number_values();
      ProbabilityFunction func = net.get_function(vars[i]);
      int nbValues = func.number_values();
      int nbVars = func.number_variables();
      int q_i = nbValues / r_i;
      if (nbVars < 1) {
        throw new Exception("A function has no variables.");
      }
      if ((nbValues % r_i) != 0) {
        throw new Exception("Incorrect number of values of function #" + i);
      }
      if (nbVars == 1) {
        dim += r_i - 1;
      } else {
        dim += (r_i - 1) * q_i;
      }
    }
    return dim;
  }
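
  /*
   * Illustrative sketch (added here, not part of the original JBNC source):
   * for a naive Bayes structure with a three-state class C and two binary
   * attributes whose only parent is C, each variable contributes
   * (r_i - 1) * q_i free parameters, so Dim(B) = (3-1) + (2-1)*3 + (2-1)*3 = 8.
   * The hypothetical helper below reproduces that count from hard-coded state
   * and parent-configuration counts; it does not touch the BayesNet API.
   */
  static int exampleNaiveBayesDimension() {
    final int[] r = {3, 2, 2};  // state counts of C, A1, A2
    final int[] q = {1, 3, 3};  // parent configurations: C has no parents, A1/A2 have parent C
    int dim = 0;
    for (int i = 0; i < r.length; ++i) {
      dim += (r[i] - 1) * q[i];
    }
    return dim;  // 8, matching getNetworkDimension for that structure
  }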

  /**
   * Returns the network parameters component of the asymptotic standard
   * Bayesian measure (ASBM): <p>
   *
   * <i>q</i> = sum<sub><i>i</i>=1..<i>n</i></sub>
   *            sum<sub><i>j</i>=1..<i>q<sub>i</sub></i></sub>
   *            sum<sub><i>k</i>=1..<i>r<sub>i</sub></i></sub>
   *            <i>N<sub>ijk</sub></i> log(<i>N<sub>ijk</sub></i> / <i>N<sub>ij</sub></i>) <p>
   *
   * where <i>N<sub>ijk</sub></i> counts the cases in which variable
   * <i>X<sub>i</sub></i> is in configuration <i>k</i> and the parents of
   * <i>X<sub>i</sub></i> are in configuration <i>j</i>. <p>
   *
   * E. Castillo, J. M. Gutierrez and A. S. Hadi, <i>Expert Systems and
   * Probabilistic Network Models</i>, Springer, 1997, p. 494, eq. (11.28).
   *
   * @param net       the Bayesian network.
   * @param dataset   training data set; all attributes must be discrete.
   * @param usePriors whether to include the Dirichlet prior counts in the
   *                  frequency terms.
   * @param alphaK    alpha<sub>k</sub> parameter of the Dirichlet priors.
   * @return the ASBM parameter component <i>q</i>.
   * @throws Exception when the data set and the network do not match.
   */
  public final static double getASBMParamComponent(BayesNet net,
                                                   DatasetInt dataset,
                                                   boolean usePriors,
                                                   double alphaK) throws Exception {

    double q = 0;

    // Verify that all attributes are discrete
    AttributeSpecs[] names = dataset.names;
    for (int n = 0; n < names.length; ++n) {
      if (names[n].getType() != AttributeType.DISCRETE) {
        throw new Exception("All attributes in the data set have to be discrete.");
      }
    }

    int nbVars = names.length;
    int nbCases = dataset.cases.size();
    int[] varSize = new int[nbVars];
    for (int i = 0; i < nbVars; ++i) {
      varSize[i] = names[i].getStates().length;
    }

    // Sanity check
    if (nbVars != net.number_variables()) {
      throw new Exception("Number of variables in the data set and in the network do not agree ("
          + nbVars + "!=" + net.number_variables() + ").");
    }

    // Iterate through the list of probability functions/variables
    // and calculate new values using frequencies in the training dataset.
//    ProbabilityVariable[] vars = net.get_probability_variables();
    ProbabilityFunction[] funcs = net.get_probability_functions();
    for (int funcNb = 0; funcNb < funcs.length; ++funcNb) {
      if (funcs[funcNb] == null) {
        continue;
      }

      int[] varIndx = funcs[funcNb].get_indexes();
      double[] vals = funcs[funcNb].get_values();
      int[] vCount = new int[vals.length];
      int[] varCycle = new int[varIndx.length];
      varCycle[varCycle.length - 1] = 1;
      for (int i = varCycle.length - 2; i >= 0; --i) {
        varCycle[i] = varCycle[i + 1] * varSize[varIndx[i + 1]];
      }

      // Calculate frequencies
      int varCycle_0 = varCycle[0];
      int[] count = new int[varCycle_0];
      for (int caseNb = 0; caseNb < nbCases; ++caseNb) {
        int[] thisCase = (int[]) dataset.cases.get(caseNb);
        int index = 0;
        for (int varNb = 0; varNb < varIndx.length; ++varNb) {
          index += varCycle[varNb] * thisCase[varIndx[varNb]];
        }
        ++vCount[index];
        ++count[index % varCycle_0];
      }

      // Calculate contribution from this variable
      int r_i = vals.length / varCycle_0;
      double alpha_ij = alphaK * r_i;
      for (int i = 0; i < vals.length; ++i) {
        int n_ij = count[i % varCycle_0];
        int n_ijk = vCount[i];
        if (usePriors) {
          double num = n_ijk + alphaK - 1;
          double denum = n_ij + alpha_ij - r_i;
//          double denum = n_ij + alpha_ij;
          if (num != 0 && denum != 0) {
//            double num_1 = n_ijk + alphaK;
//            double frac = num_1 / denum;
            double frac = num / denum;
            if (frac > 0) {
              q += num * Math.log(frac);
            }
          }
        } else {
          if (n_ij > 0 && n_ijk > 0) {
            q += n_ijk * Math.log(n_ijk / (double) n_ij);
          }
        }
      }
    }

    return q;
  }

  /**
   * Returns the value of ln[gamma(xx)] for xx > 0. Implementation based on
   * W. H. Press et al., <i>Numerical Recipes in C</i>, 2nd Ed., Cambridge
   * University Press, 1992.
   *
   * @param xx argument, must be greater than zero.
   * @return the value of ln[gamma(xx)] for xx > 0.
   * @throws Exception when xx <= 0.
   */
  public static double gammaLn(double xx) throws Exception {
    if (xx <= 0) {
      throw new Exception("Argument has to be greater than zero.");
    }

    final double[] cof = {76.18009172947146, -86.50532032941677,
        24.01409824083091, -1.231739572450155,
        0.1208650973866179e-2, -0.5395239384953e-5};

    double x = xx;
    double y = xx;
    double tmp = x + 5.5;
    tmp -= (x + 0.5) * Math.log(tmp);
    double ser = 1.000000000190015;
    for (int j = 0; j < 6; ++j) {
      ser += cof[j] / ++y;
    }
    return -tmp + Math.log(2.5066282746310005 * ser / x);
  }
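
  /*
   * Usage sketch (added for illustration, not part of the original source):
   * since Gamma(n) = (n-1)! for integer n, gammaLn(5.0) should return
   * ln(4!) = ln(24), roughly 3.17805, which is a quick sanity check on the
   * Lanczos approximation above. The helper name below is hypothetical.
   */
  static void gammaLnSanityCheck() throws Exception {
    double v = gammaLn(5.0);   // ln(Gamma(5)) = ln(24)
    System.out.println(v);     // prints approximately 3.1780538
  }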

  /**
   * Learns parameters for the current network structure. Existing network
   * parameters are replaced with the new ones. This method can use "uniform"
   * Dirichlet priors.
   *
   * @param net         Bayesian network.
   * @param fc          precomputed attribute frequencies for the training data.
   * @param useDirihlet indicates whether Dirichlet priors should be used for
   *                    the network parameters.
   * @param alphaK      alpha<sub>k</sub> parameter for the Dirichlet priors.
   *                    All alpha<sub>k</sub> are assumed to be the same and
   *                    greater than zero.
   * @throws Exception when the arguments are inconsistent.
   */
  public static void learnParameters(BayesNet net,
                                     FrequencyCalc fc,
                                     boolean useDirihlet,
                                     double alphaK) throws Exception {

    if (useDirihlet && (alphaK <= 0)) {
      throw new Exception("When using Dirichlet priors alphaK must be greater than zero.");
    }

    int nbAttrib = fc.names.length - 1;
    int nbVars = nbAttrib + 1;
    int[] varSize = new int[nbVars];
    for (int i = 0; i < nbVars; ++i) {
      varSize[i] = fc.names[i].getStates().length;
    }

    // Sanity check
    if (nbVars != net.number_variables()) {
      throw new Exception("Number of variables in the data set and in the network do not agree ("
          + nbVars + "!=" + net.number_variables() + ").");
    }

    // Set priors
    double alpha_ijk = useDirihlet ? alphaK : beta_ijk;

    // Iterate through the list of probability functions
    // and calculate new values using frequencies in the training dataset.
    ProbabilityVariable[] vars = net.get_probability_variables();
    ProbabilityFunction[] funcs = net.get_probability_functions();
    for (int funcNb = 0; funcNb < funcs.length; ++funcNb) {
      if (funcs[funcNb] == null) {
        continue;
      }

      int[] varIndx = funcs[funcNb].get_indexes();
      double[] vals = funcs[funcNb].get_values();

      if (varIndx.length == 1) {
        int thisVarIndex = varIndx[0];
        int thisVarSize = varSize[thisVarIndex];
        double denom = fc.nbCases + alpha_ijk * thisVarSize;
        for (int k = 0; k < thisVarSize; ++k) {
          vals[k] = (fc.freqX[thisVarIndex][k] + alpha_ijk) / denom;
        }
      } else if (varIndx.length == 2) {
        int i_x = varIndx[0];
        int i_y = varIndx[1];
        int xSize = varSize[i_x];
        int ySize = varSize[i_y];
        int[][] freqXY = fc.freqXY[i_x][i_y];
        int[] Nij = new int[ySize];
        for (int k_x = 0; k_x < xSize; ++k_x) {
          for (int k_y = 0; k_y < ySize; ++k_y) {
            Nij[k_y] += freqXY[k_x][k_y];
          }
        }
        int index = 0;
        double alpha = alpha_ijk * xSize;
        for (int k_x = 0; k_x < xSize; ++k_x) {
          for (int k_y = 0; k_y < ySize; ++k_y) {
            vals[index] = (freqXY[k_x][k_y] + alpha_ijk) / (Nij[k_y] + alpha);
            ++index;
          }
        }
      } else if (varIndx.length == 3) {
        int i_x = varIndx[0];
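
  /*
   * Illustrative sketch (added, not part of the original source): the estimate
   * used by learnParameters above is the Dirichlet-smoothed frequency
   *   P(X = k | parents = j) = (N_ijk + alpha_k) / (N_ij + alpha_k * r_i),
   * where r_i is the number of states of X. The hypothetical helper below
   * applies it to hard-coded counts for a binary X under one parent
   * configuration.
   */
  static double[] exampleDirichletEstimate() {
    final int[] n_ijk = {7, 3};       // counts of X = 0 and X = 1 given parents = j
    final double alphaK = 1.0;        // uniform Dirichlet prior count
    final int r_i = n_ijk.length;     // number of states of X
    int n_ij = 0;
    for (int k = 0; k < r_i; ++k) {
      n_ij += n_ijk[k];
    }
    double[] p = new double[r_i];
    for (int k = 0; k < r_i; ++k) {
      p[k] = (n_ijk[k] + alphaK) / (n_ij + alphaK * r_i);  // (7+1)/12 and (3+1)/12
    }
    return p;                         // {0.666..., 0.333...}
  }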