⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 icssearchalgorithm.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
字号:
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * ICSSearchAlgorithm.java
 * Copyright (C) 2004 Remco Bouckaert
 * 
 */


package weka.classifiers.bayes.net.search.ci;

import weka.classifiers.bayes.BayesNet;
import weka.classifiers.bayes.net.ParentSet;
import weka.core.Instances;
import java.io.FileReader;

/** ICSSearchAlgorithm implements Conditional Independence based search
 * algorithm for Bayes Network structure learning.
 * 
 * @author Remco Bouckaert
 * @version $Revision$
 */ 
public class ICSSearchAlgorithm extends CISearchAlgorithm {

    String name(int iAttribute) {return m_instances.attribute(iAttribute).name();}
    int maxn() {return m_instances.numAttributes();}
    
    /** maximum size of separating set **/
	private int m_nMaxCardinality = 2; 
	public void setMaxCardinality(int nMaxCardinality) {m_nMaxCardinality = nMaxCardinality;}
	public int getMaxCardinality() {return m_nMaxCardinality;}
	

	
    class SeparationSet {
        public int [] m_set;
        
        public SeparationSet() {
			m_set= new int [getMaxCardinality() + 1];
        } // c'tor

        public boolean contains(int nItem) {
        	for (int iItem = 0; iItem < getMaxCardinality() && m_set[iItem] != -1; iItem++) {
        		if (m_set[iItem] == nItem) {
					return true;
				}
        	}
			return false;
    	} // contains

    } // class sepset

	/**
	 * Search for Bayes network structure using ICS algorithm
	 * @param bayesNet : datastructure to build network structure for
	 * @param instances : data set to learn from
	 * @see weka.classifiers.bayes.SearchAlgorithm#search(BayesNet, Instances)
	 */
	protected void search(BayesNet bayesNet, Instances instances) throws Exception {
        // init
        m_BayesNet = bayesNet;
        m_instances = instances;

        boolean edges[][] = new boolean [maxn() + 1][];
		boolean [] [] arrows = new boolean [maxn() + 1][];
        SeparationSet [] [] sepsets = new SeparationSet [maxn() + 1][];
        for (int iNode = 0 ; iNode < maxn() + 1; iNode++) {
            edges[iNode] = new boolean[maxn()];
			arrows[iNode] = new boolean[maxn()]; 
            sepsets[iNode] = new SeparationSet[maxn()];
        }

        calcDependencyGraph(edges, sepsets);
        calcVeeNodes(edges, arrows, sepsets);
        calcArcDirections(edges, arrows);

		// transfrom into BayesNet datastructure 
		for (int iNode = 0; iNode < maxn(); iNode++) {
			// clear parent set of AttributeX
			ParentSet oParentSet = m_BayesNet.getParentSet(iNode);
			while (oParentSet.getNrOfParents() > 0) {
				oParentSet.deleteLastParent(m_instances);
			}
			for (int iParent = 0; iParent < maxn(); iParent++) {
				if (arrows[iParent][iNode]) {
					oParentSet.addParent(iParent, m_instances);
				}
			}
		}
	} // search
	
	
	/** CalcDependencyGraph determines the skeleton of the BayesNetwork by
	 * starting with a complete graph and removing edges (a--b) if it can
	 * find a set Z such that a and b conditionally independent given Z.
	 * The set Z is found by trying all possible subsets of nodes adjacent 
	 * to a and b, first of size 0, then of size 1, etc. up to size 
	 * m_nMaxCardinality
	 * @param edges : boolean matrix representing the edges
	 * @param sepsets : set of separating sets
	 */
	void calcDependencyGraph(boolean[][] edges, SeparationSet[][] sepsets) {
		/*calc undirected graph a-b iff D(a,S,b) for all S)*/
		SeparationSet oSepSet;

		for (int iNode1 = 0; iNode1 < maxn(); iNode1++) { 
			/*start with complete graph*/
			for (int iNode2 = 0; iNode2 < maxn(); iNode2++) {
				edges[iNode1][iNode2] = true;
			}
		}
		for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
			edges[iNode1][iNode1] = false;
		}

		for (int iCardinality = 0; iCardinality <= getMaxCardinality(); iCardinality++) {
			for (int iNode1 = 0; iNode1 <= maxn() - 2; iNode1++) {
				for (int iNode2 = iNode1 + 1; iNode2 < maxn(); iNode2++) {
					if (edges[iNode1][iNode2]) {
						oSepSet = existsSepSet(iNode1, iNode2, iCardinality, edges);
						if (oSepSet != null) {
							edges[iNode1][iNode2] = false;
							edges[iNode2][iNode1] = false;
							sepsets[iNode1][iNode2] = oSepSet;
							sepsets[iNode2][iNode1] = oSepSet;
							// report separating set
							System.err.print("I(" + name(iNode1) + ", {");
							for (int iNode3 = 0; iNode3 < iCardinality; iNode3++) {
								System.err.print(name(oSepSet.m_set[iNode3]) + " ");
							}
							System.err.print("} ," + name(iNode2) + ")\n");
						}
					}
				}
			}
			// report current state of dependency graph
			System.err.print(iCardinality + " ");
			for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
				System.err.print(name(iNode1) + " ");
			}
			System.err.print('\n');
			for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
				for (int iNode2 = 0; iNode2 < maxn(); iNode2++) {
					if (edges[iNode1][iNode2])
						System.err.print("X ");
					else
						System.err.print(". ");
				}
				System.err.print(name(iNode1) + " ");
				System.err.print('\n');
			}
		}
	} /*CalcDependencyGraph*/

	/** ExistsSepSet tests if a separating set Z of node a and b exists of given 
	 * cardiniality exists. 
	 * The set Z is found by trying all possible subsets of nodes adjacent 
	 * to both a and b of the requested cardinality.
	 * @param iNode1 : index of first node a
	 * @param iNode2 : index of second node b
	 * @param nCardinality : size of the separating set Z
	 * @param deparc : skeleton
	 * @return SeparationSet containing set that separates iNode1 and iNode2 or null if no such set exists
	 */
    SeparationSet existsSepSet(int iNode1, int iNode2, int nCardinality, boolean [] [] edges)
    {
        /*Test if a separating set of node d and e exists of cardiniality k*/
//        int iNode1_, iNode2_;
        int iNode3, iZ;
		SeparationSet Z = new SeparationSet();
		Z.m_set[nCardinality] = -1;

//        iNode1_ = iNode1;
//        iNode2_ = iNode2;

		// find first candidate separating set Z
        if (nCardinality > 0) {
            Z.m_set[0] = next(-1, iNode1, iNode2, edges);
            iNode3 = 1;
            while (iNode3 < nCardinality) {
              Z.m_set[iNode3] = next(Z.m_set[iNode3 - 1], iNode1, iNode2, edges);
              iNode3++;
            }
        }

        if (nCardinality > 0) {
	        iZ = maxn() - Z.m_set[nCardinality - 1] - 1;
        } else {
    	    iZ = 0;
        }
        

        while (iZ >= 0)
        {  
        	//check if candidate separating set makes iNode2_ and iNode1_ independent
            if (isConditionalIndependent(iNode2, iNode1, Z.m_set, nCardinality))	{
                return Z;
            }
			// calc next candidate separating set
            if (nCardinality > 0) {
                Z.m_set[nCardinality - 1] = next(Z.m_set[nCardinality - 1], iNode1, iNode2, edges);
            }
            iZ = nCardinality - 1;   
            while (iZ >= 0 && Z.m_set[iZ] >= maxn()) {
                iZ = nCardinality - 1;
                while (iZ >= 0 && Z.m_set[iZ] >= maxn()) {
                	iZ--;
                }
                if (iZ < 0) {
                    break;
                }
                Z.m_set[iZ] = next(Z.m_set[iZ], iNode1, iNode2, edges);
                for (iNode3 = iZ + 1; iNode3 < nCardinality; iNode3++) {
                    Z.m_set[iNode3] = next(Z.m_set[iNode3 - 1], iNode1, iNode2, edges);
                }
                iZ = nCardinality - 1;
            }
        }

        return null;
    }  /*ExistsSepSet*/

	/** 
	 * determine index of node that makes next candidate separating set
	 * adjacent to iNode1 and iNode2, but not iNode2 itself
	 * @param x : index of current node
	 * @param iNode1 : first node
	 * @param iNode2 : second node (must be larger than iNode1)
	 * @param edges : skeleton so far
	 * @return int index of next node adjacent to iNode1 after x
	 */
	int next(int x, int iNode1, int iNode2, boolean [] [] edges)
	{
		x++;
		while (x < maxn() && (!edges[iNode1][x] || !edges[iNode2][x] ||x == iNode2)) {
			x++;
		}
		return x;
	}  /*next*/


	/** CalcVeeNodes tries to find V-nodes, i.e. nodes a,b,c such that
	 * a->c<-b and a-/-b. These nodes are identified by finding nodes
	 * a,b,c in the skeleton such that a--c, c--b and a-/-b and furthermore
	 * c is not in the set Z that separates a and b
	 * @param edges : skeleton
	 * @param arrows : resulting partially directed skeleton after all V-nodes 
	 * have been identified
	 * @param sepsets : separating sets
	 */
	void calcVeeNodes(
		boolean[][] edges,
		boolean[][] arrows,
		SeparationSet[][] sepsets) {

		// start with complete empty graph
		for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
			for (int iNode2 = 0; iNode2 < maxn(); iNode2++) {
				arrows[iNode1][iNode2] = false;
			}
		}

		for (int iNode1 = 0; iNode1 < maxn() - 1; iNode1++) {
			for (int iNode2 = iNode1 + 1; iNode2 < maxn(); iNode2++) {
				if (!edges[iNode1][iNode2]) { /*i nonadj j*/
					for (int iNode3 = 0; iNode3 < maxn(); iNode3++) {
						if ((iNode3 != iNode1
							&& iNode3 != iNode2
							&& edges[iNode1][iNode3]
							&& edges[iNode2][iNode3])
							& (!sepsets[iNode1][iNode2].contains(iNode3))) {
							arrows[iNode1][iNode3] = true; /*add arc i->k*/
							arrows[iNode2][iNode3] = true; /*add arc j->k*/
						}
					}
				}
			}
		}
	} // CalcVeeNodes


	/** CalcArcDirections assigns directions to edges that remain after V-nodes have
	 * been identified. The arcs are directed using the following rules:
	   Rule 1: i->j--k & i-/-k => j->k
	   Rule 2: i->j->k & i--k => i->k
	   Rule 3  m
			 /|\
			i | k  => m->j
	i->j<-k  \|/
			  j
	
	   Rule 4  m
			 / \
			i---k  => i->m & k->m
	  i->j   \ /
			  j
	   Rule 5: if no edges are directed then take a random one (first we can find)
	 * @param edges : skeleton
	 * @param arrows : resulting fully directed DAG
	 */
	void calcArcDirections(boolean[][] edges, boolean[][] arrows) {
		/*give direction to remaining arcs*/
		int i, j, k, m;
		boolean bFound;

		do {
			bFound = false;

			/*Rule 1: i->j--k & i-/-k => j->k*/

			for (i = 0; i < maxn(); i++) {
				for (j = 0; j < maxn(); j++) {
					if (i != j && arrows[i][j]) {
						for (k = 0; k < maxn(); k++) {
							if (i != k
								&& j != k
								&& edges[j][k]
								&& !edges[i][k]
								&& !arrows[j][k]
								&& !arrows[k][j]) {
								arrows[j][k] = true;
								bFound = true;
							}
						}
					}
				}
			}

			/*Rule 2: i->j->k & i--k => i->k*/

			for (i = 0; i < maxn(); i++) {
				for (j = 0; j < maxn(); j++) {
					if (i != j && arrows[i][j]) {
						for (k = 0; k < maxn(); k++) {
							if (i != k
								&& j != k
								&& edges[i][k]
								&& arrows[j][k]
								&& !arrows[i][k]
								&& !arrows[k][i]) {
								arrows[i][k] = true;
								bFound = true;
							}
						}
					}
				}
			}

			/* Rule 3  m
			         /|\
			        i | k  => m->j
			i->j<-k  \|/
			          j
			*/
			for (i = 0; i < maxn(); i++) {
				for (j = 0; j < maxn(); j++) {
					if (i != j && arrows[i][j]) {
						for (k = 0; k < maxn(); k++) {
							if (k != i
								&& k != j
								&& arrows[k][j]
								&& !edges[k][i]) {
								for (m = 0; m < maxn(); m++) {
									if (m != i
										&& m != j
										&& m != k
										&& edges[m][i]
										&& !arrows[m][i]
										&& !arrows[i][m]
										&& edges[m][j]
										&& !arrows[m][j]
										&& !arrows[j][m]
										&& edges[m][k]
										&& !arrows[m][k]
										&& !arrows[k][m]) {
										arrows[m][j] = true;
										bFound = true;
									}
								}
							}
						}
					}
				}
			}

			/* Rule 4  m
			         / \
			        i---k  => i->m & k->m
			  i->j   \ /
			          j
			*/
			for (i = 0; i < maxn(); i++) {
				for (j = 0; j < maxn(); j++) {
					if (i != j && arrows[j][i]) {
						for (k = 0; k < maxn(); k++) {
							if (k != i
								&& k != j
								&& edges[k][j]
								&& !arrows[k][j]
								&& !arrows[j][k]
								&& edges[k][i]
								&& !arrows[k][i]
								&& !arrows[i][k]) {
								for (m = 0; m < maxn(); m++) {
									if (m != i
										&& m != j
										&& m != k
										&& edges[m][i]
										&& !arrows[m][i]
										&& !arrows[i][m]
										&& edges[m][k]
										&& !arrows[m][k]
										&& !arrows[k][m]) {
										arrows[i][m] = true;
										arrows[k][m] = true;
										bFound = true;
									}
								}
							}
						}
					}
				}
			}

			/*Rule 5: if no edges are directed then take a random one (first we can find)*/

			if (!bFound) {
				i = 0;
				while (!bFound && i < maxn()) {
					j = 0;
					while (!bFound && j < maxn()) {
						if (edges[i][j]
							&& !arrows[i][j]
							&& !arrows[j][i]) {
							arrows[i][j] = true;
							bFound = true;
						}
						j++;
					}
					i++;
				}
			}

		}
		while (bFound);

	} // CalcArcDirections


	/**
	 * @return a string to describe the MaxCardinality option.
	 */
	public String maxCardinalityTipText() {
	  return "When determining whether an edge exists a search is performed for a set Z "+
	  "that separates the nodes. MaxCardinality determines the maximum size of the set Z. " +
	  "This greatly influences the length of the search. Default value is 2.";
	} // maxCardinalityTipText

	/**
	 * This will return a string describing the search algorithm.
	 * @return The string.
	 */
	public String globalInfo() {
	  return "This Bayes Network learning algorithm uses conditional independence tests " +
	  "to find a skeleton, finds V-nodes and applies a set of rules to find the directions " +
	  "of the remaining arrows.";
	}

	static public void main(String [] argv) {
		try {
			BayesNet b = new BayesNet();
			b.setSearchAlgorithm( new ICSSearchAlgorithm());
			Instances instances = new Instances(new FileReader("C:\\eclipse\\workspace\\weka\\data\\contact-lenses.arff"));
			instances.setClassIndex(instances.numAttributes() - 1);
			b.buildClassifier(instances);
			System.out.println(b.toString());
		} catch (Exception e) {
			e.printStackTrace();
		}
	} // main

} // class ICSSearchAlgorithm

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -