📄 xmeans.java

📁 wekaUT 是 University of Texas at Austin 开发的、基于 Weka 的半监督学习（semi-supervised learning）分类器
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
    int [][] w1 = new int[1][instList.length];    for (int i = 0; i < instList.length; i++) {      w1[0][i] = instList[i];    }    double [] m = {mle};    Instances w2 = new Instances(model, 1);    w2.add(center);    return calculateBIC(w1, w2, m);    }    /**   * Calculates the BIC for the given set of centers and instances.   * @param instOfCent The instances that belong to their respective centers   * @param centers the centers   * @param mle maximum likelihood   * @return The BIC for the input.   */  private double calculateBIC(int [][] instOfCent, Instances centers,			      double [] mle) {    double loglike = 0.0;    int numInstTotal = 0;    int numCenters = centers.numInstances();    int numDimensions = centers.numAttributes();    int numParameters = (numCenters - 1) + //probabilities      numCenters * numDimensions + //means      numCenters; // variance params    for (int i = 0; i < centers.numInstances(); i++) {      loglike += logLikelihoodEstimate(instOfCent[i].length, centers.instance(i),				       mle[i], centers.numInstances() * 2);      numInstTotal += instOfCent[i].length;    }    /* diff       thats how we did it    loglike -= ((centers.numAttributes() + 1.0) * centers.numInstances() * 1)      * Math.log(count);      */    loglike -= numInstTotal * Math.log(numInstTotal);    //System.out.println ("numInstTotal " + numInstTotal +    //                    "calculateBIC res " + loglike);    loglike -= (numParameters / 2.0) * Math.log(numInstTotal);    //System.out.println ("numParam " +    //                     + numParameters +    //			" calculateBIC res " + loglike);    return loglike;  }    /**   * Calculates the log-likelihood of the data for the given model, taken   * at the maximum likelihood point.   
*   * @param numInst number of instances that belong to the center   * @param center the center   * @param distortion distortion    * @param numCent number of centers    * @return the likelihood estimate   */  private double logLikelihoodEstimate(int numInst, 				       Instance center, 				       double distortion, 				       int numCent) {    // R(n) num of instances of the center -> numInst    // K num of centers -> not used    //    //todo take the diff comments away    double loglike = 0;    /* if is new */    if (numInst > 1) {      /* diff variance is new */      //      // distortion = Sum over instances x of the center(x-center)      // different to paper; sum should be squared      //      // (Sum of distances to center) / R(n) - 1.0      // different to paper; should be R(n)-K      double variance =  distortion / (numInst - 1.0);         //      //  -R(n)/2 * log(pi*2)      //      double p1 = - (numInst / 2.0) * Math.log(Math.PI * 2.0);      /* diff	 thats how we had it	 double p2 = -((ni * center.numAttributes()) / 2) * distortion;      */      //      // -(R(n)*M)/2 * log(variance)       //      double p2 = - (numInst * center.numAttributes()) / 2 * Math.log(variance);            /* diff	 thats how we had it, the difference is a bug in x-means	 double p3 = - (numInst - numCent) / 2;      */      //      // -(R(n)-1)/2      //      double p3 = - (numInst - 1.0) / 2.0;            //      // R(n)*log(R(n))      //      double p4 = numInst * Math.log(numInst);            /* diff x-means doesn't have this part 	 double p5 = - numInst * Math.log(numInstTotal);      */            /*	loglike = -(ni / 2) * Math.log(Math.PI * 2) 	- (ni * center.numAttributes()) / 2.0) * logdistortion	- (ni - k) / 2.0 	+ ni * Math.log(ni) 	- ni * Math.log(r);      */      //OOPS("distortion " + distortion);      //OOPS("variance " + variance);      //OOPS("p1 " + p1);      //OOPS("p2 " + p2);      //OOPS("p3 " + p3);      //OOPS("p4 " + p4);      //OOPS(p1 + " " + p2 + " " + p3 
+ " " + p4 + " " + p5 + " " +      //	       distortion);      loglike = p1 + p2 + p3 + p4; // diff + p5;      //OOPS("loglike " + loglike);      //the log(r) is something that can be reused.      //as is the log(2 PI), these could provide extra speed up later on.      //since distortion is so expensive to compute, I only do that once.    }    return loglike;  }    /**   * Calculates the maximum likelihood estimate for the variance.   * @param instOfCent indices of instances to each center   * @param centers the centers   * @return the list of distortions distortion.   */  private double [] distortion(int[][] instOfCent, Instances centers)     throws Exception {    double [] distortion = new double [centers.numInstances()];    for (int i = 0; i < centers.numInstances(); i++) {      distortion[i] = 0.0;      for (int j = 0; j < instOfCent[i].length; j++) {	distortion[i] += m_DistanceF.distance(                                 m_Instances.instance(instOfCent[i][j]), 				 centers.instance(i));      }    }    /* diff not done in x-means    res *= 1.0 / (count - centers.numInstances());    */    return distortion;  }    /**   * Clusters an instance.   * @param instance the instance to assign a cluster to.   * @param centers the centers to cluster the instance to.   * @return a cluster index.   */  private int clusterProcessedInstance(Instance instance, Instances centers)throws Exception{        double minDist = Integer.MAX_VALUE;    int bestCluster = 0;    for (int i = 0; i < centers.numInstances(); i++) {      double dist = m_DistanceF.distance(instance, centers.instance(i));      if (dist < minDist) {	minDist = dist;     	bestCluster = i;          }                         };                             return bestCluster;  }    /**   * Clusters an instance that has been through the filters.   
*   * @param instance the instance to assign a cluster to   * @return a cluster number   */  private int clusterProcessedInstance(Instance instance) throws Exception {    double minDist = Integer.MAX_VALUE;    int bestCluster = 0;    for (int i = 0; i < m_NumClusters; i++) {      double dist = m_DistanceF.distance(instance, m_ClusterCenters.instance(i));      if (dist < minDist) {	minDist = dist;	bestCluster = i;      }    }    return bestCluster;  }  /**   * Classifies a given instance.   *   * @param instance the instance to be assigned to a cluster   * @return the number of the assigned cluster as an integer   * if the class is enumerated, otherwise the predicted value   * @exception if instance could not be classified   * successfully   */  public int clusterInstance(Instance instance) throws Exception {    m_ReplaceMissingFilter.input(instance);    Instance inst = m_ReplaceMissingFilter.output();    return clusterProcessedInstance(inst);  }  /**   * Returns the number of clusters.   *   * @return the number of clusters generated for a training dataset.   */  public int numberOfClusters() {    return m_NumClusters;  }  /**   * Returns an enumeration describing the available options.    
* @return an enumeration of all the available options   **/  public Enumeration listOptions() {    Vector newVector = new Vector(4);     newVector.addElement(new Option(       "\tmaximum number of overall iterations" +       " (default = 1).",        "I", 1, "-I <num>"));     newVector.addElement(new Option(       "\tmaximum number of iterations in the kMeans loop in" +       " the Improve-Parameter part "+       " (default = 1000).",        "M", 1, "-M <num>"));     newVector.addElement(new Option(       "\tmaximum number of iterations in the kMeans loop" +       " for the splitted centroids in the Improve-Structure part "+       " (default = 1000).",       "J", 1, "-J <num>"));     newVector.addElement(new Option(       "\tminimum number of clusters" +       " (default = 2).",        "L", 1, "-L <num>"));     newVector.addElement(new Option(       "\tmaximum number of clusters" +       " (default = 4).",       "H", 1, "-H <num>"));     newVector.addElement(new Option(       "\tdistance value for binary attributes" +       " (default = 1.0).",       "V", 1, "-V <value>"));     newVector.addElement(new Option(       "\tFull class name of KDTree class to use, followed\n" +       "\tby scheme options.\n" +       "\teg: \"weka.core.KDTree -P\"\n" +       "(default = no KDTree class used).",       "K", 1, "-K <KDTree class specification>"));     newVector.addElement(new Option(       "\tcutoff factor, takes the given percentage of the splitted \n" +       "\tcentroids if none of the children win\n" +       "\t(default = 0.0).",       "C", 1, "-C <value>"));     newVector.addElement(new Option(       "\tFull class name of Distance function class to use, followed\n" +       "\tby scheme options.\n" +       "\teg: \"weka.core.MahalanobisDistance\"\n" +       "\t(default = weka.core.EuclideanDistance).",       "K", 1, "-K <distance function class specification>"));     newVector.addElement(new Option(       "\tfile to read starting centers from (ARFF format).",       "N", 
1, "-N <file name>"));     newVector.addElement(new Option(       "\tfile to write centers to (ARFF format).",       "O", 1, "-O <file name>"));     newVector.addElement(new Option(       "\trandom number seed (default 10).",       "S", 1, "-S <num>"));     return  newVector.elements();  }  /**   * Returns the tip text for this property   * @return tip text for this property    */  public String minNumClustersTipText() {    return "set minimum number of clusters";  }  /**   * Returns the tip text for this property   * @return tip text for this property    */  public String maxNumClustersTipText() {    return "set maximum number of clusters";  }  /**   * Sets the maximum number of iterations to perform.   * @param i the number of iterations   * @exception Exception if i is less than 1   */  public void setMaxIterations(int i) throws Exception {    if (i < 0)       throw new Exception("Only positive values for iteration number" +                           " allowed (Option I).");     m_MaxIterations = i;  }  /**   * Gets the maximum number of iterations.   * @return the number of iterations   */  public int getMaxIterations() {    return  m_MaxIterations;  }  /**   * Set the maximum number of iterations to perform in KMeans   * @param i the number of iterations   */  public void setMaxKMeans(int i) {    m_MaxKMeans = i;    m_MaxKMeansForChildren = i;  }  /**   * Gets the maximum number of iterations in KMeans.   * @return the number of iterations   */  public int getMaxKMeans() {    return  m_MaxKMeans;  }  /**   * Sets the maximum number of iterations KMeans that is performed    * on the child centers.   * @param i the number of iterations   */  public void setMaxKMeansForChildren(int i) throws Exception {    m_MaxKMeansForChildren = i;  }  /**   * Gets the maximum number of iterations in KMeans.   * @return the number of iterations   */  public int getMaxKMeansForChildren() {    return  m_MaxKMeansForChildren;  }  /**   * Sets a new cutoff factor.   
* @param i the new cutoff factor
   */
  public void setCutOffFactor(double i) throws Exception {
    this.m_CutOffFactor = i;
  }

  /**
   * Gets the cutoff factor.
   * @return the cutoff factor
   */
  public double getCutOffFactor() {
    return this.m_CutOffFactor;
  }

  /**
   * Sets the minimum number of clusters to generate.
   * A value larger than the current maximum is silently ignored.
   *
   * @param n the minimum number of clusters to generate
   */
  public void setMinNumClusters(int n) {
    if (n > m_MaxNumClusters) {
      return;
    }
    m_MinNumClusters = n;
  }

  /**
   * Sets the maximum number of clusters to generate.
   * A value smaller than the current minimum is silently ignored.
   *
   * @param n the maximum number of clusters to generate
   */
  public void setMaxNumClusters(int n) {
    if (n < m_MinNumClusters) {
      return;
    }
    m_MaxNumClusters = n;
  }

  /**
   * Returns the tip text for the bin-value property.
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String binValueTipText() {
    return "Set the value that represents true in the new attributes.";
  }

  /**
   * Gets the value that represents true in a new numeric attribute.
   * (False is always represented by 0.0.)
   * @return the value that represents true in a new numeric attribute
   */
  public double getBinValue() {
    return this.m_BinValue;
  }

  /**
   * Sets the distance value between true and false of binary attributes
   * and "same" and "different" of nominal attributes.
   * @param value the new distance value
   */
  public void setBinValue(double value) {
    this.m_BinValue = value;
  }

  /**
   * Sets the distance function.
   * @param distanceF the distance function with all options set
   */
  public void setDistanceF(DistanceFunction distanceF) {
    this.m_DistanceF = distanceF;
  }

  /**
   * Gets the distance function.
   * @return the distance function
   */
  public DistanceFunction getDistanceF() {
    return this.m_DistanceF;
  }
💿 文件大小 12323 K
👤 上传用户 ilovexzhu
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#university #supervised #learning #wekaUT
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -