⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hac.java

📁 wekaUT 是 University of Texas at Austin 开发的基于 weka 的半监督学习(semi-supervised learning)分类器
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
    default:
      throw new Exception("Unknown linkage type!");
    }
    return distance;
  }

  /**
   * Set the verbosity level of the clusterer.
   * @param verbose messages on (true) or off (false)
   */
  public void setVerbose (boolean verbose) {
    m_verbose = verbose;
  }

  /**
   * Get the verbosity level of the clusterer.
   * @return verbose messages on (true) or off (false)
   */
  public boolean getVerbose () {
    return m_verbose;
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options
   **/
  public Enumeration listOptions() {
    Vector newVector = new Vector(2);
    newVector.addElement(new Option("\tThreshold.\n"
                                    + "\t(default=MAX_DOUBLE)", "T", 1, "-T <0-MAX_DOUBLE>"));
    // NOTE(review): the scraped original read "a\t(default=-1)" and
    // "-N <-1-MAX_INT100%>"; the stray "a" and "100%" were clearly corrupt.
    newVector.addElement(new Option("\tNumber of clusters.\n"
                                    + "\t(default=-1)", "N", 1, "-N <-1-MAX_INT>"));
    return newVector.elements();
  }

  /**
   * Parses a given list of options.
   *
   * Valid options are:<p>
   *
   * -N &lt;number of clusters&gt; <br>
   * Number of clusters to form (default -1). <p>
   *
   * (The original javadoc documented -A/-C options that this method never
   * parses — apparently copied from another clusterer; only -N is handled.)
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   **/
  public void setOptions(String[] options) throws Exception {
    String optionString;
    optionString = Utils.getOption('N', options);
    if (optionString.length() != 0) {
      setNumClusters(Integer.parseInt(optionString));
    }
  }

  /**
   * Gets the current settings of Greedy Agglomerative Clustering.
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  public String [] getOptions() {
    // Fixed-size option array, padded with "" at the end per weka convention.
    String [] options = new String [70];
    int current = 0;
    options[current++] = "-N";
    options[current++] = "" + m_numClusters;
    // Linkage type is encoded as one of three mutually exclusive flags.
    if (m_linkingType == SINGLE_LINK) {
      options[current++] = "-I";
    } else if (m_linkingType == COMPLETE_LINK) {
      options[current++] = "-C";
    } else if (m_linkingType == GROUP_AVERAGE) {
      options[current++] = "-G";
    }
    if (m_seedable) {
      options[current++] = "-S";
    }
    options[current++] = "-M";
    options[current++] = m_metric.getClass().getName();
    // Append the metric's own options when it supports them.
    if (m_metric instanceof OptionHandler) {
      String[] metricOptions = ((OptionHandler)m_metric).getOptions();
      for (int i = 0; i < metricOptions.length; i++) {
        options[current++] = metricOptions[i];
      }
    }
    while (current < options.length) {
      options[current++] = "";
    }
    return options;
  }

  /**
   * Train the clusterer using specified parameters.
   *
   * @param instances Instances to be used for training
   * @exception Exception if the metric is not trainable
   */
  public void trainClusterer (Instances instances) throws Exception {
    if (m_metric instanceof LearnableMetric) {
      if (((LearnableMetric)m_metric).getTrainable()) {
        ((LearnableMetric)m_metric).learnMetric(instances);
      } else {
        throw new Exception ("Metric is not trainable");
      }
    } else {
      throw new Exception ("Metric is not trainable");
    }
  }

  /** Returns objective function, needed for compatibility with SemiSupClusterer. */
  public double objectiveFunction()
{    return Double.NaN;  }    /** Return the number of clusters */  public int getNumClusters() {    return m_numClusters;  }  /** A duplicate function to conform to Clusterer abstract class.   * @returns the number of clusters   */  public int numberOfClusters() {    return getNumClusters();  }  /**   * get an array of random indeces out of n possible values.   * if the number of requested indeces is larger then maxIdx, returns   * maxIdx permuted values   * @param maxIdx - the maximum index of the set   * @param numIdxs number of indexes to return   * @return an array of indexes   */  public static int[] randomSubset(int numIdxs, int maxIdx) {    Random r = new Random(maxIdx + numIdxs);    int[] indeces = new int[maxIdx];    for (int i = 0; i < maxIdx; i++) {      indeces[i] = i;    }    // permute the indeces randomly    for (int i = 0; i < indeces.length; i++) {      int idx = r.nextInt (maxIdx);      int temp = indeces[idx];      indeces[idx] = indeces[i];      indeces[i] = temp;    }    int []returnIdxs = new int[Math.min(numIdxs,maxIdx)];    for (int i = 0; i < returnIdxs.length; i++) {      returnIdxs[i] = indeces[i];    }    return returnIdxs;  }    // Main method for testing this class  public static void main(String [] argv)  {    try {      //////// Iris data      //String datafile = "/u/ml/software/weka-latest/data/iris.arff";      //      String datafile = "/u/mbilenko/ml/tivoli/user-features-GroupClassGrad.arff";      String datafile = "/u/mbilenko/ml/tivoli/data/user-features-processClass.arff";      //      String datafile = "/u/mbilenko/weka/data/glass.arff";          // set up the data      FileReader reader = new FileReader (datafile);      Instances data = new Instances (reader);      // filter out bad attributes for tivoli clustering      String [] filteredProcesses = {"pico", "twm", "Xvnc", "lpr", "fvwm2", "xclock", "FvwmButtons", "FvwmPager", "ymessenger.bin",      "vim", "vi", "xemacs", "xscreensaver", "gnome-panel", 
"gnome-settings-daemon", "gconfd-2", "xlock", "kdesud", "ssh",      "tasklist_applet", "panel", "gnome-session", "gnome-smproxy", "MozillaFirebird-bin", "nautilus", "mutt",      "mixer_applet2", "metacity", "bonobo-activation-server", "csh", "nautilus-throbber", "xmms", "realplay", "konqueror", "knode", "kdesktop_lock", "kwrapper", "artsd", "esd", "gnome-panel", "gnome-terminal", "mail", "gnome-name-service", "deskguide_applet", "sawfish",      "gaim", "konsole", "opera", "enlightenment", "6", "wmaker"};      System.out.println("filtered=" + filteredProcesses.length);       int[] descrIndeces = new int[filteredProcesses.length];      for (int i = 0; i < descrIndeces.length; i++) {	Attribute attr = data.attribute(filteredProcesses[i]);	System.out.println(i + ": " + attr);	descrIndeces[i] = attr.index();      }      Remove attributeFilter = new Remove();      attributeFilter.setAttributeIndicesArray(descrIndeces);      attributeFilter.setInvertSelection(false);      attributeFilter.setInputFormat(data);      data = Filter.useFilter(data, attributeFilter);            // Make the last attribute be the class       int theClass = data.numAttributes();      data.setClassIndex(theClass-1); // starts with 0      //        int numClusters = data.numClasses();            Instances clusterData = new Instances(data);      clusterData.deleteClassAttribute();            WeightedEuclidean euclidean = new WeightedEuclidean(clusterData.numAttributes());      WeightedDotP dotp = new WeightedDotP(clusterData.numAttributes());      //      HAC hac = new HAC(euclidean);      HAC hac = new HAC(dotp);      hac.setVerbose(false);      clusterData = hac.filterInstanceDescriptions(clusterData);      // cluster without seeding      System.out.println("\nClustering the user data ...\n");            hac.setLinkingType(new SelectedTag(COMPLETE_LINK, TAGS_LINKING));      // trim the instances      //      int i = 6;      // while  (i < clusterData.numInstances()) {      //	clusterData.delete(i);  
    //}      // cluster with seeding            //      ArrayList seedArray = new ArrayList();      //      for (int i = 0; i < 19; i++) {      //	seedArray.add(clusterData.instance(i));      //      }//        seedArray.add(clusterData.instance(0));//        seedArray.add(clusterData.instance(1));//        seedArray.add(clusterData.instance(2));//        seedArray.add(clusterData.instance(3));//        seedArray.add(clusterData.instance(4));      //        seedArray.add(clusterData.instance(50));//        seedArray.add(clusterData.instance(51));//        seedArray.add(clusterData.instance(52));//        seedArray.add(clusterData.instance(53));//        seedArray.add(clusterData.instance(54));//        seedArray.add(clusterData.instance(100));//        seedArray.add(clusterData.instance(101));//        seedArray.add(clusterData.instance(102));//        seedArray.add(clusterData.instance(103));//        seedArray.add(clusterData.instance(104));//        Seeder seeder = new Seeder(clusterData, data);//        seeder.setVerbose(false);//        seeder.createSeeds(seedArray);//        HashMap seedHash = seeder.getSeeds();//        hac.setSeedHash(seedHash);      HashMap classInstanceHash = new HashMap();      // get the data for each class      for (int i = 0; i < data.numInstances(); i++) {	Instance instance = data.instance(i);	Integer classValue = new Integer((int) instance.classValue());	if (classInstanceHash.containsKey(classValue)) {	  ArrayList classList = (ArrayList) classInstanceHash.get(classValue);	  classList.add(new Integer(i));	  System.out.println("Seen class; now has " + classList.size() + " elements");	} else { // unseen class	  System.out.println("Unseen class " + classValue);	  ArrayList classList = new ArrayList();	  classList.add(new Integer(i));	  classInstanceHash.put(classValue, classList);	}       }      // sample from the classes that have more than 1 instance      double seedProportion = 0.7;      ArrayList seedArray = new ArrayList();      
Iterator iterator = classInstanceHash.entrySet().iterator();      while (iterator.hasNext()) {	Map.Entry entry = (Map.Entry) iterator.next();	ArrayList classList = (ArrayList) entry.getValue();		System.out.println("Classlist for " + entry.getKey() + " has " + classList.size() + " elements\n");			if (classList.size() > 1) {	  int [] seedIndeces = randomSubset((int) ((classList.size() + 0.0) * seedProportion), classList.size());	  System.out.println("Seeding for class " + entry.getKey() + " using " + seedIndeces.length);	  for (int i = 0; i < seedIndeces.length; i++) {	    seedArray.add(clusterData.instance(((Integer)(classList.get(seedIndeces[i]))).intValue()));	    System.out.println("Adding seed " + classList.get(seedIndeces[i]));	  } 	}      }      Seeder seeder = new Seeder(clusterData, data);      seeder.setVerbose(false);      seeder.createSeeds(seedArray);      HashMap seedHash = seeder.getSeeds();      hac.setSeedHash(seedHash);            hac.buildClusterer(clusterData, 1);      hac.printClusters();//        System.out.println("Cluster assignments: ");//        for (int i=0; i < hac.m_clusterAssignments.length; i++) {//  	System.out.print(i + ":" + hac.m_clusterAssignments[i] + "  ");//        }//        System.out.println("\n\n");//        for (int j = 0; j < clusterData.numInstances(); j++) {//  	System.out.println(j + ":" + hac.clusterInstance(clusterData.instance(j)));//        }      ////////////////////////////////////////////////////      //  HI-DIM TESTING      ////////////////////////////////////////////////////      //////// Text data - 300 documents//        datafile = "/u/ml/software/weka-latest/data/20newsgroups/different-100_fromCCS.arff";//        System.out.println("\nClustering diff-100 newsgroup data with seeding, using constrained HAC...\n");      //        // set up the data//        reader = new FileReader (datafile);//        data = new Instances (reader);//        System.out.println("Initial data has size: " + data.numInstances());//  
      // Make the last attribute be the class //        theClass = data.numAttributes();//        data.setClassIndex(theClass-1); // starts with 0//        numClusters = data.numClasses();      //        WeightedDotP dotp = new WeightedDotP(data.numAttributes());//        hac = new HAC (dotp);//        // cluster with seeding      //        Instances seeds = new Instances(data, 0, 5);//        seeds.add(data.instance(100));//        seeds.add(data.instance(101));//        seeds.add(data.instance(102));//        seeds.add(data.instance(103));//        seeds.add(data.instance(104));//        seeds.add(data.instance(200));//        seeds.add(data.instance(201));//        seeds.add(data.instance(202));//        seeds.add(data.instance(203));//        seeds.add(data.instance(204));//        System.out.println("Labeled data has size: " + seeds.numInstances() + ", number of attributes: " + data.numAttributes());//        data.delete(204);//        data.delete(203);//        data.delete(202);//        data.delete(201);//        data.delete(200);//        data.delete(104);//        data.delete(103);//        data.delete(102);//        data.delete(101);//        data.delete(100);//        data.delete(4);//        data.delete(3);//        data.delete(2);//        data.delete(1);//        data.delete(0);//        System.out.println("Unlabeled data has size: " + data.numInstances());//        // Remove the class labels before clustering//        clusterData = new Instances(data);//        //      clusterData.deleteAttributeAt(theClass-1);//        clusterData.deleteClassAttribute();//        hac.setVerbose(false);//        hac.setSeedable(true);//        hac.buildClusterer(seeds, clusterData, theClass, numClusters);    } catch (Exception e) {      e.printStackTrace();    }  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -