pcsoftkmeans.java
来自「wekaUT是 university texas austin 开发的基于wek」· Java 代码 · 共 1,952 行 · 第 1/5 页
JAVA
1,952 行
return newVector.elements();
  }

  /**
   * Gets the current settings of the clusterer as an array of option strings.
   *
   * The array contains, in order: "-NS" (only when the clusterer is not
   * seedable), "-A" algorithm id, "-N" number of clusters, "-R" random seed,
   * "-ML" must-link weight, "-CL" cannot-link weight, "-M" metric class name
   * (with the "weka.core.metrics." prefix removed), followed by the metric's
   * own options when the metric is an OptionHandler. Unused trailing slots
   * are filled with empty strings.
   *
   * @return an array of at least 80 strings
   */
  public String [] getOptions () {
    // Fetch the metric options first so that the result array can be sized
    // to hold them; a fixed-size array would overflow for a metric that
    // reports many options.
    String[] metricOptions = new String[0];
    if (m_metric instanceof OptionHandler) {
      metricOptions = ((OptionHandler)m_metric).getOptions();
    }

    // 13 is the largest number of slots the clusterer's own options can use
    // (one flag plus six flag/value pairs).
    String[] options = new String[Math.max(80, 13 + metricOptions.length)];
    int current = 0;

    if (!m_Seedable) {
      options[current++] = "-NS";
    }

    options[current++] = "-A";
    options[current++] = "" + getAlgorithm().getSelectedTag().getID();

    options[current++] = "-N";
    options[current++] = "" + getNumClusters();

    options[current++] = "-R";
    options[current++] = "" + getRandomSeed();

    options[current++] = "-ML";
    options[current++] = "" + m_MustLinkWeight;

    options[current++] = "-CL";
    options[current++] = "" + m_CannotLinkWeight;

    options[current++] = "-M";
    options[current++] = Utils.removeSubstring(m_metric.getClass().getName(), "weka.core.metrics.");
    for (int i = 0; i < metricOptions.length; i++) {
      options[current++] = metricOptions[i];
    }

    // Pad the remainder with empty strings
    while (current < options.length) {
      options[current++] = "";
    }
    return options;
  }

  /**
   * Parses a given list of options.
   *
   * Recognized options: -N (number of clusters), -R (random seed),
   * -A (algorithm tag id) and -M (metric specification: class name followed
   * by the metric's options, all in one string).
   *
   * NOTE(review): getOptions() also emits -ML, -CL and -NS, but they are not
   * parsed here, so those settings do not round-trip through
   * getOptions()/setOptions() -- confirm whether that is intended.
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option value cannot be parsed or the metric
   * cannot be created
   **/
  public void setOptions (String[] options) throws Exception {
    String numClustersString = Utils.getOption('N', options);
    if (numClustersString.length() != 0) {
      setNumClusters(Integer.parseInt(numClustersString));
    }

    String seedString = Utils.getOption('R', options);
    if (seedString.length() != 0) {
      setRandomSeed(Integer.parseInt(seedString));
    }

    String algorithmString = Utils.getOption('A', options);
    if (algorithmString.length() != 0) {
      setAlgorithm(new SelectedTag(Integer.parseInt(algorithmString), TAGS_ALGORITHM));
    }

    String metricString = Utils.getOption('M', options);
    if (metricString.length() != 0) {
      String[] metricSpec = Utils.splitOptions(metricString);
      if (metricSpec.length == 0) {
        // A blank specification would otherwise fail with an unexplained
        // ArrayIndexOutOfBoundsException on metricSpec[0]
        throw new Exception("Invalid metric specification string");
      }
      String metricName = metricSpec[0];
      // Blank out the class name so that only the metric's own options remain
      metricSpec[0] = "";
      setMetric((LearnableMetric) LearnableMetric.forName(metricName, metricSpec));
    }
  }

  /**
   * return a string describing this clusterer
   *
   * @return a description of the clusterer as a string
   */
  public String toString() {
    StringBuffer temp = new StringBuffer();

    temp.append("\nkMeans\n======\n");
    temp.append("\nNumber of iterations: " + m_Iterations+"\n");

    //      temp.append("\nCluster centroids:\n");
    //      for (int i = 0; i < m_NumClusters; i++) {
    //        temp.append("\nCluster "+i+"\n\t");
    //      }
    //      temp.append("\n");
    return temp.toString();
  }

  /**
   * set the verbosity level of the clusterer
   * @param verbose messages on(true) or off (false)
   */
  public void setVerbose (boolean verbose) {
    m_verbose = verbose;
  }

  /**
   * get the verbosity level of the clusterer
   * @return messages on(true) or off (false)
   */
  public boolean getVerbose () {
    return m_verbose;
  }

  /**
   * Train the clusterer using specified parameters: asks the metric to learn
   * from the given instances.
   *
   * @param instances Instances to be used for training
   * @exception Exception if the metric is not a LearnableMetric or reports
   * that it is not trainable
   */
  public void trainClusterer (Instances instances) throws Exception {
    boolean trainable = (m_metric instanceof LearnableMetric)
      && ((LearnableMetric)m_metric).getTrainable();
    if (!trainable) {
      throw new Exception ("Metric is not trainable");
    }
    ((LearnableMetric)m_metric).learnMetric(instances);
  }

  /** Normalizes Instance or SparseInstance, dispatching on the runtime type
   *
   * @author Sugato Basu
   * @param inst Instance to be normalized
   */
  public void normalize(Instance inst) throws Exception {
    if (inst instanceof SparseInstance) {
      normalizeSparseInstance(inst);
    } else {
      normalizeInstance(inst);
    }
  }

  /** Normalizes the values of a normal Instance in L2 norm.
   * The value at the class index is neither counted in the norm nor scaled.
   * A SparseInstance is handed over to normalizeSparseInstance. An instance
   * whose norm is zero is left unchanged.
   *
   * @author Sugato Basu
   * @param inst Instance to be normalized
   */
  public void normalizeInstance(Instance inst) throws Exception{
    if (inst instanceof SparseInstance) {
      System.err.println("Is SparseInstance, using normalizeSparseInstance function instead");
      normalizeSparseInstance(inst);
      // The sparse routine has done all the work; do not process it again
      return;
    }

    double values [] = inst.toDoubleArray();
    int classIndex = inst.classIndex();

    double norm = 0;
    for (int i=0; i<values.length; i++) {
      if (i != classIndex) { // don't normalize the class index
        norm += values[i] * values[i];
      }
    }
    norm = Math.sqrt(norm);
    if (norm == 0) {
      // Dividing by a zero norm would turn every value into NaN
      return;
    }

    for (int i=0; i<values.length; i++) {
      if (i != classIndex) { // don't normalize the class index
        values[i] /= norm;
      }
    }
    inst.setValueArray(values);
  }

  /** Normalizes the values of a SparseInstance in L2 norm.
   * The value at the class index is neither counted in the norm nor scaled.
   * An instance that is not a SparseInstance is handed over to
   * normalizeInstance. An instance whose norm is zero is left unchanged.
   *
   * @author Sugato Basu
   * @param inst SparseInstance to be normalized
   */
  public void normalizeSparseInstance(Instance inst) throws Exception{
    if (!(inst instanceof SparseInstance)) {
      System.err.println("Not SparseInstance, using normalizeInstance function instead");
      normalizeInstance(inst);
      // The dense routine has done all the work; do not process it again
      return;
    }

    int length = inst.numValues();
    int classIndex = inst.classIndex();

    double norm = 0;
    for (int i=0; i<length; i++) {
      if (inst.index(i) != classIndex) { // don't normalize the class index
        double v = inst.valueSparse(i);
        norm += v * v;
      }
    }
    norm = Math.sqrt(norm);
    if (norm == 0) {
      // Dividing by a zero norm would turn every stored value into NaN
      return;
    }

    for (int i=0; i<length; i++) {
      if (inst.index(i) != classIndex) { // don't normalize the class index
        inst.setValueSparse(i, inst.valueSparse(i)/norm);
      }
    }
  }

  /** Fast version of meanOrMode - streamlined from Instances.meanOrMode for efficiency.
   * Computes the weighted mean of every attribute over the given instances.
   * Does not check for missing attributes and assumes numeric attributes.
   * Values are read through numValues()/index()/valueSparse(), so no cast to
   * SparseInstance is needed.
   * NOTE(review): the original code cast every instance to SparseInstance;
   * results for dense instances rely on Instance implementing those
   * accessors over all attributes -- confirm against the Instance class.
   *
   * @param insts the instances to average
   * @return one mean per attribute; all zeros when the total instance weight
   * is (approximately) zero
   */
  protected double[] meanOrMode(Instances insts) {
    int numAttributes = insts.numAttributes();
    double [] sums = new double[numAttributes]; // zero-initialized by Java
    double totalWeight = 0;

    for (int j=0; j<insts.numInstances(); j++) {
      Instance inst = insts.instance(j);
      double instWeight = inst.weight();
      totalWeight += instWeight;
      for (int i=0; i<inst.numValues(); i++) {
        sums[inst.index(i)] += instWeight * inst.valueSparse(i);
      }
    }

    if (Utils.eq(totalWeight, 0)) {
      // No weight to divide by: report zero for every attribute
      return new double[numAttributes];
    }
    for (int k=0; k<numAttributes; k++) {
      sums[k] = sums[k] / totalWeight;
    }
    return sums;
  }

  /**
   * Gets a Double representing the current time as the number of
   * milliseconds since the epoch (midnight, January 1, 1970 UTC).
   *
   * @return a value of type Double
   */
  public static Double getTimeStamp() {
    double timestamp = System.currentTimeMillis();
    return new Double(timestamp);
  }

  /**
   * Main method for testing this class. Runs testCase() and ignores the
   * command line arguments.
   *
   * @param args command line arguments (unused)
   */
  public static void main (String[] args) {
    try {
      testCase();
      //System.out.println(ClusterEvaluation.evaluateClusterer(new PCSoftKMeans(), args));
    }
    catch (Exception e) {
      System.out.println(e.getMessage());
      e.printStackTrace();
    }
  }

  /**
   * Loads an ARFF file for testCase(), makes the last attribute the class
   * and prints the class index. The reader is closed whether or not loading
   * succeeds.
   *
   * @param datafile path of the ARFF file to read
   * @return the loaded instances with the class index set
   * @exception Exception if the file cannot be opened or read
   */
  private static Instances loadTestCaseData(String datafile) throws Exception {
    FileReader reader = new FileReader (datafile);
    Instances data;
    try {
      data = new Instances (reader);
    } finally {
      reader.close();
    }

    // Make the last attribute be the class
    int classIndex = data.numAttributes()-1;
    data.setClassIndex(classIndex); // starts with 0
    System.out.println("ClassIndex is: " + classIndex);
    return data;
  }

  /**
   * Creates random pairwise constraints for testCase() from labeled data.
   * Instance index pairs are drawn with a Random seeded with 42; a pair is
   * MUST_LINK when both instances have the same class value and CANNOT_LINK
   * otherwise. Pairs of an instance with itself and pairs already in the
   * list are skipped.
   *
   * @param data labeled instances to draw pairs from
   * @param numPairs number of distinct constraints to create
   * @return list of InstancePair objects
   */
  private static ArrayList randomTestCaseConstraints(Instances data, int numPairs) {
    ArrayList labeledPair = new ArrayList(numPairs);
    Random rand = new Random(42);
    int num = 0;

    System.out.println("Initializing constraint matrix:");
    while (num < numPairs) {
      int i = (int) (data.numInstances()*rand.nextFloat());
      int j = (int) (data.numInstances()*rand.nextFloat());
      // Store the smaller index first
      int first = Math.min(i, j);
      int second = Math.max(i, j);
      int linkType;
      if (data.instance(first).classValue() == data.instance(second).classValue()) {
        linkType = InstancePair.MUST_LINK;
      } else {
        linkType = InstancePair.CANNOT_LINK;
      }
      InstancePair pair = new InstancePair(first, second, linkType);
      if (first!=second && !labeledPair.contains(pair)) {
        labeledPair.add(pair);
        num++;
      }
    }
    System.out.println("Finished initializing constraints");
    return labeledPair;
  }

  /**
   * Hard-coded smoke test: loads a data set, derives random constraints from
   * its class labels, clusters the unlabeled copy and prints the clusters.
   * The "dataset" local selects between the low-dimensional (iris) and the
   * high-dimensional (newsgroup) configuration. Any exception is caught and
   * its stack trace printed.
   */
  protected static void testCase() {
    try {
      //String dataset = "lowd";
      String dataset = "highd";

      if (dataset.equals("lowd")) {
        //////// Low-D data
        Instances data = loadTestCaseData("/u/ml/software/weka-latest/data/iris.arff");

        // Remove the class labels before clustering
        Instances clusterData = new Instances(data);
        clusterData.deleteClassAttribute();

        // create random constraints from the labeled training data
        ArrayList labeledPair = randomTestCaseConstraints(data, 10);

        // create clusterer
        PCSoftKMeans pckmeans = new PCSoftKMeans();
        System.out.println("\nClustering the iris data using PCKmeans...\n");
        pckmeans.setAlgorithm(new SelectedTag(ALGORITHM_SIMPLE, TAGS_ALGORITHM));
        WeightedEuclidean euclidean = new WeightedEuclidean();
        euclidean.setExternal(false);
        pckmeans.setMetric(euclidean);
        pckmeans.setVerbose(false);
        pckmeans.setSeedable(false);
        pckmeans.setNumClusters(data.numClasses());

        // do clustering
        pckmeans.buildClusterer(labeledPair, clusterData, data, data.numInstances());
        pckmeans.printIndexClusters();
      }
      else if (dataset.equals("highd")) {
        //////// Newsgroup data
        Instances data = loadTestCaseData("/u/ml/data/CCSfiles/arffFromCCS/different-100_fromCCS.arff");

        // Remove the class labels before clustering
        Instances clusterData = new Instances(data);
        clusterData.deleteClassAttribute();

        // create random constraints from the labeled training data
        ArrayList labeledPair = randomTestCaseConstraints(data, 100);

        // create clusterer
        PCSoftKMeans pckmeans = new PCSoftKMeans();
        System.out.println("\nClustering the news data using PCKmeans...\n");
        pckmeans.resetClusterer();
        pckmeans.setAlgorithm(new SelectedTag(ALGORITHM_SPHERICAL, TAGS_ALGORITHM));
        WeightedDotP dotp = new WeightedDotP();
        dotp.setExternal(false);
        dotp.setLengthNormalized(true);
        pckmeans.setMetric(dotp);
        pckmeans.setVerbose(false);
        pckmeans.setSeedable(true);
        pckmeans.setNumClusters(data.numClasses());

        // do clustering
        pckmeans.buildClusterer(labeledPair, clusterData, data, clusterData.numInstances());
        pckmeans.printIndexClusters();
      }
    }
    catch (Exception e) {
      e.printStackTrace();
    }
  }
}
// TODO: Add init using farthest first
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?