birchcluster.java
来自「wekaUT是 university texas austin 开发的基于wek」· Java 代码 · 共 1,095 行 · 第 1/2 页
JAVA
1,095 行
* Sets the percentage of noise set.
   *
   * @param newNoiseRate new percentage of noise
   */
  public void setNoiseRate(double newNoiseRate) {
    this.m_NoiseRate = newNoiseRate;
  }

  /**
   * Returns the random number generator used by this data generator.
   * If none has been set yet, one is created from the current seed
   * and remembered for later calls.
   *
   * @return the random generator
   */
  public Random getRandom() {
    if (m_Random != null) {
      return m_Random;
    }
    m_Random = new Random(getSeed());
    return m_Random;
  }

  /**
   * Replaces the random number generator.
   *
   * @param newRandom the random generator to use from now on
   */
  public void setRandom(Random newRandom) {
    this.m_Random = newRandom;
  }

  /**
   * Returns the seed used to create the random number generator.
   *
   * @return the random number seed
   */
  public int getSeed() {
    return this.m_Seed;
  }

  /**
   * Stores a new seed. An already created random generator is not
   * touched by this call.
   *
   * @param newSeed the new random number seed
   */
  public void setSeed(int newSeed) {
    this.m_Seed = newSeed;
  }

  /**
   * Returns the header (format) of the dataset to be generated.
   *
   * @return the dataset format, as last stored by setDatasetFormat
   */
  public Instances getDatasetFormat() {
    return this.m_DatasetFormat;
  }

  /**
   * Stores the header (format) of the dataset to be generated.
   *
   * @param newDatasetFormat the new dataset format
   */
  public void setDatasetFormat(Instances newDatasetFormat) {
    this.m_DatasetFormat = newDatasetFormat;
  }

  /**
   * Tells whether examples can be generated one at a time with
   * generateExample.
   *
   * @return always false for this generator
   */
  public boolean getSingleModeFlag() {
    return false;
  }

  /**
   * Returns an enumeration describing the available options.
*
   * <p>The option names listed here are the ones parsed by setOptions:
   * G and I select the pattern, N and R give ranges, M and C are the
   * pattern-specific parameters, O selects ordered input, P is the
   * noise rate and S the random seed. G, I and O are plain flags and
   * therefore declared with zero arguments.
   *
   * @return an enumeration of all the available options
   */
  public Enumeration listOptions() {
    Vector newVector = new Vector(9);

    newVector.addElement(new Option(
        "\tSet pattern to grid (default is random).",
        "G", 0, "-G"));

    // setOptions reads the sine flag as 'I'; 'S' is reserved for the seed.
    newVector.addElement(new Option(
        "\tSet pattern to sine (default is random).",
        "I", 0, "-I"));

    newVector.addElement(new Option(
        "\tThe range of number of instances per cluster (default 1..50).",
        "N", 1, "-N <num>..<num>"));

    newVector.addElement(new Option(
        "\tThe range of radius per cluster (default 0.1..sqrt(2)).",
        "R", 1, "-R <num>..<num>"));

    newVector.addElement(new Option(
        "\tThe distance multiplier (default 4).",
        "M", 1, "-M <num>"));

    newVector.addElement(new Option(
        "\tThe number of cycles (default 4).",
        "C", 1, "-C <num>"));

    newVector.addElement(new Option(
        "\tSet input order to ordered (default is randomized).",
        "O", 0, "-O"));

    newVector.addElement(new Option(
        "\tThe noise rate in percent (default 0).",
        "P", 1, "-P <num>"));

    newVector.addElement(new Option(
        "\tThe Seed for random function (default 1).",
        "S", 1, "-S <num>"));

    return newVector.elements();
  }

  /**
   * Sets all options to their default values: 1..50 instances per
   * cluster, radius 0.1..sqrt(2), RANDOM pattern, distance multiplier 4,
   * 4 cycles, RANDOMIZED input order, noise rate 0 and seed 1. <p>
   */
  public void setDefaultOptions() {
    m_MinInstNum = 1;
    m_MaxInstNum = 50;
    m_MinRadius = 0.1;
    m_MaxRadius = Math.sqrt(2.0);
    m_Pattern = RANDOM;
    m_DistMult = 4;
    m_NumCycles = 4;
    m_InputOrder = RANDOMIZED;
    m_NoiseRate = 0.0;
    m_Seed = 1;
  }

  /**
   * Parses a list of options for this object.
<p> * * For list of valid options see class description.<p> * * @param options the list of options as an array of strings * @exception Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { setDefaultOptions(); String num; String fromTo; fromTo = Utils.getOption('N', options); if (fromTo.length() != 0) { setInstNums(fromTo); } fromTo = Utils.getOption('R', options); if (fromTo.length() != 0) { setRadiuses(fromTo); } boolean grid = Utils.getFlag('G', options); boolean sine = Utils.getFlag('I', options); if (grid && sine) throw new Exception("Flags G and I can only be set mutually exclusiv."); if (grid) setPattern(GRID); if (sine) setPattern(SINE); num = Utils.getOption('M', options); if (num.length() != 0) { if (!grid) throw new Exception("Option M can only be used with GRID pattern."); setDistMult(Double.valueOf(num).doubleValue()); } num = Utils.getOption('C', options); if (num.length() != 0) { if (!sine) throw new Exception("Option C can only be used with SINE pattern."); setNumCycles((int)Double.valueOf(num).doubleValue()); } boolean ordered = Utils.getFlag('O', options); if (ordered) setInputOrder(ORDERED); num = Utils.getOption('P', options); if (num.length() != 0) { setNoiseRate(Double.valueOf(num).doubleValue()); } num = Utils.getOption('S', options); if (num.length() != 0) { setSeed(Integer.parseInt(num)); } } /** * Gets the current settings of the datagenerator BIRCHCluster. 
* * @return an array of strings suitable for passing to setOptions */ public String [] getOptions() { String [] options = new String [20]; int i = 0; options[i++] = "-N"; options[i++] = "" + getInstNums(); options[i++] = "-R"; options[i++] = "" + getRadiuses(); if (getGridFlag()) { options[i++] = "-G"; options[i++] = ""; options[i++] = "-D"; options[i++] = "" + getDistMult(); } if (getSineFlag()) { options[i++] = "-I"; options[i++] = ""; options[i++] = "-C"; options[i++] = "" + getNumCycles(); } if (getOrderedFlag()) { options[i++] = "-O"; options[i++] = ""; } options[i++] = "-P"; options[i++] = "" + getNoiseRate(); while (i < options.length) { options[i++] = ""; } return options; } /** * Initializes the format for the dataset produced. * * @return the output data format * @exception Exception data format could not be defined */ public Instances defineDataFormat() throws Exception { Random random = new Random (getSeed()); setRandom(random); Instances dataset; FastVector attributes = new FastVector(3); Attribute attribute; boolean classFlag = getClassFlag(); FastVector classValues = null; if (classFlag) classValues = new FastVector (m_NumClusters); // define dataset for (int i = 0; i < getNumAttributes(); i++) { attribute = new Attribute("X" + i); attributes.addElement(attribute); } if (classFlag) { for (int i = 0; i < m_NumClusters; i++) { classValues.addElement("c" + i); } attribute = new Attribute ("class", classValues); attributes.addElement(attribute); } dataset = new Instances(getRelationName(), attributes, 0); if (classFlag) dataset.setClassIndex(m_NumAttributes); // set dataset format of this class Instances format = new Instances(dataset, 0); setDatasetFormat(format); m_ClusterList = defineClusters(random); System.out.println("dataset" + dataset.numAttributes()); return dataset; } /** * Generate an example of the dataset. 
* @return the instance generated * @exception Exception if format not defined or generating <br> * examples one by one is not possible, because voting is chosen */ public Instance generateExample() throws Exception { throw new Exception("Examples cannot be generated" + " one by one."); } /** * Generate all examples of the dataset. * @return the instance generated * @exception Exception if format not defined */ public Instances generateExamples() throws Exception { Random random = getRandom(); Instances data = getDatasetFormat(); if (data == null) throw new Exception("Dataset format not defined."); // generate examples if (getOrderedFlag()) data = generateExamples(random, data); else throw new Exception("RANDOMIZED is not yet implemented."); return (data); } /** * Generate all examples of the dataset. * @return the instance generated * @exception Exception if format not defined */ public Instances generateExamples(Random random, Instances format) throws Exception { Instance example = null; if (format == null) throw new Exception("Dataset format not defined."); // generate examples for one cluster after another int cNum = 0; for (Enumeration enum = m_ClusterList.elements(); enum.hasMoreElements(); cNum++) { Cluster cl = (Cluster) enum.nextElement(); double stdDev = cl.getStdDev(); int instNum = cl.getInstNum(); double [] center = cl.getCenter(); String cName = "c" + cNum; for (int i = 0; i < instNum; i++) { // generate example example = generateInstance (format, random, stdDev, center, cName); if (example != null) example.setDataset(format); format.add(example); } } return (format); } /** * Generate an example of the dataset. 
* @return the instance generated * @exception Exception if format not defined or generating <br> * examples one by one is not possible, because voting is chosen */ private Instance generateInstance (Instances format, Random randomG, double stdDev, double [] center, String cName ) { Instance example; int numAtts = m_NumAttributes; if (getClassFlag()) numAtts++; example = new Instance(numAtts); example.setDataset(format); for (int i = 0; i < m_NumAttributes; i++) { example.setValue(i, randomG.nextGaussian() * stdDev + center[i]); } if (getClassFlag()) { example.setClassValue(cName); } return example; } /** * Defines the clusters * * @param random random number generator */ private FastVector defineClusters(Random random) throws Exception { if (m_Pattern == GRID) return defineClustersGRID(random); else return defineClustersRANDOM(random); } /** * Defines the clusters if pattern is GRID * * @param random random number generator */ private FastVector defineClustersGRID(Random random) throws Exception { FastVector clusters = new FastVector(m_NumClusters); double diffInstNum = (double) (m_MaxInstNum - m_MinInstNum); double minInstNum = (double) m_MinInstNum; double diffRadius = m_MaxRadius - m_MinRadius; Cluster cluster; // compute gridsize double gs = Math.pow(m_NumClusters, 1.0 / m_NumAttributes); if (gs - ((double) ((int) gs)) > 0.0) { m_GridSize = (int) (gs + 1.0); } else { m_GridSize = (int) gs; } // compute gridwidth m_GridWidth = ((m_MaxRadius + m_MinRadius) / 2) * m_DistMult; System.out.println("GridSize= " + m_GridSize); System.out.println("GridWidth= " + m_GridWidth); // initialize gridvector with zeros GridVector gv = new GridVector(m_NumAttributes, m_GridSize); for (int i = 0; i < m_NumClusters; i++) { int instNum = (int) (random.nextDouble() * diffInstNum + minInstNum); double radius = (random.nextDouble() * diffRadius) + m_MinRadius; // center is defined in the constructor of cluster cluster = new Cluster(instNum, radius, gv.getGridVector(), m_GridWidth); 
clusters.addElement((Object) cluster); gv.addOne(); } return clusters; } /** * Defines the clusters if pattern is RANDOM * * @param random random number generator */ private FastVector defineClustersRANDOM(Random random) throws Exception { FastVector clusters = new FastVector(m_NumClusters); double diffInstNum = (double) (m_MaxInstNum - m_MinInstNum); double minInstNum = (double) m_MinInstNum; double diffRadius = m_MaxRadius - m_MinRadius; Cluster cluster; for (int i = 0; i < m_NumClusters; i++) { int instNum = (int) (random.nextDouble() * diffInstNum + minInstNum); double radius = (random.nextDouble() * diffRadius) + m_MinRadius; // center is defined in the constructor of cluster cluster = new Cluster(instNum, radius, random); clusters.addElement((Object) cluster); } return clusters; } /** * Compiles documentation about the data generation after * the generation process * * @return string with additional information about generated dataset * @exception Exception no input structure has been defined */ public String generateFinished() throws Exception { StringBuffer docu = new StringBuffer(); Instances format = getDatasetFormat();//just for exception // string is empty docu.append("\n%\n%\n"); return docu.toString(); } /** * Compiles documentation about the data generation before * the generation process * * @return string with additional information */ public String generateStart() { StringBuffer docu = new StringBuffer(); // string is empty docu.append("\n%\n%\n"); int sumInst = 0; int cNum = 0; for (Enumeration enum = m_ClusterList.elements(); enum.hasMoreElements(); cNum++) { Cluster cl = (Cluster) enum.nextElement(); docu.append("%\n"); docu.append("% Cluster: c"+ cNum + "\n"); docu.append("% ----------------------------------------------\n"); docu.append("% StandardDeviation: " + Utils.doubleToString(cl.getStdDev(), 2) + "\n"); docu.append("% Number of instances: " + cl.getInstNum() + "\n"); sumInst += cl.getInstNum(); double [] center = cl.getCenter(); 
docu.append("% "); for (int i = 0; i < center.length - 1; i++) { docu.append(Utils.doubleToString(center[i], 2) + ", "); } docu.append(Utils.doubleToString(center[center.length - 1], 2) + "\n"); } docu.append("\n% ----------------------------------------------\n"); docu.append("% Total number of instances: " + sumInst + "\n"); docu.append("% in " + cNum + " clusters\n"); docu.append("% Pattern chosen : "); if (getGridFlag()) docu.append("GRID, " + "distance multiplier = " + Utils.doubleToString(m_DistMult, 2) + "\n"); else if (getSineFlag()) docu.append("SINE\n"); else docu.append("RANDOM\n"); return docu.toString(); } /** * Main method for testing this class. * * @param argv should contain arguments for the data producer: */ public static void main(String [] argv) { try { ClusterGenerator.makeData(new BIRCHCluster(), argv); } catch (Exception ex) { System.out.println(ex.getMessage()); } }}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?