📄 birchcluster.java
字号:
* @return the current pattern type */ public SelectedTag getPattern() { return new SelectedTag(m_Pattern, TAGS_PATTERN); } /** * Sets the pattern type. * * @param value new pattern type */ public void setPattern(SelectedTag value) { if (value.getTags() == TAGS_PATTERN) m_Pattern = value.getSelectedTag().getID(); } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String patternTipText() { return "The pattern for generating the data."; } /** * returns the default distance multiplier * * @return the default distance multiplier */ protected double defaultDistMult() { return 4.0; } /** * Gets the distance multiplier. * * @return the distance multiplier */ public double getDistMult() { return m_DistMult; } /** * Sets the distance multiplier. * * @param newDistMult new distance multiplier */ public void setDistMult(double newDistMult) { m_DistMult = newDistMult; } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String distMultTipText() { return "The distance multiplier (in combination with the 'Grid' pattern)."; } /** * returns the default number of cycles * * @return the default number of cycles */ protected int defaultNumCycles() { return 4; } /** * Gets the number of cycles. * * @return the number of cycles */ public int getNumCycles() { return m_NumCycles; } /** * Sets the the number of cycles. * * @param newNumCycles new number of cycles */ public void setNumCycles(int newNumCycles) { m_NumCycles = newNumCycles; } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String numCyclesTipText() { return "The number of cycles to use (in combination with the 'Sine' pattern)."; } /** * returns the default input order * * @return the default input order */ protected SelectedTag defaultInputOrder() { return new SelectedTag(ORDERED, TAGS_INPUTORDER); // TODO: the only one that is currently implemented, normally RANDOMIZED } /** * Gets the input order. * * @return the current input order */ public SelectedTag getInputOrder() { return new SelectedTag(m_InputOrder, TAGS_INPUTORDER); } /** * Sets the input order. * * @param value new input order */ public void setInputOrder(SelectedTag value) { if (value.getTags() == TAGS_INPUTORDER) m_InputOrder = value.getSelectedTag().getID(); } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String inputOrderTipText() { return "The input order to use."; } /** * Gets the ordered flag (option O). * * @return true if ordered flag is set */ public boolean getOrderedFlag() { return m_InputOrder == ORDERED; } /** * returns the default noise rate * * @return the default noise rate */ protected double defaultNoiseRate() { return 0.0; } /** * Gets the percentage of noise set. * * @return the percentage of noise set */ public double getNoiseRate() { return m_NoiseRate; } /** * Sets the percentage of noise set. * * @param newNoiseRate new percentage of noise */ public void setNoiseRate(double newNoiseRate) { m_NoiseRate = newNoiseRate; } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String noiseRateTipText() { return "The noise rate to use."; } /** * Gets the single mode flag. * * @return true if methode generateExample can be used. */ public boolean getSingleModeFlag() { return false; } /** * Initializes the format for the dataset produced. * * @return the output data format * @throws Exception data format could not be defined */ public Instances defineDataFormat() throws Exception { Random random = new Random (getSeed()); setRandom(random); Instances dataset; FastVector attributes = new FastVector(3); Attribute attribute; boolean classFlag = getClassFlag(); FastVector classValues = null; if (classFlag) classValues = new FastVector (m_NumClusters); // define dataset for (int i = 0; i < getNumAttributes(); i++) { attribute = new Attribute("X" + i); attributes.addElement(attribute); } if (classFlag) { for (int i = 0; i < m_NumClusters; i++) classValues.addElement("c" + i); attribute = new Attribute ("class", classValues); attributes.addElement(attribute); } dataset = new Instances(getRelationNameToUse(), attributes, 0); if (classFlag) dataset.setClassIndex(getNumAttributes()); // set dataset format of this class Instances format = new Instances(dataset, 0); setDatasetFormat(format); m_ClusterList = defineClusters(random); //System.out.println("dataset" + dataset.numAttributes()); return dataset; } /** * Generate an example of the dataset. * @return the instance generated * @throws Exception if format not defined or generating <br/> * examples one by one is not possible, because voting is chosen */ public Instance generateExample() throws Exception { throw new Exception("Examples cannot be generated" + " one by one."); } /** * Generate all examples of the dataset. * @return the instance generated * @throws Exception if format not defined */ public Instances generateExamples() throws Exception { Random random = getRandom(); Instances data = getDatasetFormat(); if (data == null) throw new Exception("Dataset format not defined."); // generate examples if (getOrderedFlag()) data = generateExamples(random, data); else throw new Exception("RANDOMIZED is not yet implemented."); return (data); } /** * Generate all examples of the dataset. * * @param random the random number generator to use * @param format the dataset format * @return the instance generated * @throws Exception if format not defined */ public Instances generateExamples(Random random, Instances format) throws Exception { Instance example = null; if (format == null) throw new Exception("Dataset format not defined."); // generate examples for one cluster after another int cNum = 0; for (Enumeration enm = m_ClusterList.elements(); enm.hasMoreElements(); cNum++) { Cluster cl = (Cluster) enm.nextElement(); double stdDev = cl.getStdDev(); int instNum = cl.getInstNum(); double[] center = cl.getCenter(); String cName = "c" + cNum; for (int i = 0; i < instNum; i++) { // generate example example = generateInstance( format, random, stdDev, center, cName); if (example != null) example.setDataset(format); format.add(example); } } return (format); } /** * Generate an example of the dataset. * * @param format the dataset format * @param randomG the random number generator * @param stdDev the standard deviation to use * @param center the centers * @param cName the class value * @return the instance generated * examples one by one is not possible, because voting is chosen */ private Instance generateInstance (Instances format, Random randomG, double stdDev, double[] center, String cName) { Instance example; int numAtts = getNumAttributes(); if (getClassFlag()) numAtts++; example = new Instance(numAtts); example.setDataset(format); for (int i = 0; i < getNumAttributes(); i++) example.setValue(i, randomG.nextGaussian() * stdDev + center[i]); if (getClassFlag()) example.setClassValue(cName); return example; } /** * Defines the clusters * * @param random random number generator * @return the cluster definitions * @throws Exception if defining fails */ private FastVector defineClusters(Random random) throws Exception { if (m_Pattern == GRID) return defineClustersGRID(random); else return defineClustersRANDOM(random); } /** * Defines the clusters if pattern is GRID * * @param random random number generator * @return the defined clusters for GRID * @throws Exception if something goes wrong */ private FastVector defineClustersGRID(Random random) throws Exception { FastVector clusters = new FastVector(m_NumClusters); double diffInstNum = (double) (m_MaxInstNum - m_MinInstNum); double minInstNum = (double) m_MinInstNum; double diffRadius = m_MaxRadius - m_MinRadius; Cluster cluster; // compute gridsize double gs = Math.pow(m_NumClusters, 1.0 / getNumAttributes()); if (gs - ((double) ((int) gs)) > 0.0) { m_GridSize = (int) (gs + 1.0); } else { m_GridSize = (int) gs; } // compute gridwidth m_GridWidth = ((m_MaxRadius + m_MinRadius) / 2) * m_DistMult; //System.out.println("GridSize= " + m_GridSize); //System.out.println("GridWidth= " + m_GridWidth); // initialize gridvector with zeros GridVector gv = new GridVector(getNumAttributes(), m_GridSize); for (int i = 0; i < m_NumClusters; i++) { int instNum = (int) (random.nextDouble() * diffInstNum + minInstNum); double radius = (random.nextDouble() * diffRadius) + m_MinRadius; // center is defined in the constructor of cluster cluster = new Cluster(instNum, radius, gv.getGridVector(), m_GridWidth); clusters.addElement((Object) cluster); gv.addOne(); } return clusters; } /** * Defines the clusters if pattern is RANDOM * * @param random random number generator * @return the cluster definitions * @throws Exception if something goes wrong */ private FastVector defineClustersRANDOM(Random random) throws Exception { FastVector clusters = new FastVector(m_NumClusters); double diffInstNum = (double) (m_MaxInstNum - m_MinInstNum); double minInstNum = (double) m_MinInstNum; double diffRadius = m_MaxRadius - m_MinRadius; Cluster cluster; for (int i = 0; i < m_NumClusters; i++) { int instNum = (int) (random.nextDouble() * diffInstNum + minInstNum); double radius = (random.nextDouble() * diffRadius) + m_MinRadius; // center is defined in the constructor of cluster cluster = new Cluster(instNum, radius, random); clusters.addElement((Object) cluster); } return clusters; } /** * Compiles documentation about the data generation after * the generation process * * @return string with additional information about generated dataset * @throws Exception no input structure has been defined */ public String generateFinished() throws Exception { return ""; } /** * Compiles documentation about the data generation before * the generation process * * @return string with additional information */ public String generateStart() { StringBuffer docu = new StringBuffer(); int sumInst = 0; int cNum = 0; for (Enumeration enm = m_ClusterList.elements(); enm.hasMoreElements(); cNum++) { Cluster cl = (Cluster) enm.nextElement(); docu.append("%\n"); docu.append("% Cluster: c"+ cNum + "\n"); docu.append("% ----------------------------------------------\n"); docu.append("% StandardDeviation: " + Utils.doubleToString(cl.getStdDev(), 2) + "\n"); docu.append("% Number of instances: " + cl.getInstNum() + "\n"); sumInst += cl.getInstNum(); double[] center = cl.getCenter(); docu.append("% "); for (int i = 0; i < center.length - 1; i++) { docu.append(Utils.doubleToString(center[i], 2) + ", "); } docu.append(Utils.doubleToString(center[center.length - 1], 2) + "\n"); } docu.append("%\n% ----------------------------------------------\n"); docu.append("% Total number of instances: " + sumInst + "\n"); docu.append("% in " + cNum + " clusters\n"); docu.append("% Pattern chosen : "); if (m_Pattern == GRID) docu.append( "GRID, " + "distance multiplier = " + Utils.doubleToString(m_DistMult, 2) + "\n"); else if (m_Pattern == SINE) docu.append("SINE\n"); else docu.append("RANDOM\n"); return docu.toString(); } /** * Main method for testing this class. * * @param args should contain arguments for the data producer: */ public static void main(String[] args) { runDataGenerator(new BIRCHCluster(), args); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -