📄 subspacecluster.java
字号:
// check whether all the attributes are covered:
    // count[i] ends up holding the number of cluster definitions whose
    // attribute index range contains attribute i
    count = new int[getNumAttributes()];
    for (i = 0; i < getNumAttributes(); i++) {
      for (n = 0; n < getClusters().length; n++) {
        cl = (SubspaceClusterDefinition) getClusters()[n];
        r = new Range(cl.getAttrIndexRange());
        r.setUpper(getNumAttributes());
        if (r.isInRange(i))
          count[i]++;
      }
    }

    // list all indices that are not covered
    // (reported as i+1, comma separated)
    attrIndex = "";
    for (i = 0; i < count.length; i++) {
      if (count[i] == 0) {
        if (attrIndex.length() != 0)
          attrIndex += ",";
        attrIndex += (i+1);
      }
    }

    // any uncovered attribute makes the configuration invalid
    if (attrIndex.length() != 0)
      throw new IllegalArgumentException(
          "The following attributes are not covered by a cluster "
          + "definition: " + attrIndex + "\n");

    return true;
  }

  /**
   * Gets the single mode flag.
   *
   * @return always false for this generator; the no-argument
   *         generateExample() of this class throws unconditionally
   */
  public boolean getSingleModeFlag() {
    return false;
  }

  /**
   * Initializes the format for the dataset produced.
   * <p>
   * Re-applies the current options, runs the coverage check and installs a
   * new random number generator seeded with getSeed(). Then one attribute is
   * created per attribute index i: "B"+i (values "false"/"true") for indices
   * in the boolean range, "N"+i (values "value-0", "value-1", ...) for
   * indices in the nominal range, and a numeric "X"+i otherwise. If the class
   * flag is set, a nominal attribute "class" with one value "c"+n per cluster
   * is appended and used as class attribute. Finally every cluster definition
   * is handed the random number generator via setNumInstances and gets this
   * generator set as its parent.
   *
   * @return the output data format (an empty dataset)
   * @throws Exception data format could not be defined
   */
  public Instances defineDataFormat() throws Exception {
    // initialize
    setOptions(getOptions());
    checkCoverage();
    Random random = new Random (getSeed());
    setRandom(random);
    Instances dataset;
    FastVector attributes = new FastVector(3);
    Attribute attribute;
    boolean classFlag = getClassFlag();
    FastVector classValues = null;
    if (classFlag)
      classValues = new FastVector(getClusters().length);
    // value list shared by all boolean attributes
    FastVector boolValues = new FastVector(2);
    boolValues.addElement("false");
    boolValues.addElement("true");
    FastVector nomValues = null;

    // define dataset
    for (int i = 0; i < getNumAttributes(); i++) {
      // define boolean attribute
      if (m_booleanCols.isInRange(i)) {
        attribute = new Attribute("B" + i, boolValues);
      } else if (m_nominalCols.isInRange(i)) {
        // define nominal attribute with m_numValues[i] distinct values
        nomValues = new FastVector(m_numValues[i]);
        for (int j = 0; j < m_numValues[i]; j++)
          nomValues.addElement("value-" + j);
        attribute = new Attribute("N" + i, nomValues);
      } else {
        // numerical attribute
        attribute = new Attribute("X" + i);
      }
      attributes.addElement(attribute);
    }

    // optional class attribute: one label per cluster
    if (classFlag) {
      for (int i = 0; i < getClusters().length; i++)
        classValues.addElement("c" + i);
      attribute = new Attribute ("class", classValues);
      attributes.addElement(attribute);
    }

    dataset = new Instances(getRelationNameToUse(), attributes, 0);
    // the class attribute was appended last, i.e. at index m_NumAttributes
    if (classFlag)
      dataset.setClassIndex(m_NumAttributes);

    // set dataset format of this class
    Instances format = new Instances(dataset, 0);
    setDatasetFormat(format);

    // hand the random number generator and this generator to every cluster
    for (int i = 0; i < getClusters().length; i++) {
      SubspaceClusterDefinition cl = (SubspaceClusterDefinition) getClusters()[i];
      cl.setNumInstances(random);
      cl.setParent(this);
    }

    return dataset;
  }

  /**
   * Returns true if attribute is boolean.
   *
   * @param index the index of the attribute (defineDataFormat queries the
   *              same range with indices starting at 0)
   * @return true if the attribute is boolean
   */
  public boolean isBoolean(int index) {
    return m_booleanCols.isInRange(index);
  }

  /**
   * Returns true if attribute is nominal.
   *
   * @param index the index of the attribute (defineDataFormat queries the
   *              same range with indices starting at 0)
   * @return true if the attribute is nominal
   */
  public boolean isNominal(int index) {
    return m_nominalCols.isInRange(index);
  }

  /**
   * Returns the array that stores the number of values for a nominal
   * attribute. The internal array itself is returned, not a copy.
   *
   * @return the array that stores the number of values for a nominal attribute
   */
  public int[] getNumValues() {
    return m_numValues;
  }

  /**
   * Generate an example of the dataset. Not supported by this generator:
   * the method throws unconditionally.
   *
   * @return never returns normally
   * @throws Exception always, since examples cannot be generated one by one
   */
  public Instance generateExample() throws Exception {
    throw new Exception("Examples cannot be generated one by one.");
  }

  /**
   * Generate all examples of the dataset.
* @return the instance generated * @throws Exception if format not defined */ public Instances generateExamples() throws Exception { Instances format = getDatasetFormat(); Instance example = null; if (format == null) throw new Exception("Dataset format not defined."); // generate examples for one cluster after another for (int cNum = 0; cNum < getClusters().length; cNum++) { SubspaceClusterDefinition cl = (SubspaceClusterDefinition) getClusters()[cNum]; //get the number of instances to create int instNum = cl.getNumInstances(); //class value is c + cluster number String cName = "c" + cNum; switch (cl.getClusterType().getSelectedTag().getID()) { case (UNIFORM_RANDOM): for (int i = 0; i < instNum; i++) { // generate example example = generateExample(format, getRandom(), cl, cName); if (example != null) format.add(example); } break; case (TOTAL_UNIFORM): // generate examples if (!cl.isInteger()) generateUniformExamples(format, instNum, cl, cName); else generateUniformIntegerExamples(format, instNum, cl, cName); break; case (GAUSSIAN): // generate examples generateGaussianExamples(format, instNum, getRandom(), cl, cName); break; } } return format; } /** * Generate an example of the dataset. 
* * @param format the dataset format * @param randomG the random number generator to use * @param cl the cluster definition * @param cName the class value * @return the generated instance */ private Instance generateExample( Instances format, Random randomG, SubspaceClusterDefinition cl, String cName) { boolean makeInteger = cl.isInteger(); int num = -1; Instance example = null; int numAtts = m_NumAttributes; if (getClassFlag()) numAtts++; example = new Instance(numAtts); example.setDataset(format); boolean[] attributes = cl.getAttributes(); double[] minValue = cl.getMinValue(); double[] maxValue = cl.getMaxValue(); double value; int clusterI = -1; for (int i = 0; i < m_NumAttributes; i++) { if (attributes[i]) { clusterI++; num++; // boolean or nominal attribute if (isBoolean(i) || isNominal(i)) { if (minValue[clusterI] == maxValue[clusterI]) { value = minValue[clusterI]; } else { int numValues = (int)(maxValue[clusterI] - minValue[clusterI] + 1.0); value = randomG.nextInt(numValues); value += minValue[clusterI]; } } else { // numeric attribute value = randomG.nextDouble() * (maxValue[num] - minValue[num]) + minValue[num]; if (makeInteger) value = Math.round(value); } example.setValue(i, value); } else { example.setMissing(i); } } if (getClassFlag()) example.setClassValue(cName); return example; } /** * Generate examples for a uniform cluster dataset. 
* * @param format the dataset format * @param numInstances the number of instances to generator * @param cl the cluster definition * @param cName the class value */ private void generateUniformExamples( Instances format, int numInstances, SubspaceClusterDefinition cl, String cName) { Instance example = null; int numAtts = m_NumAttributes; if (getClassFlag()) numAtts++; example = new Instance(numAtts); example.setDataset(format); boolean[] attributes = cl.getAttributes(); double[] minValue = cl.getMinValue(); double[] maxValue = cl.getMaxValue(); double[] diff = new double[minValue.length]; for (int i = 0; i < minValue.length; i++) diff[i] = (maxValue[i] - minValue[i]); for (int j = 0; j < numInstances; j++) { int num = -1; for (int i = 0; i < m_NumAttributes; i++) { if (attributes[i]) { num++; double value = minValue[num] + (diff[num] * (double)((double)j / (double)(numInstances - 1))); example.setValue(i, value); } else { example.setMissing(i); } } if (getClassFlag()) example.setClassValue(cName); format.add(example); } } /** * Generate examples for a uniform cluster dataset. 
* * @param format the dataset format * @param numInstances the number of instances to generator * @param cl the cluster definition * @param cName the class value */ private void generateUniformIntegerExamples( Instances format, int numInstances, SubspaceClusterDefinition cl, String cName) { Instance example = null; int numAtts = m_NumAttributes; if (getClassFlag()) numAtts++; example = new Instance(numAtts); example.setDataset(format); boolean[] attributes = cl.getAttributes(); double[] minValue = cl.getMinValue(); double[] maxValue = cl.getMaxValue(); int[] minInt = new int[minValue.length]; int[] maxInt = new int[maxValue.length]; int[] intValue = new int[maxValue.length]; int[] numInt = new int[minValue.length]; int num = 1; for (int i = 0; i < minValue.length; i++) { minInt[i] = (int)Math.ceil(minValue[i]); maxInt[i] = (int)Math.floor(maxValue[i]); numInt[i] = (maxInt[i] - minInt[i] + 1); num = num * numInt[i]; } int numEach = numInstances / num; int rest = numInstances - numEach * num; // initialize with smallest values combination for (int i = 0; i < m_NumAttributes; i++) { if (attributes[i]) { example.setValue(i, (double)minInt[i]); intValue[i] = minInt[i]; } else { example.setMissing(i); } } if (getClassFlag()) example.setClassValue(cName); int added = 0; int attr = 0; // do while not added all do { // add all for one value combination for (int k = 0; k < numEach; k++) { format.add(example); example = (Instance) example.copy(); added++; } if (rest > 0) { format.add(example); example = (Instance) example.copy(); added++; rest--; } if (added >= numInstances) break; // switch to the next value combination boolean done = false; do { if (attributes[attr] && (intValue[attr] + 1 <= maxInt[attr])) { intValue[attr]++; done = true; } else { attr++; } } while (!done); example.setValue(attr, (double)intValue[attr]); } while (added < numInstances); } /** * Generate examples for a uniform cluster dataset. 
* * @param format the dataset format * @param numInstances the number of instances to generate * @param random the random number generator * @param cl the cluster definition * @param cName the class value */ private void generateGaussianExamples( Instances format, int numInstances, Random random, SubspaceClusterDefinition cl, String cName) { boolean makeInteger = cl.isInteger(); Instance example = null; int numAtts = m_NumAttributes; if (getClassFlag()) numAtts++; example = new Instance(numAtts); example.setDataset(format); boolean[] attributes = cl.getAttributes(); double[] meanValue = cl.getMeanValue(); double[] stddevValue = cl.getStddevValue(); for (int j = 0; j < numInstances; j++) { int num = -1; for (int i = 0; i < m_NumAttributes; i++) { if (attributes[i]) { num++; double value = meanValue[num] + (random.nextGaussian() * stddevValue[num]); if (makeInteger) value = Math.round(value); example.setValue(i, value); } else { example.setMissing(i); } } if (getClassFlag()) example.setClassValue(cName); format.add(example); } } /** * Compiles documentation about the data generation after * the generation process * * @return string with additional information about generated dataset * @throws Exception no input structure has been defined */ public String generateFinished() throws Exception { return ""; } /** * Compiles documentation about the data generation before * the generation process * * @return string with additional information */ public String generateStart() { StringBuffer docu = new StringBuffer(); int sumInst = 0; for (int cNum = 0; cNum < getClusters().length; cNum++) { SubspaceClusterDefinition cl = (SubspaceClusterDefinition) getClusters()[cNum]; docu.append("%\n"); docu.append("% Cluster: c"+ cNum + " "); switch (cl.getClusterType().getSelectedTag().getID()) { case UNIFORM_RANDOM: docu.append("Uniform Random"); break; case TOTAL_UNIFORM: docu.append("Total Random"); break; case GAUSSIAN: docu.append("Gaussian"); break; } if (cl.isInteger()) { 
docu.append(" / INTEGER"); } docu.append("\n% ----------------------------------------------\n"); docu.append("%"+cl.attributesToString()); docu.append("\n% Number of Instances: " + cl.getInstNums() + "\n"); docu.append( "% Generated Number of Instances: " + cl.getNumInstances() + "\n"); sumInst += cl.getNumInstances(); } docu.append("%\n% ----------------------------------------------\n"); docu.append("% Total Number of Instances: " + sumInst + "\n"); docu.append("% in " + getClusters().length + " Cluster(s)\n%"); return docu.toString(); } /** * Main method for testing this class. * * @param args should contain arguments for the data producer: */ public static void main(String[] args) { runDataGenerator(new SubspaceCluster(), args); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -