📄 birchcluster.java
字号:
setInputOrder(ORDERED);
num = Utils.getOption('P', options);
if (num.length() != 0) {
setNoiseRate(Double.valueOf(num).doubleValue());
}
num = Utils.getOption('S', options);
if (num.length() != 0) {
setSeed(Integer.parseInt(num));
}
}
/**
* Gets the current settings of the datagenerator BIRCHCluster.
*
* @return an array of strings suitable for passing to setOptions
*/
public String [] getOptions() {
  // Fixed-size result: at most 18 slots are filled below, the rest are
  // padded with empty strings.
  String [] options = new String [20];
  int i = 0;

  options[i++] = "-N"; options[i++] = "" + getInstNums();
  options[i++] = "-R"; options[i++] = "" + getRadiuses();

  // Flag options are followed by an empty string placeholder.
  if (getGridFlag()) {
    options[i++] = "-G"; options[i++] = "";
    options[i++] = "-D"; options[i++] = "" + getDistMult();
  }
  if (getSineFlag()) {
    options[i++] = "-I"; options[i++] = "";
    options[i++] = "-C"; options[i++] = "" + getNumCycles();
  }
  if (getOrderedFlag()) {
    options[i++] = "-O"; options[i++] = "";
  }

  options[i++] = "-P"; options[i++] = "" + getNoiseRate();

  // setOptions parses -S into the seed, so it has to be reported here as
  // well; otherwise the seed is lost when the options are passed back.
  options[i++] = "-S"; options[i++] = "" + getSeed();

  // pad the unused tail
  while (i < options.length) {
    options[i++] = "";
  }
  return options;
}
/**
* Initializes the format for the dataset produced.
*
* @return the output data format
* @exception Exception data format could not be defined
*/
public Instances defineDataFormat() throws Exception {
  // A fresh generator built from the seed is stored for the later
  // generation step and also used to define the clusters below.
  Random random = new Random (getSeed());
  setRandom(random);

  boolean classFlag = getClassFlag();
  FastVector attributes = new FastVector(3);

  // one numeric attribute X0, X1, ... per dimension
  for (int i = 0; i < getNumAttributes(); i++) {
    attributes.addElement(new Attribute("X" + i));
  }

  // optional nominal class attribute with one value c0, c1, ... per cluster
  if (classFlag) {
    FastVector classValues = new FastVector (m_NumClusters);
    for (int i = 0; i < m_NumClusters; i++) {
      classValues.addElement("c" + i);
    }
    attributes.addElement(new Attribute ("class", classValues));
  }

  Instances dataset = new Instances(getRelationName(), attributes, 0);
  if (classFlag) {
    // The class attribute was appended last, so take its index from the
    // dataset itself rather than from a separately read field.
    dataset.setClassIndex(dataset.numAttributes() - 1);
  }

  // set dataset format of this class (an empty copy of the header)
  Instances format = new Instances(dataset, 0);
  setDatasetFormat(format);

  // the clusters are fixed here, before any example is generated
  m_ClusterList = defineClusters(random);

  return dataset;
}
/**
 * Generate an example of the dataset. This generator cannot produce
 * examples one at a time, so this method always throws.
 * @return never returns normally
 * @exception Exception always, because examples cannot be generated
 * one by one
 */
public Instance generateExample() throws Exception {
  // Single-example generation is not supported: unconditionally report it.
  String message = "Examples cannot be generated one by one.";
  throw new Exception(message);
}
/**
 * Generate all examples of the dataset.
 * @return the generated dataset
 * @exception Exception if the dataset format is not defined or the
 * ordered flag is not set
 */
public Instances generateExamples() throws Exception {
  Random generator = getRandom();
  Instances header = getDatasetFormat();

  // the format has to be defined before anything can be generated
  if (header == null) {
    throw new Exception("Dataset format not defined.");
  }
  // only ordered generation is available
  if (!getOrderedFlag()) {
    throw new Exception("RANDOMIZED is not yet implemented.");
  }
  return generateExamples(generator, header);
}
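/**
 * Generate all examples of the dataset, one cluster after another.
 * @param random the random number generator to use
 * @param format the dataset format; the generated instances are added to it
 * @return the dataset with the generated instances
 * @exception Exception if format not defined
 */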
public Instances generateExamples(Random random,
                                  Instances format) throws Exception {
  if (format == null) {
    throw new Exception("Dataset format not defined.");
  }
  // The cluster list is assigned in defineDataFormat(); fail with a clear
  // message instead of a bare NullPointerException if that has not happened.
  if (m_ClusterList == null) {
    throw new Exception("Clusters not defined.");
  }

  // generate examples for one cluster after another; cNum tracks the
  // position of the cluster and yields its class value c0, c1, ...
  int cNum = 0;
  for (Enumeration em = m_ClusterList.elements();
       em.hasMoreElements(); cNum++) {
    Cluster cl = (Cluster) em.nextElement();
    double stdDev = cl.getStdDev();
    int instNum = cl.getInstNum();
    double [] center = cl.getCenter();
    String cName = "c" + cNum;

    for (int i = 0; i < instNum; i++) {
      Instance example = generateInstance(format, random, stdDev,
                                          center, cName);
      // Both steps belong under the null guard: adding used to happen
      // outside of it, so a null example would have been passed to add().
      if (example != null) {
        example.setDataset(format);
        format.add(example);
      }
    }
  }
  return format;
}
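/**
 * Generate a single instance belonging to one cluster.
 * @param format the dataset format the instance is attached to
 * @param randomG the random number generator to use
 * @param stdDev the standard deviation used for every attribute value
 * @param center the center of the cluster, one value per attribute
 * @param cName the class value to set if a class attribute is generated
 * @return the instance generated
 */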
private Instance generateInstance (Instances format,
                                   Random randomG,
                                   double stdDev,
                                   double [] center,
                                   String cName
                                   ) {
  // reserve one extra slot when a class attribute is part of the format
  int width = getClassFlag() ? m_NumAttributes + 1 : m_NumAttributes;

  Instance result = new Instance(width);
  result.setDataset(format);

  // every attribute value is a Gaussian draw scaled by stdDev and
  // shifted by the matching center coordinate
  for (int att = 0; att < m_NumAttributes; att++) {
    double spread = randomG.nextGaussian() * stdDev;
    result.setValue(att, spread + center[att]);
  }

  if (getClassFlag()) {
    result.setClassValue(cName);
  }
  return result;
}
/**
* Defines the clusters
*
* @param random random number generator
*/
private FastVector defineClusters(Random random)
  throws Exception {
  // GRID has its own layout; every other pattern uses the random layout
  return (m_Pattern == GRID)
    ? defineClustersGRID(random)
    : defineClustersRANDOM(random);
}
/**
* Defines the clusters if pattern is GRID
*
* @param random random number generator
*/
private FastVector defineClustersGRID(Random random)
  throws Exception {
  FastVector clusters = new FastVector(m_NumClusters);
  double diffInstNum = (double) (m_MaxInstNum - m_MinInstNum);
  double minInstNum = (double) m_MinInstNum;
  double diffRadius = m_MaxRadius - m_MinRadius;

  // compute gridsize: the m_NumAttributes-th root of the number of
  // clusters, rounded up to the next whole number
  m_GridSize = (int) Math.ceil(Math.pow(m_NumClusters, 1.0 / m_NumAttributes));

  // compute gridwidth: mean of minimum and maximum radius, scaled by the
  // distance multiplier
  m_GridWidth = ((m_MaxRadius + m_MinRadius) / 2) * m_DistMult;

  // initialize gridvector with zeros
  GridVector gv = new GridVector(m_NumAttributes, m_GridSize);

  for (int i = 0; i < m_NumClusters; i++) {
    // keep the draw order (size first, then radius) so that a given seed
    // keeps producing the same clusters
    int instNum = (int) (random.nextDouble() * diffInstNum
                         + minInstNum);
    double radius = (random.nextDouble() * diffRadius) + m_MinRadius;

    // center is defined in the constructor of cluster
    clusters.addElement(new Cluster(instNum, radius,
                                    gv.getGridVector(), m_GridWidth));
    // presumably advances to the next grid position -- see GridVector
    gv.addOne();
  }
  return clusters;
}
/**
* Defines the clusters if pattern is RANDOM
*
* @param random random number generator
*/
private FastVector defineClustersRANDOM(Random random)
  throws Exception {
  FastVector result = new FastVector(m_NumClusters);

  // ranges the per-cluster size and radius are drawn from
  double instSpan = (double) (m_MaxInstNum - m_MinInstNum);
  double instFloor = (double) m_MinInstNum;
  double radiusSpan = m_MaxRadius - m_MinRadius;

  for (int made = 0; made < m_NumClusters; made++) {
    // size is drawn first, then the radius
    double scaledSize = random.nextDouble() * instSpan + instFloor;
    int size = (int) scaledSize;
    double radius = (random.nextDouble() * radiusSpan) + m_MinRadius;

    // the cluster constructor picks the center using the same generator
    result.addElement(new Cluster(size, radius, random));
  }
  return result;
}
/**
* Compiles documentation about the data generation after
* the generation process
*
* @return string with additional information about generated dataset
* @exception Exception no input structure has been defined
*/
public String generateFinished() throws Exception {
  // The format itself is not needed; the call is kept only so that any
  // exception it raises is passed on to the caller.
  getDatasetFormat();

  // no additional information: just the empty comment frame
  return "\n%\n%\n";
}
/**
* Compiles documentation about the data generation before
* the generation process
*
* @return string with additional information
*/
public String generateStart() {
  StringBuffer text = new StringBuffer("\n%\n%\n");
  int totalInstances = 0;
  int clusterIndex = 0;

  // one comment section per cluster, in list order
  Enumeration remaining = m_ClusterList.elements();
  while (remaining.hasMoreElements()) {
    Cluster current = (Cluster) remaining.nextElement();

    text.append("%\n")
        .append("% Cluster: c").append(clusterIndex).append("\n")
        .append("% ----------------------------------------------\n")
        .append("% StandardDeviation: ")
        .append(Utils.doubleToString(current.getStdDev(), 2)).append("\n")
        .append("% Number of instances: ")
        .append(current.getInstNum()).append("\n");
    totalInstances += current.getInstNum();

    // center coordinates, comma separated, last one ends the line
    double [] center = current.getCenter();
    int last = center.length - 1;
    text.append("% ");
    for (int dim = 0; dim < last; dim++) {
      text.append(Utils.doubleToString(center[dim], 2)).append(", ");
    }
    text.append(Utils.doubleToString(center[last], 2)).append("\n");

    clusterIndex++;
  }

  // summary over all clusters
  text.append("\n% ----------------------------------------------\n")
      .append("% Total number of instances: ").append(totalInstances).append("\n")
      .append("% in ").append(clusterIndex).append(" clusters\n")
      .append("% Pattern chosen : ");

  if (getGridFlag()) {
    text.append("GRID, distance multiplier = ")
        .append(Utils.doubleToString(m_DistMult, 2)).append("\n");
  } else if (getSineFlag()) {
    text.append("SINE\n");
  } else {
    text.append("RANDOM\n");
  }
  return text.toString();
}
/**
* Main method for testing this class.
*
* @param argv should contain arguments for the data producer:
*/
public static void main(String [] argv) {
  try {
    // the generator is created inside the try so that a failure while
    // constructing it is reported the same way as a failure in makeData
    BIRCHCluster generator = new BIRCHCluster();
    ClusterGenerator.makeData(generator, argv);
  } catch (Exception failure) {
    String message = failure.getMessage();
    System.out.println(message);
  }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -