📄 agrawal.java
字号:
* <pre> -h * Prints this help.</pre> * * <pre> -o <file> * The name of the output file, otherwise the generated data is * printed to stdout.</pre> * * <pre> -r <name> * The name of the relation.</pre> * * <pre> -d * Whether to print debug informations.</pre> * * <pre> -S * The seed for random function (default 1)</pre> * * <pre> -n <num> * The number of examples to generate (default 100)</pre> * * <pre> -F <num> * The function to use for generating the data. (default 1)</pre> * * <pre> -B * Whether to balance the class.</pre> * * <pre> -P <num> * The perturbation factor. (default 0.05)</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String tmpStr; super.setOptions(options); tmpStr = Utils.getOption('F', options); if (tmpStr.length() != 0) setFunction(new SelectedTag(Integer.parseInt(tmpStr), FUNCTION_TAGS)); else setFunction(defaultFunction()); setBalanceClass(Utils.getFlag('B', options)); tmpStr = Utils.getOption('P', options); if (tmpStr.length() != 0) setPerturbationFraction(Double.parseDouble(tmpStr)); else setPerturbationFraction(defaultPerturbationFraction()); } /** * Gets the current settings of the datagenerator. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { Vector result; String[] options; int i; result = new Vector(); options = super.getOptions(); for (i = 0; i < options.length; i++) result.add(options[i]); result.add("-F"); result.add("" + m_Function); if (getBalanceClass()) result.add("-B"); result.add("-P"); result.add("" + getPerturbationFraction()); return (String[]) result.toArray(new String[result.size()]); } /** * returns the default function * * @return the default function */ protected SelectedTag defaultFunction() { return new SelectedTag(FUNCTION_1, FUNCTION_TAGS); } /** * Gets the function for generating the data. 
* * @return the function. * @see #FUNCTION_TAGS */ public SelectedTag getFunction() { return new SelectedTag(m_Function, FUNCTION_TAGS); } /** * Sets the function for generating the data. * * @param value the function. * @see #FUNCTION_TAGS */ public void setFunction(SelectedTag value) { if (value.getTags() == FUNCTION_TAGS) m_Function = value.getSelectedTag().getID(); } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String functionTipText() { return "The function to use for generating the data."; } /** * returns the default for balancing the class * * @return the default for balancing the class */ protected boolean defaultBalanceClass() { return false; } /** * Gets whether the class is balanced. * * @return whether the class is balanced. */ public boolean getBalanceClass() { return m_BalanceClass; } /** * Sets whether the class is balanced. * * @param value whether to balance the class. */ public void setBalanceClass(boolean value) { m_BalanceClass = value; } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String balanceClassTipText() { return "Whether to balance the class."; } /** * returns the default perturbation fraction * * @return the default perturbation fraction */ protected double defaultPerturbationFraction() { return 0.05; } /** * Gets the perturbation fraction. * * @return the perturbation fraction. */ public double getPerturbationFraction() { return m_PerturbationFraction; } /** * Sets the perturbation fraction. * * @param value the perturbation fraction. 
*/
  public void setPerturbationFraction(double value) {
    // Written as a negated range check so that NaN is rejected as well.
    boolean inRange = (value >= 0.0) && (value <= 1.0);
    if (!inRange) {
      throw new IllegalArgumentException(
          "Perturbation fraction must be in [0,1] (provided: " + value + ")!");
    }
    m_PerturbationFraction = value;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String perturbationFractionTipText() {
    return "The perturbation fraction: 0 <= fraction <= 1.";
  }

  /**
   * Return if single mode is set for the given data generator
   * mode depends on option setting and or generator type.
   * This generator always reports single mode.
   *
   * @return single mode flag (always true)
   * @throws Exception if mode is not set yet
   */
  public boolean getSingleModeFlag() throws Exception {
    return true;
  }

  /**
   * Initializes the format for the dataset produced.
   * Must be called before the generateExample or generateExamples
   * methods are used.
   * Re-initializes the random number generator with the given seed and
   * resets the class-balancing state.
   *
   * @return the format for the dataset
   * @throws Exception if the generating of the format failed
   * @see #getSeed()
   */
  public Instances defineDataFormat() throws Exception {
    FastVector atts;
    FastVector attValues;
    int i;

    m_Random = new Random(getSeed());
    m_nextClassShouldBeZero = true;
    m_lastLabel = Double.NaN;

    // number of examples is the same as given per option
    setNumExamplesAct(getNumExamples());

    // set up attributes; the order here must match the order in which
    // generateExample() fills its value array
    atts = new FastVector();

    atts.addElement(new Attribute("salary"));
    atts.addElement(new Attribute("commission"));
    atts.addElement(new Attribute("age"));

    // elevel: labels "0" .. "4"
    attValues = new FastVector();
    for (i = 0; i < 5; i++)
      attValues.addElement("" + i);
    atts.addElement(new Attribute("elevel", attValues));

    // car: labels "1" .. "20"
    attValues = new FastVector();
    for (i = 1; i <= 20; i++)
      attValues.addElement("" + i);
    atts.addElement(new Attribute("car", attValues));

    // zipcode: labels "0" .. "8"
    attValues = new FastVector();
    for (i = 0; i < 9; i++)
      attValues.addElement("" + i);
    atts.addElement(new Attribute("zipcode", attValues));

    atts.addElement(new Attribute("hvalue"));
    atts.addElement(new Attribute("hyears"));
    atts.addElement(new Attribute("loan"));

    // group (the class label): labels "0" and "1"
    attValues = new FastVector();
    for (i = 0; i < 2; i++)
      attValues.addElement("" + i);
    atts.addElement(new Attribute("group", attValues));

    // dataset
    m_DatasetFormat = new Instances(getRelationNameToUse(), atts, 0);

    return m_DatasetFormat;
  }

  /**
   * Perturbs the given value, using the width of the interval
   * [min, max] as the range of the perturbation.
   *
   * @param val the value to perturb
   * @param min the minimum
   * @param max the maximum
   * @return the perturbed value
   */
  protected double perturbValue(double val, double min, double max) {
    return perturbValue(val, max - min, min, max);
  }

  /**
   * Perturbs the given value by a random offset of at most
   * range * perturbation fraction in either direction and clamps the
   * result to [min, max].
   *
   * @param val the value to perturb
   * @param range the range for the perturbation
   * @param min the minimum
   * @param max the maximum
   * @return the perturbed value
   */
  protected double perturbValue(double val, double range,
      double min, double max) {

    // 2 * (u - 0.5) maps the uniform draw u from [0,1) onto [-1,1)
    val += range * (2.0 * (getRandom().nextDouble() - 0.5))
        * getPerturbationFraction();

    if (val < min)
      val = min;
    else if (val > max)
      val = max;

    return val;
  }

  /**
   * Generates one example of the dataset.
   * <p/>
   * Attribute values are drawn at random, the class label is determined by
   * the selected function from the unperturbed values, and only afterwards
   * are the numeric attributes perturbed (if the perturbation fraction is
   * greater than 0). If class balancing is on, candidates are drawn
   * repeatedly until one with the label that is due next (alternating
   * 0, 1, 0, ...) is found.
   *
   * @return the generated example
   * @throws Exception if the format of the dataset is not yet defined
   * @throws Exception if the generator only works with generateExamples
   *                   which means in non single mode
   */
  public Instance generateExample() throws Exception {
    Instance result;
    double salary;
    double commission;
    double hvalue;
    double loan;
    int age;
    int elevel;
    int car;
    int zipcode;
    int hyears;
    boolean desiredClassFound;
    double[] atts;
    Random random;
    ClassFunction classFunction;

    random = getRandom();

    if (m_DatasetFormat == null)
      throw new Exception("Dataset format not defined.");

    desiredClassFound = false;
    classFunction = builtInFunctions[m_Function - 1];

    // At least one candidate is always needed, hence do/while. The order
    // of the draws below determines the generated sequence for a given
    // seed and must not be changed.
    do {
      // generate attributes
      salary = 20000.0 + 130000.0 * random.nextDouble();
      commission = (salary >= 75000.0)
          ? 0
          : (10000.0 + 65000.0 * random.nextDouble());
      age = 20 + random.nextInt(61);
      elevel = random.nextInt(5);
      car = 1 + random.nextInt(20);
      zipcode = random.nextInt(9);
      hvalue = (9.0 - (double) zipcode) * 100000.0
          * (0.5 + random.nextDouble());
      hyears = 1 + random.nextInt(30);
      loan = random.nextDouble() * 500000.0;

      // determine class
      m_lastLabel = classFunction.determineClass(salary, commission, age,
          elevel, car, zipcode, hvalue, hyears, loan);

      if (!getBalanceClass()) {
        desiredClassFound = true;
      }
      else {
        // balance the classes: accept only the label that is due next
        if (    ( m_nextClassShouldBeZero && (m_lastLabel == 0))
             || (!m_nextClassShouldBeZero && (m_lastLabel == 1)) ) {
          desiredClassFound = true;
          m_nextClassShouldBeZero = !m_nextClassShouldBeZero;
        }
        // else keep searching
      }
    } while (!desiredClassFound);

    // perturb values
    if (getPerturbationFraction() > 0.0) {
      salary = perturbValue(salary, 20000, 150000);
      if (commission > 0)
        commission = perturbValue(commission, 10000, 75000);
      age = (int) Math.round(perturbValue(age, 20, 80));
      // hvalue is drawn from [0.5, 1.5) * (9 - zipcode) * 100000 and can
      // therefore reach 9 * 100000 * 1.5 = 1350000. The upper bound used
      // to be 135000 (one zero short), which clamped most house values
      // to that constant whenever perturbation was active.
      hvalue = perturbValue(
          hvalue, (9.0 - (double) zipcode) * 100000.0, 0, 1350000);
      hyears = (int) Math.round(perturbValue(hyears, 1, 30));
      loan = perturbValue(loan, 0, 500000);
    }

    // create instance; nominal attributes are stored as the index of
    // their label, so car (labels "1".."20") is shifted down by one
    atts = new double[m_DatasetFormat.numAttributes()];
    atts[0] = salary;
    atts[1] = commission;
    atts[2] = age;
    atts[3] = elevel;
    atts[4] = car - 1;
    atts[5] = zipcode;
    atts[6] = hvalue;
    atts[7] = hyears;
    atts[8] = loan;
    atts[9] = m_lastLabel;
    result = new Instance(1.0, atts);
    result.setDataset(m_DatasetFormat);

    return result;
  }

  /**
   * Generates all examples of the dataset. Re-initializes the random number
   * generator with the given seed, before generating instances.
   *
   * @return the generated dataset
   * @throws Exception if the format of the dataset is not yet defined
   * @throws Exception if the generator only works with generateExample,
   *                   which means in single mode
   * @see #getSeed()
   */
  public Instances generateExamples() throws Exception {
    Instances result;
    int i;

    // Fail with the documented exception instead of handing a null
    // format to the Instances constructor.
    if (m_DatasetFormat == null)
      throw new Exception("Dataset format not defined.");

    result = new Instances(m_DatasetFormat, 0);
    m_Random = new Random(getSeed());

    for (i = 0; i < getNumExamplesAct(); i++)
      result.add(generateExample());

    return result;
  }

  /**
   * Generates a comment string that documents the data generator.
   * By default this string is added at the beginning of the produced output
   * as ARFF file type, next after the options.
   * This generator contributes no such comment.
   *
   * @return string contains info about the generated rules (always empty)
   */
  public String generateStart () {
    return "";
  }

  /**
   * Generates a comment string that documents the data generator.
   * By default this string is added at the end of the produced output
   * as ARFF file type.
   * This generator contributes no such comment.
   *
   * @return string contains info about the generated rules (always empty)
   * @throws Exception if the generating of the documentation fails
   */
  public String generateFinished() throws Exception {
    return "";
  }

  /**
   * Main method for executing this class.
   *
   * @param args should contain arguments for the data producer:
   */
  public static void main(String[] args) {
    runDataGenerator(new Agrawal(), args);
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -