📄 agrawal.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * Agrawal.java * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand * */package weka.datagenerators.classifiers.classification;import weka.core.Attribute;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.SelectedTag;import weka.core.Tag;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.datagenerators.ClassificationGenerator;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * Generates a people database and is based on the paper by Agrawal et al.:<br/> * R. Agrawal, T. Imielinski, A. Swami (1993). Database Mining: A Performance Perspective. IEEE Transactions on Knowledge and Data Engineering. 5(6):914-925. URL http://www.almaden.ibm.com/software/quest/Publications/ByDate.html. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @article{Agrawal1993, * author = {R. Agrawal and T. Imielinski and A. Swami}, * journal = {IEEE Transactions on Knowledge and Data Engineering}, * note = {Special issue on Learning and Discovery in Knowledge-Based Databases}, * number = {6}, * pages = {914-925}, * title = {Database Mining: A Performance Perspective}, * volume = {5}, * year = {1993}, * URL = {http://www.almaden.ibm.com/software/quest/Publications/ByDate.html}, * PDF = {http://www.almaden.ibm.com/software/quest/Publications/papers/tkde93.pdf} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -h * Prints this help.</pre> * * <pre> -o <file> * The name of the output file, otherwise the generated data is * printed to stdout.</pre> * * <pre> -r <name> * The name of the relation.</pre> * * <pre> -d * Whether to print debug informations.</pre> * * <pre> -S * The seed for random function (default 1)</pre> * * <pre> -n <num> * The number of examples to generate (default 100)</pre> * * <pre> -F <num> * The function to use for generating the data. (default 1)</pre> * * <pre> -B * Whether to balance the class.</pre> * * <pre> -P <num> * The perturbation factor. (default 0.05)</pre> * <!-- options-end --> * * @author Richard Kirkby (rkirkby at cs dot waikato dot ac dot nz) * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 1.5 $ */public class Agrawal extends ClassificationGenerator implements TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = 2254651939636143025L; /** * the interface for the class functions */ protected interface ClassFunction { /** * returns a class value based on the given inputs * @param salary the salary * @param commission the commission * @param age the age * @param elevel the education level * @param car * @param zipcode the zip code * @param hvalue * @param hyears * @param loan */ public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan); } /** * built in functions are based on the paper (page 924), * which turn out to be functions pred20 thru pred29 in the public c code */ protected static ClassFunction[] builtInFunctions = { // function 1 new ClassFunction() { public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { if (age < 40 || 60 <= age) return 0; else return 1; } }, // function 2 new ClassFunction() { public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { if (age < 40) if (50000 <= salary && salary <= 100000) return 0; else return 1; else if (age < 60) // && age >= 40 if (75000 <= salary && salary <= 125000) return 0; else return 1; else // age >= 60 if (25000 <= salary && salary <= 75000) return 0; else return 1; } }, // function 3 new ClassFunction() { public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { if (age < 40) if (elevel == 0 || elevel == 1) return 0; else return 1; else if (age < 60) // && age >= 40 if (elevel == 1 || elevel == 2 || elevel == 3) return 0; else return 1; else // age >= 60 if (elevel == 2 || elevel == 3 || elevel == 4) return 0; else return 1; } }, // function 4 new ClassFunction() { public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { if (age < 40) if (elevel == 0 || elevel == 1) if (25000 <= salary && salary <= 75000) return 0; else return 1; else if (50000 <= salary && salary <= 100000) return 0; else return 1; else if (age < 60) // && age >= 40 if (elevel == 1 || elevel == 2 || elevel == 3) if (50000 <= salary && salary <= 100000) return 0; else return 1; else if (75000 <= salary && salary <= 125000) return 0; else return 1; else // age >= 60 if (elevel == 2 || elevel == 3 || elevel == 4) if (50000 <= salary && salary <= 100000) return 0; else return 1; else if (25000 <= salary && salary <= 75000) return 0; else return 1; } }, // function 5 new ClassFunction() { public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { if (age < 40) if (50000 <= salary && salary <= 100000) if (100000 <= loan && loan <= 300000) return 0; else return 1; else if (200000 <= loan && loan <= 400000) return 0; else return 1; else if (age < 60) // && age >= 40 if (75000 <= salary && salary <= 125000) if (200000 <= loan && loan <= 400000) return 0; else return 1; else if (300000 <= loan && loan <= 500000) return 0; else return 1; else // age >= 60 if (25000 <= salary && salary <= 75000) if (300000 <= loan && loan <= 500000) return 0; else return 1; else if (100000 <= loan && loan <= 300000) return 0; else return 1; } }, // function 6 new ClassFunction() { public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { double totalSalary = salary + commission; if (age < 40) if (50000 <= totalSalary && totalSalary <= 100000) return 0; else return 1; else if (age < 60) // && age >= 40 if (75000 <= totalSalary && totalSalary <= 125000) return 0; else return 1; else // age >= 60 if (25000 <= totalSalary && totalSalary <= 75000) return 0; else return 1; } }, // function 7 new ClassFunction() { public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { double disposable = (2.0 * (salary + commission) / 3.0 - loan / 5.0 - 20000.0); return disposable > 0 ? 0 : 1; } }, // function 8 new ClassFunction() { public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { double disposable = (2.0 * (salary + commission) / 3.0 - 5000.0 * (double) elevel - 20000.0); return disposable > 0 ? 0 : 1; } }, // function 9 new ClassFunction() { public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { double disposable = (2.0 * (salary + commission) / 3.0 - 5000.0 * (double) elevel - loan / 5.0 - 10000.0); return disposable > 0 ? 0 : 1; } }, // function 10 new ClassFunction() { public long determineClass(double salary, double commission, int age, int elevel, int car, int zipcode, double hvalue, int hyears, double loan) { double equity = 0.0; if (hyears >= 20) equity = hvalue * ((double) hyears - 20.0) / 10.0; double disposable = (2.0 * (salary + commission) / 3.0 - 5000.0 * (double) elevel + equity / 5.0 - 10000.0); return disposable > 0 ? 0 : 1; } } }; /** function 1 */ public final static int FUNCTION_1 = 1; /** function 2 */ public final static int FUNCTION_2 = 2; /** function 3 */ public final static int FUNCTION_3 = 3; /** function 4 */ public final static int FUNCTION_4 = 4; /** function 5 */ public final static int FUNCTION_5 = 5; /** function 6 */ public final static int FUNCTION_6 = 6; /** function 7 */ public final static int FUNCTION_7 = 7; /** function 8 */ public final static int FUNCTION_8 = 8; /** function 9 */ public final static int FUNCTION_9 = 9; /** function 10 */ public final static int FUNCTION_10 = 10; /** the funtion tags */ public static final Tag[] FUNCTION_TAGS = { new Tag(FUNCTION_1, "Function 1"), new Tag(FUNCTION_2, "Function 2"), new Tag(FUNCTION_3, "Function 3"), new Tag(FUNCTION_4, "Function 4"), new Tag(FUNCTION_5, "Function 5"), new Tag(FUNCTION_6, "Function 6"), new Tag(FUNCTION_7, "Function 7"), new Tag(FUNCTION_8, "Function 8"), new Tag(FUNCTION_9, "Function 9"), new Tag(FUNCTION_10, "Function 10"), }; /** the function to use for generating the data */ protected int m_Function; /** whether to balance the class */ protected boolean m_BalanceClass; /** the perturabation fraction */ protected double m_PerturbationFraction; /** used for balancing the class */ protected boolean m_nextClassShouldBeZero; /** the last class label that was generated */ protected double m_lastLabel; /** * initializes the generator with default values */ public Agrawal() { super(); setFunction(defaultFunction()); setBalanceClass(defaultBalanceClass()); setPerturbationFraction(defaultPerturbationFraction()); } /** * Returns a string describing this data generator. * * @return a description of the data generator suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Generates a people database and is based on the paper by Agrawal " + "et al.:\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.ARTICLE); result.setValue(Field.AUTHOR, "R. Agrawal and T. Imielinski and A. Swami"); result.setValue(Field.YEAR, "1993"); result.setValue(Field.TITLE, "Database Mining: A Performance Perspective"); result.setValue(Field.JOURNAL, "IEEE Transactions on Knowledge and Data Engineering"); result.setValue(Field.VOLUME, "5"); result.setValue(Field.NUMBER, "6"); result.setValue(Field.PAGES, "914-925"); result.setValue(Field.NOTE, "Special issue on Learning and Discovery in Knowledge-Based Databases"); result.setValue(Field.URL, "http://www.almaden.ibm.com/software/quest/Publications/ByDate.html"); result.setValue(Field.PDF, "http://www.almaden.ibm.com/software/quest/Publications/papers/tkde93.pdf"); return result; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options */ public Enumeration listOptions() { Vector result = enumToVector(super.listOptions()); result.add(new Option( "\tThe function to use for generating the data. (default " + defaultFunction().getSelectedTag().getID() + ")", "F", 1, "-F <num>")); result.add(new Option( "\tWhether to balance the class.", "B", 0, "-B")); result.add(new Option( "\tThe perturbation factor. (default " + defaultPerturbationFraction() + ")", "P", 1, "-P <num>")); return result.elements(); } /** * Parses a list of options for this object. <p/> * <!-- options-start --> * Valid options are: <p/> *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -