📄 tertius.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* Tertius.java
* Copyright (C) 2003 Peter A. Flach, Nicolas Lachiche
*
* Thanks to Amelie Deltour for porting the original C code to Java
* and integrating it into Weka.
*/
package weka.associations;
import java.awt.BorderLayout;
import java.awt.Button;
import java.awt.Font;
import java.awt.Frame;
import java.awt.Label;
import java.awt.TextField;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.Vector;
import weka.associations.tertius.AttributeValueLiteral;
import weka.associations.tertius.IndividualInstances;
import weka.associations.tertius.IndividualLiteral;
import weka.associations.tertius.Literal;
import weka.associations.tertius.Predicate;
import weka.associations.tertius.Rule;
import weka.associations.tertius.SimpleLinkedList;
import weka.core.Attribute;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.Utils;
/**
* Class implementing a Tertius-type algorithm. <p>
*
* Reference: P. A. Flach, N. Lachiche (2001). <i>Confirmation-Guided
* Discovery of First-Order Rules with Tertius</i>.
* Machine Learning, 42(1/2), 61-95. <p>
*
* Valid options are:<p>
*
* -K number of values in result <br>
* Set maximum number of confirmation values in the result. (default: 10) <p>
*
* -F frequency threshold <br>
* Set frequency threshold for pruning. (default: 0) <p>
*
* -C confirmation threshold <br>
* Set confirmation threshold. (default: 0) <p>
*
* -N noise threshold <br>
* Set noise threshold : maximum frequency of counter-examples.
* 0 gives only satisfied rules. (default: 1) <p>
*
* -R <br>
* Allow attributes to be repeated in the same rule. <p>
*
* -L number of literals <br>
* Set maximum number of literals in a rule. (default: 4) <p>
*
* -G 0=no negation | 1=body | 2=head | 3=body and head <br>
* Set the negations in the rule. (default: 0) <p>
*
* -S <br>
* Consider only classification rules. <p>
*
* -c class index <br>
* Set index of class attribute. (default: last). <p>
*
* -H <br>
* Consider only Horn clauses. <p>
*
* -E <br>
* Keep equivalent rules. <p>
*
* -M <br>
* Keep same clauses. <p>
*
* -T <br>
* Keep subsumed rules. <p>
*
* -I 0=always match | 1=never match | 2=significant <br>
* Set the way to handle missing values. (default: 0) <p>
*
* -O <br>
* Use ROC analysis. <p>
*
* -p name of file <br>
* Set the file containing the parts of the individual for individual-based
* learning. <p>
*
* -P 0=no output | 1=on stdout | 2=in separate window <br>
* Set output of current values. (default: 0) <p>
*
* @author <a href="mailto:adeltour@netcourrier.com">Amelie Deltour</a>
* @version $Revision$
*/
public class Tertius extends Associator implements OptionHandler, Runnable {
/**
 * The results of the search.
 * NOTE(review): the element type stored in this list is not visible in
 * this part of the file -- confirm against the code that fills it.
 */
private SimpleLinkedList m_results;
/** Number of hypotheses considered. */
private int m_hypotheses;
/** Number of hypotheses explored. */
private int m_explored;
/** Time needed for the search. */
private Date m_time;
/**
 * Text field used to output the current values.
 * Presumably only used when the values are shown in a separate window --
 * TODO confirm.
 */
private TextField m_valuesText;
/** Instances used for the search. */
private Instances m_instances;
/**
 * Predicates used in the rules.
 * Raw list; presumably holds Predicate objects -- TODO confirm.
 */
private ArrayList m_predicates;
/** Status of the search; the status codes are declared right below. */
private int m_status;
/** Status code 0. Presumably "search finished without problem" -- TODO confirm. */
private static final int NORMAL = 0;
/** Status code 1: memory problem. */
private static final int MEMORY = 1; // memory problem
/** Status code 2: user interruption. */
private static final int STOP = 2; // user interruption
/* Pruning options. */
/**
 * Number of best confirmation values to search (option -K).
 * resetOptions() sets it to 10; setOptions() rejects values below 1 and
 * sets it to 0 when a non-zero confirmation threshold is given instead.
 */
private int m_best;
/**
 * Frequency threshold for the body and the negation of the head
 * (option -F). Reset to 0; setOptions() only accepts values in [0, 1].
 */
private double m_frequencyThreshold;
/**
 * Confirmation threshold for the rules (option -C).
 * Reset to 0; setOptions() only accepts values in [0, 1] and refuses the
 * option when -K is given as well.
 */
private double m_confirmationThreshold;
/**
 * Noise threshold (option -N), described in the option help as the
 * maximum frequency of counter-examples. Reset to 1; setOptions() only
 * accepts values in [0, 1].
 */
private double m_noiseThreshold;
/* Search space & language bias options. */
/** Whether attributes may be repeated in one rule (flag -R); reset to false. */
private boolean m_repeat;
/**
 * Maximum number of literals in a rule (option -L).
 * Reset to 4; setOptions() rejects values below 1.
 */
private int m_numLiterals;
/* Types of negation, numbered as in the -G option help. */
/** Negation type 0: no negation. */
private static final int NONE = 0;
/** Negation type 1: negation in the body. */
private static final int BODY = 1;
/** Negation type 2: negation in the head. */
private static final int HEAD = 2;
/** Negation type 3: negation in body and head. */
private static final int ALL = 3;
/** Tags pairing each negation type with its display name. */
private static final Tag [] TAGS_NEGATION = {
new Tag(NONE, "None"),
new Tag(BODY, "Body"),
new Tag(HEAD, "Head"),
new Tag(ALL, "Both")
};
/** Type of negation used in the rules; reset to NONE. */
private int m_negation;
/** Classification bias (flag -S, "consider only classification rules"); reset to false. */
private boolean m_classification;
/**
 * Index of class attribute (option -c).
 * NOTE(review): resetOptions() stores 0 here while the option help
 * advertises "default: last"; presumably 0 is a sentinel for the last
 * attribute -- confirm where this field is consumed.
 */
private int m_classIndex;
/** Horn clauses bias (flag -H, "consider only horn clauses"); reset to false. */
private boolean m_horn;
/* Subsumption tests options. */
/**
 * Perform test on equivalent rules ? Reset to true.
 * The matching command-line flag -E is documented as "Keep equivalent
 * rules"; presumably giving it switches this test off -- TODO confirm.
 */
private boolean m_equivalent;
/**
 * Perform test on same clauses ? Reset to true.
 * The matching command-line flag -M is documented as "Keep same clauses";
 * presumably giving it switches this test off -- TODO confirm.
 */
private boolean m_sameClause;
/**
 * Perform subsumption test ? Reset to true.
 * The matching command-line flag -T is documented as "Keep subsumed
 * rules"; presumably giving it switches this test off -- TODO confirm.
 */
private boolean m_subsumption;
/* Ways of handling missing values, numbered as in the -I option help. */
/** Missing values always match (tag "Matches all"). */
public static final int EXPLICIT = 0; // min counterinstances
/** Missing values never match (tag "Matches none"). */
public static final int IMPLICIT = 1; // max counterinstances
/** A missing value is treated as a particular value (tag "Significant"). */
public static final int SIGNIFICANT = 2; // missing as a particular value
/** Tags pairing each missing-value mode with its display name. */
private static final Tag [] TAGS_MISSING = {
new Tag(EXPLICIT, "Matches all"),
new Tag(IMPLICIT, "Matches none"),
new Tag(SIGNIFICANT, "Significant")
};
/** Way of handling missing values in the search; reset to EXPLICIT. */
private int m_missing;
/** Perform ROC analysis ? (flag -O); reset to false. */
private boolean m_roc;
/**
 * Name of the file containing the parts for individual-based learning
 * (option -p); reset to the empty string.
 */
private String m_partsString;
/** Part instances for individual-based learning; reset to null. */
private Instances m_parts;
/* Types of values output, numbered as in the -P option help. */
/** Output type 0: no output of the current values. */
private static final int NO = 0;
/** Output type 1: current values on stdout. */
private static final int OUT = 1;
/** Output type 2: current values in a separate window. */
private static final int WINDOW = 2;
/** Tags pairing each output type with its display name. */
private static final Tag [] TAGS_VALUES = {
new Tag(NO, "No"),
new Tag(OUT, "stdout"),
new Tag(WINDOW, "Window")
};
/** Type of values output; reset to NO. */
private int m_printValues;
/**
 * Creates a Tertius associator whose options all hold their default
 * values, as established by {@link #resetOptions()}.
 */
public Tertius() {
  super();
  this.resetOptions();
}
/**
 * Gives a one-sentence description of this associator.
 *
 * @return A description of the associator suitable for
 * displaying in the explorer/experimenter gui.
 */
public String globalInfo() {
  final String description = "Finds rules according to confirmation measure.";
  return description;
}
/**
 * Restores every option of this associator to its default value.
 * Called by the constructor and at the start of option parsing, so that
 * options which are not given explicitly fall back to these values.
 */
public void resetOptions() {
  // Pruning: ten best confirmation values, no frequency or confirmation
  // threshold, and the largest accepted noise threshold.
  this.m_best = 10;
  this.m_frequencyThreshold = 0.0;
  this.m_confirmationThreshold = 0.0;
  this.m_noiseThreshold = 1.0;

  // Search space and language bias.
  this.m_repeat = false;
  this.m_numLiterals = 4;
  this.m_negation = NONE;
  this.m_classification = false;
  this.m_classIndex = 0;
  this.m_horn = false;

  // Subsumption tests: all three are performed by default.
  this.m_equivalent = true;
  this.m_sameClause = true;
  this.m_subsumption = true;

  // Missing values.
  this.m_missing = EXPLICIT;

  // ROC analysis.
  this.m_roc = false;

  // Individual-based learning: no parts file, no part instances.
  this.m_partsString = "";
  this.m_parts = null;

  // Output of the current values.
  this.m_printValues = NO;
}
/**
 * Lists the command-line options understood by this associator.
 * The options are built as a table first and then copied, in table
 * order, into the vector whose enumeration is returned.
 *
 * @return An enumeration of all the available options.
 */
public Enumeration listOptions() {
  final Option [] available = {
    /* Pruning options. */
    new Option(
        "\tSet maximum number of confirmation values in the result. "
        + "(default: 10)",
        "K", 1, "-K <number of values in result>"),
    new Option(
        "\tSet frequency threshold for pruning. (default: 0)",
        "F", 1, "-F <frequency threshold>"),
    new Option(
        "\tSet confirmation threshold. (default: 0)",
        "C", 1, "-C <confirmation threshold>"),
    new Option(
        "\tSet noise threshold : maximum frequency of counter-examples."
        + "\n\t0 gives only satisfied rules. (default: 1)",
        "N", 1, "-N <noise threshold>"),

    /* Search space & language bias options. */
    new Option(
        "\tAllow attributes to be repeated in a same rule.",
        "R", 0, "-R"),
    new Option(
        "\tSet maximum number of literals in a rule. (default: 4)",
        "L", 1, "-L <number of literals>"),
    new Option(
        "\tSet the negations in the rule. (default: 0)",
        "G", 1,
        "-G <0=no negation | 1=body | 2=head | 3=body and head>"),
    new Option(
        "\tConsider only classification rules.",
        "S", 0, "-S"),
    new Option(
        "\tSet index of class attribute. (default: last).",
        "c", 1, "-c <class index>"),
    new Option(
        "\tConsider only horn clauses.",
        "H", 0, "-H"),

    /* Subsumption tests options. */
    new Option("\tKeep equivalent rules.", "E", 0, "-E"),
    new Option("\tKeep same clauses.", "M", 0, "-M"),
    new Option("\tKeep subsumed rules.", "T", 0, "-T"),

    /* Missing values options. */
    new Option(
        "\tSet the way to handle missing values. (default: 0)",
        "I", 1,
        "-I <0=always match | 1=never match | 2=significant>"),

    /* ROC analysis. */
    new Option("\tUse ROC analysis. ", "O", 0, "-O"),

    /* Individual-based learning. */
    new Option(
        "\tSet the file containing the parts of the individual "
        + "for individual-based learning.",
        "p", 1, "-p <name of file>"),

    /* Values output. */
    new Option(
        "\tSet output of current values. (default: 0)",
        "P", 1,
        "-P <0=no output | 1=on stdout | 2=in separate window>")
  };

  // The table holds 17 options, so the vector is sized as before.
  final Vector collected = new Vector(available.length);
  for (int i = 0; i < available.length; i++) {
    collected.addElement(available[i]);
  }
  return collected.elements();
}
/**
* Parses a given list of options.
*
* @param options The list of options as an array of strings.
* @exception Exception if an option is not supported.
*/
public void setOptions(String [] options) throws Exception {
resetOptions();
/* Pruning options. */
String bestString = Utils.getOption('K', options);
if (bestString.length() != 0) {
try {
m_best = Integer.parseInt(bestString);
} catch (Exception e) {
throw new Exception("Invalid value for -K option: "
+ e.getMessage() + ".");
}
if (m_best < 1) {
throw new Exception("Number of confirmation values has to be "
+ "greater than one!");
}
}
String frequencyThresholdString = Utils.getOption('F', options);
if (frequencyThresholdString.length() != 0) {
try {
m_frequencyThreshold
= (new Double(frequencyThresholdString)).doubleValue();
} catch (Exception e) {
throw new Exception("Invalid value for -F option: "
+ e.getMessage() + ".");
}
if (m_frequencyThreshold < 0 || m_frequencyThreshold > 1) {
throw new Exception("Frequency threshold has to be between "
+ "zero and one!");
}
}
String confirmationThresholdString = Utils.getOption('C', options);
if (confirmationThresholdString.length() != 0) {
try {
m_confirmationThreshold
= (new Double(confirmationThresholdString)).doubleValue();
} catch (Exception e) {
throw new Exception("Invalid value for -C option: "
+ e.getMessage() + ".");
}
if (m_confirmationThreshold < 0 || m_confirmationThreshold > 1) {
throw new Exception("Confirmation threshold has to be between "
+ "zero and one!");
}
if (bestString.length() != 0) {
throw new Exception("Specifying both a number of confirmation "
+ "values and a confirmation threshold "
+ "doesn't make sense!");
}
if (m_confirmationThreshold != 0) {
m_best = 0;
}
}
String noiseThresholdString = Utils.getOption('N', options);
if (noiseThresholdString.length() != 0) {
try {
m_noiseThreshold = (new Double(noiseThresholdString)).doubleValue();
} catch (Exception e) {
throw new Exception("Invalid value for -N option: "
+ e.getMessage() + ".");
}
if (m_noiseThreshold < 0 || m_noiseThreshold > 1) {
throw new Exception("Noise threshold has to be between "
+ "zero and one!");
}
}
/* Search space and language bias options. */
m_repeat = Utils.getFlag('R', options);
String numLiteralsString = Utils.getOption('L', options);
if (numLiteralsString.length() != 0) {
try {
m_numLiterals = Integer.parseInt(numLiteralsString);
} catch (Exception e) {
throw new Exception("Invalid value for -L option: "
+ e.getMessage() + ".");
}
if (m_numLiterals < 1) {
throw new Exception("Number of literals has to be "
+ "greater than one!");
}
}
String negationString = Utils.getOption('G', options);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -