⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tertius.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Tertius.java
 *    Copyright (C) 2003 Peter A. Flach, Nicolas Lachiche
 *
 *    Thanks to Amelie Deltour for porting the original C code to Java
 *    and integrating it into Weka.
 */

package weka.associations;

import java.awt.BorderLayout;
import java.awt.Button;
import java.awt.Font;
import java.awt.Frame;
import java.awt.Label;
import java.awt.TextField;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.Vector;

import weka.associations.tertius.AttributeValueLiteral;
import weka.associations.tertius.IndividualInstances;
import weka.associations.tertius.IndividualLiteral;
import weka.associations.tertius.Literal;
import weka.associations.tertius.Predicate;
import weka.associations.tertius.Rule;
import weka.associations.tertius.SimpleLinkedList;
import weka.core.Attribute;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.Utils;

/**
 * Class implementing a Tertius-type algorithm. <p>
 * 
 * References: P. A. Flach, N. Lachiche (1999). <i>Confirmation-Guided 
 * Discovery of first-order rules with Tertius</i>. 
 * Machine Learning, 42, 61-95. <p>
 * 
 * Valid options are:<p>
 *
 * -K number of values in result <br>
 * Set maximum number of confirmation values in the result. (default: 10) <p>
 *
 * -F frequency threshold <br>
 * Set frequency threshold for pruning. (default: 0) <p>
 *
 * -C confirmation threshold <br>
 * Set confirmation threshold. (default: 0) <p>
 *
 * -N noise threshold <br>
 * Set noise threshold: maximum frequency of counter-examples.
 * 0 gives only satisfied rules. (default: 1) <p>
 *
 * -R <br>
 * Allow attributes to be repeated in the same rule. <p>
 *
 * -L number of literals <br>
 * Set maximum number of literals in a rule. (default: 4) <p>
 *
 * -G 0=no negation | 1=body | 2=head | 3=body and head <br>
 * Set the negations in the rule. (default: 0) <p>
 *
 * -S <br>
 * Consider only classification rules. <p>
 *
 * -c class index <br>
 * Set index of class attribute. (default: last). <p>
 *
 * -H <br>
 * Consider only horn clauses. <p>
 *
 * -E <br>
 * Keep equivalent rules. <p>
 *
 * -M <br>
 * Keep same clauses. <p>
 *
 * -T <br>
 * Keep subsumed rules. <p>
 *
 * -I 0=always match | 1=never match | 2=significant <br>
 * Set the way to handle missing values. (default: 0) <p>
 *
 * -O <br>
 * Use ROC analysis. <p>
 *
 * -p name of file <br>
 * Set the file containing the parts of the individual for individual-based 
 * learning. <p>
 *
 * -P 0=no output | 1=on stdout | 2=in separate window <br>
 * Set output of current values. (default: 0) <p>
 *
 * @author <a href="mailto:adeltour@netcourrier.com">Amelie Deltour</a>
 * @version $Revision$
 */

public class Tertius extends Associator implements OptionHandler, Runnable {

  /** The results of the search. */
  private SimpleLinkedList m_results;

  /** Number of hypotheses considered. */
  private int m_hypotheses;

  /** Number of hypotheses explored. */
  private int m_explored;

  /** Time needed for the search. */
  private Date m_time;

  /**
   * Text field used to output the current values.
   * NOTE(review): presumably only relevant when the values output is
   * WINDOW ("-P 2", in separate window) -- confirm against the search code.
   */
  private TextField m_valuesText;

  /** Instances used for the search. */
  private Instances m_instances;

  /** Predicates used in the rules. */
  private ArrayList m_predicates;

  /** Status of the search; takes one of NORMAL, MEMORY or STOP. */
  private int m_status;
  /** Search status: normal. */
  private static final int NORMAL = 0;
  /** Search status: a memory problem occurred. */
  private static final int MEMORY = 1; // memory problem
  /** Search status: the user interrupted the search. */
  private static final int STOP = 2; // user interruption
  
  /* Pruning options. */

  /**
   * Number of best confirmation values to search (option -K, default 10).
   * Must be at least 1 when given explicitly; it is set to 0 when a
   * non-zero confirmation threshold is specified instead.
   */
  private int m_best;

  /**
   * Frequency threshold for the body and the negation of the head
   * (option -F, default 0, accepted range 0 to 1).
   */
  private double m_frequencyThreshold;

  /**
   * Confirmation threshold for the rules (option -C, default 0, accepted
   * range 0 to 1). Cannot be combined with an explicit -K value.
   */
  private double m_confirmationThreshold;

  /**
   * Noise threshold: maximum frequency of counter-instances (option -N,
   * default 1, accepted range 0 to 1). According to the option
   * description, 0 gives only satisfied rules.
   */
  private double m_noiseThreshold;

  /* Search space & language bias options. */

  /** Allow attributes to be repeated in a rule? (flag -R, default false) */
  private boolean m_repeat;

  /**
   * Maximum number of literals in a rule (option -L, default 4, must be
   * at least 1).
   */
  private int m_numLiterals;

  /*
   * Types of negation. The numeric values match the choices of the -G
   * option: 0=no negation, 1=body, 2=head, 3=body and head.
   */
  /** Negation type: no negation. */
  private static final int NONE = 0;
  /** Negation type: negation in the body. */
  private static final int BODY = 1;
  /** Negation type: negation in the head. */
  private static final int HEAD = 2;
  /** Negation type: negation in both body and head. */
  private static final int ALL = 3;
  /** Tags pairing each negation type with its display name. */
  private static final Tag [] TAGS_NEGATION = {
    new Tag(NONE, "None"),
    new Tag(BODY, "Body"),
    new Tag(HEAD, "Head"),
    new Tag(ALL, "Both")
      };

  /** Type of negation used in the rules (option -G, default NONE). */
  private int m_negation;

  /** Classification bias: consider only classification rules (flag -S). */
  private boolean m_classification;

  /**
   * Index of class attribute (option -c). Reset to 0 by resetOptions().
   * NOTE(review): the option text documents the default as "last", so 0
   * presumably stands for "last attribute" -- confirm where the class
   * index is applied to the instances.
   */
  private int m_classIndex;

  /** Horn clauses bias: consider only horn clauses (flag -H). */
  private boolean m_horn;

  /* Subsumption tests options. */

  /**
   * Perform test on equivalent rules? Defaults to true.
   * NOTE(review): presumably switched off by -E ("Keep equivalent
   * rules") -- the parsing code is not visible here.
   */
  private boolean m_equivalent;

  /**
   * Perform test on same clauses? Defaults to true.
   * NOTE(review): presumably switched off by -M ("Keep same clauses")
   * -- the parsing code is not visible here.
   */
  private boolean m_sameClause;
  
  /**
   * Perform subsumption test? Defaults to true.
   * NOTE(review): presumably switched off by -T ("Keep subsumed rules")
   * -- the parsing code is not visible here.
   */
  private boolean m_subsumption;

  /*
   * Ways of handling missing values. The numeric values match the choices
   * of the -I option: 0=always match, 1=never match, 2=significant.
   */
  /** Missing values match everything (minimum of counter-instances). */
  public static final int EXPLICIT = 0; // min counterinstances
  /** Missing values match nothing (maximum of counter-instances). */
  public static final int IMPLICIT = 1; // max counterinstances
  /** Missing is treated as a particular value of its own. */
  public static final int SIGNIFICANT = 2; // missing as a particular value
  /** Tags pairing each missing-value mode with its display name. */
  private static final Tag [] TAGS_MISSING = {
    new Tag(EXPLICIT, "Matches all"),
    new Tag(IMPLICIT, "Matches none"),
    new Tag(SIGNIFICANT, "Significant")
      };

  /**
   * Way of handling missing values in the search (option -I, default
   * EXPLICIT).
   */
  private int m_missing;

  /** Perform ROC analysis? (flag -O, default false) */
  private boolean m_roc;

  /**
   * Name of the file containing the parts for individual-based learning
   * (option -p). Defaults to the empty string.
   */
  private String m_partsString;
  
  /**
   * Part instances for individual-based learning. Defaults to null.
   */
  private Instances m_parts;

  /*
   * Types of values output. The numeric values match the choices of the
   * -P option: 0=no output, 1=on stdout, 2=in separate window.
   */
  /** Values output: none. */
  private static final int NO = 0;
  /** Values output: on stdout. */
  private static final int OUT = 1;
  /** Values output: in a separate window. */
  private static final int WINDOW = 2;
  /** Tags pairing each values-output mode with its display name. */
  private static final Tag [] TAGS_VALUES = {
    new Tag(NO, "No"),
    new Tag(OUT, "stdout"),
    new Tag(WINDOW, "Window")
      };

  /** Type of values output (option -P, default NO). */
  private int m_printValues;

  /**
   * Creates a Tertius associator whose options all hold their default
   * values, as assigned by resetOptions().
   */
  public Tertius() {
    this.resetOptions();
  }

  /**
   * Gives a one-sentence description of this associator.
   *
   * @return A description of the associator suitable for
   * displaying in the explorer/experimenter gui.
   */
  public String globalInfo() {

    final String description = "Finds rules according to confirmation measure.";
    return description;
  }


  /**
   * Restores every option of this associator to its default value.
   * Called by the constructor and at the start of setOptions(), so that
   * options absent from the command line fall back to these defaults.
   */
  public void resetOptions() {

    // Pruning: 10 best confirmation values, no frequency or confirmation
    // pruning, noise threshold at its maximum of 1.
    m_best = 10;
    m_frequencyThreshold = m_confirmationThreshold = 0.0;
    m_noiseThreshold = 1.0;

    // Search space and language bias: up to 4 literals per rule, no
    // repeated attributes, no negation, no classification or horn bias.
    m_numLiterals = 4;
    m_negation = NONE;
    m_classIndex = 0;
    m_repeat = m_classification = m_horn = false;

    // Subsumption tests: all three tests are enabled.
    m_equivalent = m_sameClause = m_subsumption = true;

    // Missing values use the EXPLICIT mode, ROC analysis is off.
    m_missing = EXPLICIT;
    m_roc = false;

    // Individual-based learning: no parts file, no part instances.
    m_parts = null;
    m_partsString = "";

    // Current values are not output.
    m_printValues = NO;
  }

  /**
   * Lists the command-line options understood by this associator.
   * The options come in the following order: pruning (-K, -F, -C, -N),
   * search space and language bias (-R, -L, -G, -S, -c, -H), subsumption
   * tests (-E, -M, -T), missing values (-I), ROC analysis (-O),
   * individual-based learning (-p) and values output (-P).
   *
   * @return An enumeration of Option objects, one per available option.
   */
  public Enumeration listOptions() {

    Option [] supported = {

      /* Pruning options. */
      new Option("\tSet maximum number of confirmation  values in the "
                 + "result. (default: 10)",
                 "K", 1, "-K <number of values in result>"),
      new Option("\tSet frequency threshold for pruning. (default: 0)",
                 "F", 1, "-F <frequency threshold>"),
      new Option("\tSet confirmation threshold. (default: 0)",
                 "C", 1, "-C <confirmation threshold>"),
      new Option("\tSet noise threshold : maximum frequency of "
                 + "counter-examples.\n"
                 + "\t0 gives only satisfied rules. (default: 1)",
                 "N", 1, "-N <noise threshold>"),

      /* Search space & language bias options. */
      new Option("\tAllow attributes to be repeated in a same rule.",
                 "R", 0, "-R"),
      new Option("\tSet maximum number of literals in a rule. "
                 + "(default: 4)",
                 "L", 1, "-L <number of literals>"),
      new Option("\tSet the negations in the rule. (default: 0)",
                 "G", 1,
                 "-G <0=no negation | 1=body | 2=head | 3=body and head>"),
      new Option("\tConsider only classification rules.",
                 "S", 0, "-S"),
      new Option("\tSet index of class attribute. (default: last).",
                 "c", 1, "-c <class index>"),
      new Option("\tConsider only horn clauses.",
                 "H", 0, "-H"),

      /* Subsumption tests options. */
      new Option("\tKeep equivalent rules.", "E", 0, "-E"),
      new Option("\tKeep same clauses.", "M", 0, "-M"),
      new Option("\tKeep subsumed rules.", "T", 0, "-T"),

      /* Missing values options. */
      new Option("\tSet the way to handle missing values. (default: 0)",
                 "I", 1,
                 "-I <0=always match | 1=never match | 2=significant>"),

      /* ROC analysis. */
      new Option("\tUse ROC analysis. ", "O", 0, "-O"),

      /* Individual-based learning. */
      new Option("\tSet the file containing the parts of the individual "
                 + "for individual-based learning.",
                 "p", 1, "-p <name of file>"),

      /* Values output. */
      new Option("\tSet output of current values. (default: 0)",
                 "P", 1,
                 "-P <0=no output | 1=on stdout | 2=in separate window>")
    };

    // Copy the table into a vector to obtain the enumeration.
    Vector optionList = new Vector(17);
    for (int i = 0; i < supported.length; i++) {
      optionList.addElement(supported[i]);
    }
    return optionList.elements();
  }
  
  /**
   * Parses a given list of options.
   *
   * @param options The list of options as an array of strings.
   * @exception Exception if an option is not supported.
   */
  public void setOptions(String [] options) throws Exception {
    
    resetOptions();
    
    /* Pruning options. */
    String bestString = Utils.getOption('K', options);
    if (bestString.length() != 0) {
      try {
	m_best = Integer.parseInt(bestString);
      } catch (Exception e) {
	throw new Exception("Invalid value for -K option: "
			    + e.getMessage() + ".");
      }
      if (m_best < 1) {
	throw new Exception("Number of confirmation values has to be "
			    + "greater than one!");
      }
    }
    String frequencyThresholdString = Utils.getOption('F', options);
    if (frequencyThresholdString.length() != 0) {
      try {	
	m_frequencyThreshold 
	  = (new Double(frequencyThresholdString)).doubleValue();
      } catch (Exception e) {
	throw new Exception("Invalid value for -F option: "
			    + e.getMessage() + ".");
      }
      if (m_frequencyThreshold < 0 || m_frequencyThreshold > 1) {
	throw new Exception("Frequency threshold has to be between "
			    + "zero and one!");
      }
    }
    String confirmationThresholdString = Utils.getOption('C', options);
    if (confirmationThresholdString.length() != 0) {
      try {
	m_confirmationThreshold 
	  = (new Double(confirmationThresholdString)).doubleValue();
      } catch (Exception e) {
	throw new Exception("Invalid value for -C option: "
			    + e.getMessage() + ".");
      }
      if (m_confirmationThreshold < 0 || m_confirmationThreshold > 1) {
	throw new Exception("Confirmation threshold has to be between "
			    + "zero and one!");
      }
      if (bestString.length() != 0) {
	throw new Exception("Specifying both a number of confirmation "
			    + "values and a confirmation threshold "
			    + "doesn't make sense!");
      }
      if (m_confirmationThreshold != 0) {
	m_best = 0;
      }
    }
    String noiseThresholdString = Utils.getOption('N', options);
    if (noiseThresholdString.length() != 0) {
      try {
	m_noiseThreshold = (new Double(noiseThresholdString)).doubleValue();
      } catch (Exception e) {
	throw new Exception("Invalid value for -N option: "
			    + e.getMessage() + ".");
      }
      if (m_noiseThreshold < 0 || m_noiseThreshold > 1) {
	throw new Exception("Noise threshold has to be between "
			    + "zero and one!");
      }
    }

    /* Search space and language bias options. */
    m_repeat = Utils.getFlag('R', options);
    String numLiteralsString = Utils.getOption('L', options);
    if (numLiteralsString.length() != 0) {
      try {
	m_numLiterals = Integer.parseInt(numLiteralsString);
      } catch (Exception e) {
	throw new Exception("Invalid value for -L option: "
			    + e.getMessage() + ".");
      }
      if (m_numLiterals < 1) {
	throw new Exception("Number of literals has to be "
			    + "greater than one!");
      }
    }
    String negationString = Utils.getOption('G', options);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -