⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 assocrulemining.java

📁 Decision Tree Decision Tree Decision Tree
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
/* -------------------------------------------------------------------------- */
/*                                                                            */
/*                      ASSOCIATION RULE DATA MINING                          */
/*                                                                            */
/*                            Frans Coenen                                    */
/*                                                                            */
/*                        Wednesday 9 January 2003                            */
/*   (revised 21/1/2003, 14/2/2003, 2/5/2003, 2/7/2003, 3/2/2004, 8/5/2004,   */
/*     1/2/2005, 3/2/2005, 14/2/06, 14/3/06, 18/6/06, 1/7/2006, 11/10/2006,   */
/*                         27/10/2006, 14/11/2006)                            */
/*                                                                            */
/*                    Department of Computer Science                          */
/*                     The University of Liverpool                            */
/*                                                                            */
/* -------------------------------------------------------------------------- */

//package lucsKDD_ARM;

/* To compile: javaARMpackc.exe AssocRuleMining.java */

// Java packages
import java.io.*;
import java.util.*;

// Java GUI packages
import javax.swing.*;

/** Set of utilities to support various Association Rule Mining (ARM)
algorithms included in the LUCS-KDD suite of ARM programs.
@author Frans Coenen
@version 11 October 2006 */

public class AssocRuleMining extends JFrame {

    /* ------ FIELDS ------ */

    /** Constants. */
    protected int MAX_NUM_RULES = 10000;

    // Data structures
    /** The reference to start of the rule list. */
    protected RuleNode startRulelist = null;
    /** 2-D array to hold input data from data file. Note that within the data
    array records are numbered from zero, thus record one has index 0 etc.
    <P>First index is row (record or TID) number starting from 0, and second
    is attribute (column) number starting from zero. */
    protected short[][] dataArray = null;
    /** 2-D array used to renumber columns for input data in terms of
    frequency of single attributes (reordering will enhance performance
    for some ARM algorithms). */
    protected int[][] conversionArray   = null;
    /** 1-D array used to reconvert input data column numbers to their
    original numbering where the input data has been ordered to enhance
    computational efficiency. */
    protected short[] reconversionArray = null;
    /** 1-D array to hold output schema. */
    private String[] outputSchema = null;

    // Constants

    /** Minimum support value */
    protected static final double MIN_SUPPORT = 0.0;
    /** Maximum support value */
    protected static final double MAX_SUPPORT = 100.0;
    /** Minimum confidence value */
    protected static final double MIN_CONFIDENCE = 0.0;
    /** Maximum confidence value */
    protected static final double MAX_CONFIDENCE = 100.0;

    // Command line arguments with default values and associated fields.

    /** Command line argument for data file name. */
    protected String  fileName = null;
    /** Command line argument for output file name. */
    protected String  outputFileName = null;
    /** Command line argument for file name for testset (used in classification
    where separate test and training set files may be used). */
    protected String testSetFileName = null;
    /** Command line argument for number of columns (attributes) in input
    data. */
    protected int     numCols    = 0;
    /** Command line argument for number of rows in input data. */
    protected int     numRows    = 0;
    /** Command line argument for % support (default = 20%). */
    protected double  support    = 20.0;
    
    // More fields
    /** Minimum support value in terms of number of rows. <P>Set when input
    data is read and the number of records is known, reset if input data is
    resized so that only N percent is used. */
    protected double  minSupport = 0;
    /** Command line argument for % confidence (default = 80%). */
    protected double  confidence = 80.0;
    /** The number of one itemsets (singletons). */
    protected int numOneItemSets = 0;
    /** The number of frequent sets identified during processing. */
    protected int numFrequentSets = 0;
    /** The number of rules (ARs, CARs or CRs) that have been generated. */
    protected int numRules = 0;
    /** Number of classes in input data set (input by the user). */
    protected int numClasses = 0;
    /** Number of rows in output schema. */
    private int numRowsInOutputSchema = 0;

    // Flags

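    /** Error flag used when checking command line arguments:
    <TT>true</TT> (the default) means no error has been detected, it is set
    to <TT>false</TT> as soon as an argument is rejected. */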
    protected boolean errorFlag  = true;
    /** Input format OK flag (default = <TT>true</TT>). */
    protected boolean inputFormatOkFlag = true;
    /** Flag to indicate whether system has data or not. */
    private boolean haveDataFlag = false;
    /** Flag to indicate whether input data has been sorted or not. */
    protected boolean isOrderedFlag = false;
    /** Flag to indicate whether input data has been sorted and pruned or
    not. */
    protected boolean isPrunedFlag = false;
    /** Flag to indicate whether output schema is available or not. */
    protected boolean hasOutputSchemaFlag = false;
    /** Support confidence framework flag. */
    protected boolean supConfFworkFlag = false;
    /** Support lift framework flag. */
    protected boolean supLiftFworkFlag = false;
    /** Output rule set to file flag */
    protected boolean outputRuleSetToFileFlag = false;

    // Other fields

    /** The input stream, instance of class <TT>BufferedReader</TT>. */
    protected BufferedReader fileInput;
    /** The output stream, instance of class <TT>PrintWriter</TT>. */
    private PrintWriter fileOutput;
    /** The file path */
    protected File filePath = null;
    
    /* ------ CONSTRUCTORS ------ */

    /** Builds an instance from the program's command line arguments. Each
    argument is handed to <TT>idArgument</TT>; if <TT>errorFlag</TT> is still
    <TT>true</TT> once all arguments have been identified their values are
    validated by <TT>CheckInputArguments</TT>, otherwise <TT>outputMenu</TT>
    is invoked.
    @param args the command line arguments (array of String instances). */

    public AssocRuleMining(String[] args) {
        // Identify each command line argument in turn
        for (String arg : args) {
            idArgument(arg);
        }

        // errorFlag stays "true" only while no argument has been rejected
        if (!errorFlag) {
            outputMenu();
        } else {
            CheckInputArguments();
        }
    }

    /** One argument constructor that takes over the output schema held by an
    existing instance of class <TT>AssocRuleMining</TT>. Only the
    <TT>outputSchema</TT> reference is copied (the array itself is shared,
    not cloned); no other field is transferred.
    @param armInstance the given instance of the <TT>AssocRuleMining</TT>
    class. */

    public AssocRuleMining(AssocRuleMining armInstance) {
        super();
        this.outputSchema = armInstance.outputSchema;
    }

    /** No-argument constructor; performs no initialisation of its own, so
    every field keeps its declared initial value. Used by: (1) BruteForce,
    (2) Total Support Tree, (3) ClassAppMapGUI and (4) AprioriTgui classes
    and others. */

    public AssocRuleMining() {
        super();
    }

    /* ------ METHODS ------ */

    /* ---------------------------------------------------------------- */
    /*                                                                  */
    /*                        COMMAND LINE ARGUMENTS                    */
    /*                                                                  */
    /* ---------------------------------------------------------------- */

    /* IDENTIFY ARGUMENT */
    /** Identifies the nature of an individual command line argument and
    stores its value in the corresponding field: -C = confidence, -F = data
    file name, -N = number of classes, -O = output file name, -S = support.
    <P>An argument must be at least three characters long and commence with
    a '-' character. If the argument is too short, does not start with '-',
    carries an unrecognised flag, or (for -C, -N and -S) has a value that
    cannot be parsed as a number, an error dialog is displayed and
    <TT>errorFlag</TT> is set to <TT>false</TT>.
    @param argument the command line argument, for example <TT>-S20</TT>. */

    protected void idArgument(String argument) {
        // Shortest legal argument: '-', a flag letter and one value character
        if (argument.length() < 3) {
            JOptionPane.showMessageDialog(null,
                    "Command line argument \"" + argument + "\" too short.",
                    "COMMAND LINE INPUT ERROR", JOptionPane.ERROR_MESSAGE);
            errorFlag = false;
            return;
        }

        if (argument.charAt(0) != '-') {
            JOptionPane.showMessageDialog(null,
                    "All command line arguments must commence with a '-' "
                            + "character ('" + argument + "')",
                    "COMMAND LINE INPUT ERROR", JOptionPane.ERROR_MESSAGE);
            errorFlag = false;
            return;
        }

        char flag = argument.charAt(1);
        String value = argument.substring(2);

        try {
            switch (flag) {
                case 'C':  // Confidence threshold
                    confidence = Double.parseDouble(value);
                    break;
                case 'F':  // Data input file name
                    fileName = value;
                    break;
                case 'N':  // Number of classes
                    numClasses = Integer.parseInt(value);
                    break;
                case 'O':  // Output file name (various potential uses)
                    outputFileName = value;
                    break;
                case 'S':  // Support threshold
                    support = Double.parseDouble(value);
                    break;
                default:
                    // Report the argument exactly as it was typed
                    JOptionPane.showMessageDialog(null,
                            "Unrecognised command line argument: \""
                                    + argument + "\".",
                            "COMMAND LINE INPUT ERROR",
                            JOptionPane.ERROR_MESSAGE);
                    errorFlag = false;
            }
        } catch (NumberFormatException numberFormatException) {
            // A malformed number is a user input error like any other: report
            // it through the dialog/errorFlag route rather than letting the
            // exception escape from the constructor
            JOptionPane.showMessageDialog(null,
                    "Value \"" + value + "\" given for command line argument -"
                            + flag + " is not a valid number.",
                    "COMMAND LINE INPUT ERROR", JOptionPane.ERROR_MESSAGE);
            errorFlag = false;
        }
    }

    /* CHECK INPUT ARGUMENTS */
    /** Validates the values associated with the command line arguments by
    invoking <TT>checkSupportAndConfidence</TT> followed by
    <TT>checkFileName</TT>. If <TT>errorFlag</TT> is still <TT>true</TT>
    afterwards <TT>outputSettings</TT> is called, otherwise
    <TT>outputMenu</TT>. */

    protected void CheckInputArguments() {
        // Range check on the support and confidence percentages
        checkSupportAndConfidence();

        // A data file name must have been supplied
        checkFileName();

        // Either check may have cleared errorFlag
        if (!errorFlag) {
            outputMenu();
            return;
        }
        outputSettings();
    }

    /* CHECK SUPPORT AND CONFIDENCE */
    /** Checks the support and confidence input % values. If either lies
    outside its permitted range (<TT>MIN_SUPPORT</TT> to
    <TT>MAX_SUPPORT</TT>, <TT>MIN_CONFIDENCE</TT> to
    <TT>MAX_CONFIDENCE</TT>) or is not a number (NaN), an error dialog is
    displayed and <TT>errorFlag</TT> is set to <TT>false</TT>. Both values
    are always checked, so two dialogs may be shown. */

    protected void checkSupportAndConfidence() {
        // NaN fails every ordered comparison, so it is tested for explicitly;
        // otherwise an argument such as -SNaN would slip through the range
        // check
        boolean supportInvalid = Double.isNaN(support)
                || support < MIN_SUPPORT || support > MAX_SUPPORT;
        if (supportInvalid) {
            JOptionPane.showMessageDialog(null,
                    "Support must be specified as a percentage ("
                            + MIN_SUPPORT + " - " + MAX_SUPPORT + ")",
                    "INPUT ERROR", JOptionPane.ERROR_MESSAGE);
            errorFlag = false;
        }

        boolean confidenceInvalid = Double.isNaN(confidence)
                || confidence < MIN_CONFIDENCE || confidence > MAX_CONFIDENCE;
        if (confidenceInvalid) {
            JOptionPane.showMessageDialog(null,
                    "Confidence must be specified as a percentage ("
                            + MIN_CONFIDENCE + " - " + MAX_CONFIDENCE + ")",
                    "INPUT ERROR", JOptionPane.ERROR_MESSAGE);
            errorFlag = false;
        }
    }

    /* CHECK FILE NAME */
    /** Checks that a data file name has been provided. If
    <TT>fileName</TT> is <TT>null</TT> an error dialog is displayed and
    <TT>errorFlag</TT> is set to <TT>false</TT>; otherwise nothing
    happens. */

    protected void checkFileName() {
        // Nothing to report when a file name is present
        if (fileName != null) {
            return;
        }

        JOptionPane.showMessageDialog(null, "Must specify file name (-F)",
                "COMMAND LINE INPUT ERROR", JOptionPane.ERROR_MESSAGE);
        errorFlag = false;
    }

    /* ---------------------------------------------------------------- */
    /*                                                                  */
    /*                     READ INPUT DATA FROM FILE                    */
    /*                                                                  */
    /* ---------------------------------------------------------------- */

    /* INPUT DATA SET */

    /** Commences the process of getting input data (GUI version also
    exists). Proceeds as follows:
    <OL>
    <LI>Calls <TT>readFile</TT>; if <TT>inputFormatOkFlag</TT> is
    <TT>false</TT> afterwards nothing further is done.
    <LI>Calls <TT>checkOrdering</TT>; on failure an error dialog is
    displayed, <TT>closeFile</TT> is called and the program exits with
    status 1.
    <LI>Otherwise outputs the number of records, calls
    <TT>countNumCols</TT> and outputs the number of columns, then sets
    <TT>minSupport</TT> to <TT>support</TT> percent of <TT>numRows</TT> and
    outputs it.
    </OL> */

    public void inputDataSet() {
        // Read the file
        readFile();

        // Ordering is only checked when the input format was OK
        if (!inputFormatOkFlag) {
            return;
        }

        if (!checkOrdering()) {
            JOptionPane.showMessageDialog(null,
                    "Unknown error reading file: " + fileName + "\n",
                    "FILE INPUT ERROR", JOptionPane.ERROR_MESSAGE);
            closeFile();
            System.exit(1);
        } else {
            System.out.println("Number of records = " + numRows);
            countNumCols();
            System.out.println("Number of columns = " + numCols);

            // Convert the support percentage into a number of records
            minSupport = (numRows * support) / 100.0;
            String minSupportText = twoDecPlaces(minSupport) + " (records)";
            System.out.println("Min support       = " + minSupportText);
        }
    }

    /* READ FILE */

    /** Reads input data from the file specified in command line argument
    <TT>fileName</TT> (GUI version also exists). <P>Note that it is assumed
    that no empty records are included. Proceeds as follows:
    <OL>
    <LI>Resets <TT>inputFormatOkFlag</TT> to <TT>true</TT> and obtains the
    number of rows (lines) in the file from <TT>getNumberOfLines</TT>, which
    also checks the format of each line (presumably clearing
    <TT>inputFormatOkFlag</TT> when a badly formatted line is found).
    <LI>If the flag is still <TT>true</TT>, dimensions <TT>dataArray</TT>
    with one (as yet unallocated) row per line and reads the data using
    <TT>readInputDataSet</TT>; otherwise displays an error dialog.
    </OL>
    If an <TT>IOException</TT> occurs an error dialog that includes the
    exception's description is displayed, <TT>closeFile</TT> is called and
    the program exits with status 1. */

    protected void readFile() {
        try {
            // Assume the format is fine until the line scan says otherwise
            inputFormatOkFlag = true;
            numRows = getNumberOfLines(fileName);

            if (!inputFormatOkFlag) {
                JOptionPane.showMessageDialog(null,
                        "Error reading file: " + fileName + "\n",
                        "FILE INPUT ERROR", JOptionPane.ERROR_MESSAGE);
                return;
            }

            // Dimension data structure (columns are allocated per record)
            dataArray = new short[numRows][];

            // Read file
            System.out.println("Reading input file: " + fileName);
            readInputDataSet();
        } catch (IOException ioException) {
            // Append the exception's description so that the cause (for
            // example a missing file) is visible to the user instead of
            // being discarded
            JOptionPane.showMessageDialog(null,
                    "Unknown error reading file: " + fileName + "\n"
                            + ioException,
                    "FILE INPUT ERROR", JOptionPane.ERROR_MESSAGE);
            closeFile();
            System.exit(1);
        }
    }

    /* GET NUMBER OF LINES */

    /** Gets number of lines/records in input file and checks format of each
    line.
    @param nameOfFile the filename of the file to be opened.
    @return the number of rows in the given file. */

    protected int getNumberOfLines(String nameOfFile) throws IOException {
        int counter = 0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -