📄 assocrulemining.java
字号:
/* -------------------------------------------------------------------------- */
/* */
/* ASSOCIATION RULE DATA MINING */
/* */
/* Frans Coenen */
/* */
/* Wednesday 9 January 2003 */
/* (revised 21/1/2003, 14/2/2003, 2/5/2003, 2/7/2003, 3/2/2004, 8/5/2004, */
/* 1/2/2005, 3/2/2005, 14/2/06, 14/3/06, 18/6/06, 1/7/2006, 11/10/2006, */
/* 27/10/2006, 14/11/2006) */
/* */
/* Department of Computer Science */
/* The University of Liverpool */
/* */
/* -------------------------------------------------------------------------- */
//package lucsKDD_ARM;
/* To compile: javaARMpackc.exe AssocRuleMining.java */
// Java packages
import java.io.*;
import java.util.*;
// Java GUI packages
import javax.swing.*;
/** Set of utilities to support various Association Rule Mining (ARM)
algorithms included in the LUCS-KDD suite of ARM programs.
@author Frans Coenen
@version 11 October 2006 */
public class AssocRuleMining extends JFrame {
/* ------ FIELDS ------ */
/** Presumably the upper limit on the number of rules that may be held (its
use is not visible in this part of the file). Despite the constant-style
name this is a mutable instance field. */
protected int MAX_NUM_RULES = 10000;
// Data structures
/** The reference to start of the rule list. */
protected RuleNode startRulelist = null;
/** 2-D array to hold input data from data file. Note that within the data
array records are numbered from zero, thus record one has index 0 etc.
<P>First index is row (record or TID) number starting from 0, and second
is attribute (column) number starting from zero. */
protected short[][] dataArray = null;
/** 2-D array used to renumber columns for input data in terms of
frequency of single attributes (reordering will enhance performance
for some ARM algorithms). */
protected int[][] conversionArray = null;
/** 1-D array used to reconvert input data column numbers to their
original numbering where the input data has been ordered to enhance
computational efficiency. */
protected short[] reconversionArray = null;
/** 1-D array to hold output schema. */
private String[] outputSchema = null;
// Constants
/** Minimum support value (percentage). */
protected static final double MIN_SUPPORT = 0.0;
/** Maximum support value (percentage). */
protected static final double MAX_SUPPORT = 100.0;
/** Minimum confidence value (percentage). */
protected static final double MIN_CONFIDENCE = 0.0;
/** Maximum confidence value (percentage). */
protected static final double MAX_CONFIDENCE = 100.0;
// Command line arguments with default values and associated fields.
/** Command line argument for data file name. */
protected String fileName = null;
/** Command line argument for output file name. */
protected String outputFileName = null;
/** Command line argument for file name for testset (used in classification
where separate test and training set files may be used). */
protected String testSetFileName = null;
/** Command line argument for number of columns (attributes) in input
data. */
protected int numCols = 0;
/** Command line argument for number of rows in input data. */
protected int numRows = 0;
/** Command line argument for % support (default = 20%). */
protected double support = 20.0;
// More fields
/** Minimum support value in terms of number of rows. <P>Set when input
data is read and the number of records is known, reset if input data is
resized so that only N percent is used. */
protected double minSupport = 0;
/** Command line argument for % confidence (default = 80%). */
protected double confidence = 80.0;
/** The number of one itemsets (singletons). */
protected int numOneItemSets = 0;
/** The number of frequent sets identified during processing. */
protected int numFrequentSets = 0;
/** The number of rules (ARs, CARs or CRs) that have been generated. */
protected int numRules = 0;
/** Number of classes in input data set (input by the user). */
protected int numClasses = 0;
/** Number of rows in output schema. */
private int numRowsInOutputSchema = 0;
// Flags
/** Error flag used when checking command line arguments (default =
<TT>true</TT>). Note the inverted sense: the flag stays <TT>true</TT>
while no error has been found and is set to <TT>false</TT> as soon as an
argument check fails. */
protected boolean errorFlag = true;
/** Input format OK flag (default = <TT>true</TT>). */
protected boolean inputFormatOkFlag = true;
/** Flag to indicate whether system has data or not. */
private boolean haveDataFlag = false;
/** Flag to indicate whether input data has been sorted or not. */
protected boolean isOrderedFlag = false;
/** Flag to indicate whether input data has been sorted and pruned or
not. */
protected boolean isPrunedFlag = false;
/** Flag to indicate whether output schema is available or not. */
protected boolean hasOutputSchemaFlag = false;
/** Support confidence framework flag. */
protected boolean supConfFworkFlag = false;
/** Support lift framework flag. */
protected boolean supLiftFworkFlag = false;
/** Output rule set to file flag. */
protected boolean outputRuleSetToFileFlag = false;
// Other fields
/** The input stream, instance of class <TT>BufferedReader</TT>. */
protected BufferedReader fileInput;
/** The output stream, instance of class <TT>PrintWriter</TT>. */
private PrintWriter fileOutput;
/** The file path. */
protected File filePath = null;
/* ------ CONSTRUCTORS ------ */
/** Constructor with command line arguments to be processed.
@param args the command line arguments (array of String instances). */
public AssocRuleMining(String[] args) {
    // Hand each command line argument to the parser in turn; the parser
    // clears errorFlag when it rejects an argument
    for (String argument : args) {
        idArgument(argument);
    }
    // errorFlag still "true" means every argument was accepted, so go on
    // to validate the values; otherwise fall through to outputMenu()
    if (!errorFlag) {
        outputMenu();
    }
    else {
        CheckInputArguments();
    }
}
/** One argument constructor with argument from existing instance of
class AssocRuleMining.
@param armInstance the given instance of the <TT>AssocRuleMining</TT>
class. */
public AssocRuleMining(AssocRuleMining armInstance) {
outputSchema = armInstance.outputSchema;
}
/** Default constructor used by: (1) BruteForce, (2) Total Support
Tree, (3) ClassAppMapGUI and (4) AprioriTgui classes and others. */
public AssocRuleMining() {
    // No set-up of its own: every field keeps its declared default
    super();
}
/* ------ METHODS ------ */
/* ---------------------------------------------------------------- */
/* */
/* COMMAND LINE ARGUMENTS */
/* */
/* ---------------------------------------------------------------- */
/* IDENTIFY ARGUMENT */
/** Identifies nature of individual command line arguments:
-C = confidence, -F = file name, -N = number of classes,
-O = output file name, -S = support. */
protected void idArgument(String argument) {
    // A usable argument needs the '-', a flag letter and at least one
    // character of value, i.e. three characters or more
    if (argument.length()<3) {
        JOptionPane.showMessageDialog(null,"Command line argument \"" +
                argument + "\" too short.","COMMAND LINE INPUT ERROR",
                JOptionPane.ERROR_MESSAGE);
        errorFlag = false;
        return;
    }
    // Every argument must be introduced by a '-'
    if (argument.charAt(0) != '-') {
        JOptionPane.showMessageDialog(null,"All command line arguments " +
                "must commence with a '-' character ('" +
                argument + "')","COMMAND LINE INPUT ERROR",
                JOptionPane.ERROR_MESSAGE);
        errorFlag = false;
        return;
    }
    // Split into the single flag letter and the value that follows it
    char flag = argument.charAt(1);
    argument = argument.substring(2,argument.length());
    try {
        switch (flag) {
            case 'C': // Confidence threshold
                confidence = Double.parseDouble(argument);
                break;
            case 'F': // Data input file name
                fileName = argument;
                break;
            case 'N': // Number of classes
                numClasses = Integer.parseInt(argument);
                break;
            case 'O': // Output file name (various potential uses)
                outputFileName = argument;
                break;
            case 'S': // Support threshold
                support = Double.parseDouble(argument);
                break;
            default:
                JOptionPane.showMessageDialog(null,"Unrecognise command " +
                        "line argument: \"" + flag + argument + "\"'.",
                        "COMMAND LINE INPUT ERROR",JOptionPane.ERROR_MESSAGE);
                errorFlag = false;
        }
    }
    catch (NumberFormatException numberFormatException) {
        // -C, -N and -S take numeric values. A malformed number is
        // reported like any other bad argument (dialog plus errorFlag)
        // instead of escaping as an uncaught exception.
        JOptionPane.showMessageDialog(null,"Command line argument \"-" +
                flag + argument + "\" requires a numeric value.",
                "COMMAND LINE INPUT ERROR",JOptionPane.ERROR_MESSAGE);
        errorFlag = false;
    }
}
/* CHECK INPUT ARGUMENTS */
/** Invokes methods to check values associated with command line
arguments. */
protected void CheckInputArguments() {
    // Thresholds first, then the data file name; each check clears
    // errorFlag when it finds a problem
    checkSupportAndConfidence();
    checkFileName();
    // A cleared flag means at least one check failed
    if (!errorFlag) {
        outputMenu();
        return;
    }
    outputSettings();
}
/* CHECK SUPPORT AND CONFIDENCE */
/** Checks support and confidence input % values, if either is out of
bounds then <TT>errorFlag</TT> set to <TT>false</TT>. */
protected void checkSupportAndConfidence() {
    // Each test is phrased as "not inside the range" rather than
    // "below the minimum or above the maximum". Every comparison with
    // NaN is false, so the latter form would silently accept a NaN
    // threshold (Double.parseDouble accepts the text "NaN").

    // Check support
    boolean supportInRange =
            (support >= MIN_SUPPORT) && (support <= MAX_SUPPORT);
    if (!supportInRange) {
        JOptionPane.showMessageDialog(null,"Support must be specified " +
                "as a percentage (" + MIN_SUPPORT + " - " + MAX_SUPPORT +
                ")","INPUT ERROR",JOptionPane.ERROR_MESSAGE);
        errorFlag = false;
    }
    // Check confidence
    boolean confidenceInRange =
            (confidence >= MIN_CONFIDENCE) && (confidence <= MAX_CONFIDENCE);
    if (!confidenceInRange) {
        JOptionPane.showMessageDialog(null,"Confidence must be " +
                "specified as a percentage (" + MIN_CONFIDENCE +
                " - " + MAX_CONFIDENCE + ")","INPUT ERROR",
                JOptionPane.ERROR_MESSAGE);
        errorFlag = false;
    }
}
/* CHECK FILE NAME */
/** Checks if data file name provided, if not <TT>errorFlag</TT> set
to <TT>false</TT>. */
protected void checkFileName() {
    // Nothing to report once a file name has been set
    if (fileName != null) {
        return;
    }
    // No name recorded: tell the user and mark the arguments as invalid
    JOptionPane.showMessageDialog(null,"Must specify file name (-F)",
            "COMMAND LINE INPUT ERROR",JOptionPane.ERROR_MESSAGE);
    errorFlag = false;
}
/* ---------------------------------------------------------------- */
/* */
/* READ INPUT DATA FROM FILE */
/* */
/* ---------------------------------------------------------------- */
/* INPUT DATA SET */
/** Commences process of getting input data (GUI version also exists). */
public void inputDataSet() {
    // Load the file; readFile() leaves inputFormatOkFlag false when the
    // format check fails, in which case there is nothing more to do here
    readFile();
    if (!inputFormatOkFlag) {
        return;
    }
    // A failed ordering check is treated as fatal
    if (!checkOrdering()) {
        JOptionPane.showMessageDialog(null,"Unknown error reading " +
                "file: " + fileName + "\n","FILE INPUT ERROR",
                JOptionPane.ERROR_MESSAGE);
        closeFile();
        System.exit(1);
        return;
    }
    // Report the size of the data set and derive the support threshold
    // as a number of records from the percentage value
    System.out.println("Number of records = " + numRows);
    countNumCols();
    System.out.println("Number of columns = " + numCols);
    minSupport = (numRows * support)/100.0;
    String minSupportText = twoDecPlaces(minSupport) + " (records)";
    System.out.println("Min support = " + minSupportText);
}
/* READ FILE */
/** Reads input data from file specified in command line argument
<TT>fileName</TT> (GUI version also exists). <P>Note that it is assumed
that no empty records are included. Proceeds as follows:
<OL>
<LI>Gets number of rows (lines) in file, checking format of each line
(space separated integers), if incorrectly formatted line found
<TT>inputFormatOkFlag</TT> set to <TT>false</TT>.
<LI>Dimensions input array.
<LI>Reads data
</OL> */
protected void readFile() {
    try {
        // Reset the format flag, then count the lines; per its own
        // documentation getNumberOfLines also checks each line's format
        inputFormatOkFlag = true;
        numRows = getNumberOfLines(fileName);
        // Badly formatted input: report it and leave dataArray untouched
        if (!inputFormatOkFlag) {
            JOptionPane.showMessageDialog(null,"Error reading file: " +
                    fileName + "\n","FILE INPUT ERROR",
                    JOptionPane.ERROR_MESSAGE);
            return;
        }
        // One (still unallocated) row per record, then read the data
        dataArray = new short[numRows][];
        System.out.println("Reading input file: " + fileName);
        readInputDataSet();
    }
    catch (IOException readFailure) {
        // Any I/O failure is fatal: report, close the file and exit
        JOptionPane.showMessageDialog(null,"Unknown error reading " +
                "file: " + fileName + "\n","FILE INPUT ERROR",
                JOptionPane.ERROR_MESSAGE);
        closeFile();
        System.exit(1);
    }
}
/* GET NUMBER OF LINES */
/** Gets number of lines/records in input file and checks format of each
line.
@param nameOfFile the filename of the file to be opened.
@return the number of rows in the given file. */
protected int getNumberOfLines(String nameOfFile) throws IOException {
int counter = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -