📄 assocrulemining.java
字号:
/* -------------------------------------------------------------------------- *//* *//* ASSOCIATION RULE DATA MINING *//* *//* Frans Coenen *//* *//* Wednesday 9 January 2003 *//* (revised 21/1/2003, 14/2/2003, 2/5/2003, 2/7/2003, 3/2/2004, 27/10/2006) *//* *//* Department of Computer Science *//* The University of Liverpool *//* */ /* -------------------------------------------------------------------------- */// Java packagesimport java.io.*;import java.util.*;// Java GUI packagesimport javax.swing.*;/** Set of utilities to support various Association Rule Mining (ARM) algorithms included in the LUCS-KDD suite of ARM programs. @author Frans Coenen@version 2 July 2003 */public class AssocRuleMining extends JFrame { /* ------ FIELDS ------ */ // Data structures /** 2-D aray to hold input data from data file */ protected short[][] dataArray = null; /** 2-D array used to renumber columns for input data in terms of frequency of single attributes (reordering will enhance performance for some ARM algorithms). */ protected int[][] conversionArray = null; /** 1-D array used to reconvert input data column numbers to their original numbering where the input data has been ordered to enhance computational efficiency. */ protected short[] reconversionArray = null; // Constants /** Minimum support value */ private static final double MIN_SUPPORT = 0.0; /** Maximum support value */ private static final double MAX_SUPPORT = 100.0; /** Maximum confidence value */ private static final double MIN_CONFIDENCE = 0.0; /** Maximum confidence value */ private static final double MAX_CONFIDENCE = 100.0; // Command line arguments with default values and associated fields /** Command line argument for data file name. */ protected String fileName = null; /** Command line argument for number of columns. */ protected int numCols = 0; /** Command line argument for number of rows. */ protected int numRows = 0; /** Command line argument for % support (default = 20%). 
*/ protected double support = 20.0; /** Minimum support value in terms of number of rows. */ protected double minSupport = 0; /** Command line argument for % confidence (default = 80%). */ protected double confidence = 80.0; /** The number of one itemsets (singletons). */ protected int numOneItemSets = 0; // Flags /** Error flag used when checking command line arguments (default = <TT>true</TT>). */ protected boolean errorFlag = true; /** Input format OK flag( default = <TT>true</TT>). */ protected boolean inputFormatOkFlag = true; /** Flag to indicate whether system has data or not. */ private boolean haveDataFlag = false; /** Flag to indicate whether input data has been sorted or not. */ private boolean isOrderedFlag = false; /** Flag to indicate whether input data has been sorted and pruned or not. */ private boolean isPrunedFlag = false; // Other fields /** The input stream. */ protected BufferedReader fileInput; /** The file path */ protected File filePath = null; /* ------ CONSTRUCTORS ------ */ /** Processes command line arguments */ public AssocRuleMining(String[] args) { // Process command line arguments for(int index=0;index<args.length;index++) idArgument(args[index]); // If command line arguments read successfully (errorFlag set to "true") // check validity of arguments if (errorFlag) CheckInputArguments(); else outputMenu(); } /** Default constructor used in particular when creating an isnatnce of the class RuleList which is a subclass of theAssocRuleMining class. */ public AssocRuleMining() { } /* ------ METHODS ------ */ /* ---------------------------------------------------------------- */ /* */ /* COMMAND LINE ARGUMENTS */ /* */ /* ---------------------------------------------------------------- */ /* IDENTIFY ARGUMENT */ /** Identifies nature of individual command line agruments: -C = confidence, -F = file name, -S = support. 
*/ protected void idArgument(String argument) { if (argument.charAt(0) == '-') { char flag = argument.charAt(1); argument = argument.substring(2,argument.length()); switch (flag) { case 'C': confidence = Double.parseDouble(argument); break; case 'F': fileName = argument; break; case 'S': support = Double.parseDouble(argument); break; default: System.out.println("INPUT ERROR: Unrecognise command " + "line argument -" + flag + argument); errorFlag = false; } } else { System.out.println("INPUT ERROR: All command line arguments " + "must commence with a '-' character (" + argument + ")"); errorFlag = false; } } /* CHECK INPUT ARGUMENTS */ /** Invokes methods to check values associate with command line arguments */ protected void CheckInputArguments() { // Check support and confidence input checkSupportAndConfidence(); // Check file name checkFileName(); // Return if (errorFlag) outputSettings(); else outputMenu(); } /* CHECK SUPPORT AND CONFIDANCE */ /** Checks support and confidence input % values, if either is out of bounds then <TT>errorFlag</TT> set to <TT>false</TT>. */ protected void checkSupportAndConfidence() { // Check Support if ((support < MIN_SUPPORT) || (support > MAX_SUPPORT)) System.out.println("INPUT ERROR: Support must be specified " + "as a percentage (" + MIN_SUPPORT + " - " + MAX_SUPPORT + ")"); // Check confidence if ((confidence < MIN_CONFIDENCE) || (confidence > MAX_CONFIDENCE)) System.out.println("INPUT ERROR: Confidence must be " + "specified as a percentage (" + MIN_CONFIDENCE + " - " + MAX_CONFIDENCE + ")"); } /* CHECK FILE NAME */ /** Checks if data file name provided, if not <TT>errorFlag</TT> set to <TT>false</TT>. 
*/ protected void checkFileName() { if (fileName == null) { System.out.println("INPUT ERROR: Must specify file name (-F)"); errorFlag = false; } } /* ---------------------------------------------------------------- */ /* */ /* READ INPUT DATA FROM FILE */ /* */ /* ---------------------------------------------------------------- */ /* INPUT DATA SET */ /** Commences process of getting input data (GUI version also exists). */ public void inputDataSet() { // Read the file readFile(); // Check ordering (only if input format is OK) if (inputFormatOkFlag) { if (checkOrdering()) { System.out.println("Number of records = " + numRows); countNumCols(); System.out.println("Number of columns = " + numCols); minSupport = (numRows * support)/100.0; System.out.println("Min support = " + twoDecPlaces(minSupport) + " (records)"); } else { System.out.println("Error reading file: " + fileName + "\n"); System.exit(1); } } } /* READ FILE */ /** Reads input data from file specified in command line argument (GUI version also exists). <P>Proceeds as follows: <OL> <LI>Gets number of lines in file, checking format of each line (space separated integers), if incorrectly formatted line found <TT>inputFormatOkFlag</TT> set to <TT>false</TT>. <LI>Dimensions input array. <LI>Reads data </OL> */ public void readFile() { try { // Dimension data structure inputFormatOkFlag=true; numRows = getNumberOfLines(fileName); if (inputFormatOkFlag) { dataArray = new short[numRows][]; // Read file System.out.println("Reading input file: " + fileName); readInputDataSet(); } else System.out.println("Error reading file: " + fileName + "\n"); } catch(IOException ioException) { System.out.println("Error reading File"); closeFile(); System.exit(1); } } /* GET NUMBER OF LINES */ /** Gets number of lines/records in input file and checks format of each line. @param nameOfFile the filename of the file to be opened. @return the number pf rows in the given file. 
*/ protected int getNumberOfLines(String nameOfFile) throws IOException { int counter = 0; // Open the file if (filePath==null) openFileName(nameOfFile); else openFilePath(); // Loop through file incrementing counter // get first row. String line = fileInput.readLine(); while (line != null) { checkLine(counter+1,line); StringTokenizer dataLine = new StringTokenizer(line); int numberOfTokens = dataLine.countTokens(); if (numberOfTokens == 0) break; counter++; line = fileInput.readLine(); } // Close file and return closeFile(); return(counter); } /* CHECK LINE */ /** Check whether given line from input file is of appropriate format (space separated integers), if incorrectly formatted line found <TT>inputFormatOkFlag</TT> set to <TT>false</TT>. @param counter the line number in the input file. @param str the current line from the input file. */ protected void checkLine(int counter, String str) { for (int index=0;index <str.length();index++) { if (!Character.isDigit(str.charAt(index)) && !Character.isWhitespace(str.charAt(index))) { JOptionPane.showMessageDialog(null,"FILE INPUT ERROR:\n" + "charcater on line " + counter + " is not a digit or white space"); inputFormatOkFlag = false; haveDataFlag = false; break; } } } /* READ INPUT DATA SET */ /** Reads input data from file specified in command line argument. */ public void readInputDataSet() throws IOException { int rowIndex=0; // Open the file if (filePath==null) openFileName(fileName); else openFilePath(); // get first row. 
String line = fileInput.readLine(); while (line != null) { StringTokenizer dataLine = new StringTokenizer(line); int numberOfTokens = dataLine.countTokens(); if (numberOfTokens == 0) break; // Convert input string to a sequence of short integers short[] code = binConversion(dataLine,numberOfTokens); // Check for "null" input if (code != null) { // Dimension row in 2-D dataArray int codeLength = code.length; dataArray[rowIndex] = new short[codeLength]; // Assign to elements in row for (int colIndex=0;colIndex<codeLength;colIndex++) dataArray[rowIndex][colIndex] = code[colIndex]; } else dataArray[rowIndex]= null; // Increment first index in 2-D data array rowIndex++; // get next line line = fileInput.readLine(); } // Close file closeFile(); } /* CHECK DATASET ORDERING */ /** Checks that data set is ordered correctly. */ protected boolean checkOrdering() { boolean result = true; // Loop through input data for(int index=0;index<dataArray.length;index++) { if (!checkLineOrdering(index+1,dataArray[index])) { haveDataFlag = false; result=false; } } // Return return(result); } /* CHECK LINE ORDERING */ /** Checks whether a given line in the input data is in numeric sequence. @param lineNum the line number. @param itemSet the item set represented by the line @return true if OK and false otherwise. */ private boolean checkLineOrdering(int lineNum, short[] itemSet) { for (int index=0;index<itemSet.length-1;index++) { if (itemSet[index] >= itemSet[index+1]) { JOptionPane.showMessageDialog(null,"FILE FORMAT ERROR:\n" + "Attribute data in line " + lineNum + " not in numeric order"); return(false); } } // Default return return(true); } /* COUNT NUMBER OF COLUMNS */ /** Counts number of columns represented by input data. 
*/
    protected void countNumCols() {
        /* The column count is taken to be the largest attribute number in
        the data. Only the last element of each row is inspected, which
        relies on rows being in ascending order (as verified by
        checkLineOrdering). */
        int maxAttribute = 0;

        // Loop through data array
        for (int index = 0; index < dataArray.length; index++) {
            short[] row = dataArray[index];
            // Rows may be null (stored for "null" input) or empty; neither
            // contributes an attribute
            if (row == null || row.length == 0) {
                continue;
            }
            int lastAttribute = row[row.length-1];
            if (lastAttribute > maxAttribute) {
                maxAttribute = lastAttribute;
            }
        }

        numCols = maxAttribute;
        numOneItemSets = numCols;       // default value only
    }

    /* OPEN FILE NAME */

    /** Opens file using the given file name and assigns the resulting reader
    to <TT>fileInput</TT>. If the file cannot be opened an error dialog is
    shown and <TT>fileInput</TT> is left unchanged.
    @param nameOfFile the filename of the file to be opened. */

    protected void openFileName(String nameOfFile) {
        try {
            // Open file
            fileInput = new BufferedReader(new FileReader(nameOfFile));
        }
        catch (IOException ioException) {
            JOptionPane.showMessageDialog(this,"Error Opening File",
                        "Error: ",JOptionPane.ERROR_MESSAGE);
        }
    }

    /* OPEN FILE PATH */

    /** Opens file using filePath (instance field) and assigns the resulting
    reader to <TT>fileInput</TT>. If the file cannot be opened an error
    dialog is shown and <TT>fileInput</TT> is left unchanged. */

    private void openFilePath() {
        try {
            // Open file
            fileInput = new BufferedReader(new FileReader(filePath));
        }
        catch (IOException ioException) {
            JOptionPane.showMessageDialog(this,"Error Opening File",
                        "Error: ",JOptionPane.ERROR_MESSAGE);
        }
    }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -