📄 assocrulemining.java
字号:
/* CLOSE FILE */ /** Close file fileName (instance field). */ protected void closeFile() { if (fileInput != null) { try { fileInput.close(); } catch (IOException ioException) { JOptionPane.showMessageDialog(this,"Error Closeing File", "Error: ",JOptionPane.ERROR_MESSAGE); } } } /* BINARY CONVERSION. */ /** Produce an item set (array of elements) from input line. @param dataLine row from the input data file @param numberOfTokens number of items in row @return 1-D array of short integers representing attributes in input row */ protected short[] binConversion(StringTokenizer dataLine, int numberOfTokens) { short number; short[] newItemSet = null; // Load array for (int tokenCounter=0;tokenCounter < numberOfTokens;tokenCounter++) { number = new Short(dataLine.nextToken()).shortValue(); newItemSet = realloc1(newItemSet,number); } // Return itemSet return(newItemSet); } /* ---------------------------------------------------------------- */ /* */ /* REORDER DATA SET ACCORDING TO ATTRIBUTE FREQUENCY */ /* */ /* ---------------------------------------------------------------- */ /* REORDER INPUT DATA: */ /** Reorders input data according to frequency of single attributes. <P> Example, given the data set: <PRE> 1 2 5 1 2 3 2 4 5 1 2 5 2 3 5 </PRE> This would produce a countArray (ignore index 0): <PRE> +---+---+---+---+---+---+ | | 1 | 2 | 3 | 4 | 5 | +---+---+---+---+---+---+ | | 3 | 5 | 2 | 1 | 4 | +---+---+---+---+---+---+ </PRE> Which sorts to: <PRE> +---+---+---+---+---+---+ | | 2 | 5 | 1 | 3 | 4 | +---+---+---+---+---+---+ | | 5 | 4 | 3 | 2 | 1 | +---+---+---+---+---+---+ </PRE> Giving rise to the conversion Array of the form (no index 0): <PRE> +---+---+---+---+---+---+ | | 3 | 1 | 4 | 5 | 2 | +---+---+---+---+---+---+ | | 3 | 5 | 2 | 1 | 4 | +---+---+---+---+---+---+ </PRE> Note that the second row here are the counts which no longer play a role in the conversion exercise. Thus to the new column number for column 1 is column 3 (i.e. the first vale at index 1). The reconversion array of the form: <PRE> +---+---+---+---+---+---+ | | 2 | 5 | 1 | 3 | 4 | +---+---+---+---+---+---+ </PRE> */ public void idInputDataOrdering() { // Count singles and store in countArray; int[][] countArray = countSingles(); // Bubble sort count array on support value (second index) orderCountArray(countArray); // Define conversion and reconversion arrays defConvertArrays(countArray); // Set sorted flag isOrderedFlag = true; } /* COUNT SINGLES */ /** Counts number of occurrences of each single attribute in the input data. @return 2-D array where first row represents column numbers and second row represents support counts. */ protected int[][] countSingles() { // Dimension and initialize count array int[][] countArray = new int[numCols+1][2]; for (int index=0;index<countArray.length;index++) { countArray[index][0] = index; countArray[index][1] = 0; } // Step through input data array counting singles and incrementing // appropriate element in the count array for(int rowIndex=0;rowIndex<dataArray.length;rowIndex++) { if (dataArray[rowIndex] != null) { for (int colIndex=0;colIndex<dataArray[rowIndex].length; colIndex++) countArray[dataArray[rowIndex][colIndex]][1]++; } } // Return return(countArray); } /* SORT COUNT ARRAY */ /** Bubble sorts count array produced by <TT>countSingles</TT> method so that array is ordered according to frequency of single items. @param countArray The 2-D array returned by the <TT>countSingles</TT> method. */ private void orderCountArray(int[][] countArray) { int attribute, quantity; boolean isOrdered; int index; do { isOrdered = true; index = 1; while (index < (countArray.length-1)) { if (countArray[index][1] >= countArray[index+1][1]) index++; else { isOrdered=false; // Swap attribute = countArray[index][0]; quantity = countArray[index][1]; countArray[index][0] = countArray[index+1][0]; countArray[index][1] = countArray[index+1][1]; countArray[index+1][0] = attribute; countArray[index+1][1] = quantity; // Increment index index++; } } } while (isOrdered==false); } /* SORT FIRST N ELEMENTS IN COUNT ARRAY */ /** Bubble sorts first N elements in count array produced by <TT>countSingles</TT> method so that array is ordered according to frequency of single items. <P> Used when ordering classification input data. @param countArray The 2-D array returned by the <TT>countSingles</TT> method. @param endIndex the index of the Nth element. */ protected void orderFirstNofCountArray(int[][] countArray, int endIndex) { int attribute, quantity; boolean isOrdered; int index; do { isOrdered = true; index = 1; while (index < endIndex) { if (countArray[index][1] >= countArray[index+1][1]) index++; else { isOrdered=false; // Swap attribute = countArray[index][0]; quantity = countArray[index][1]; countArray[index][0] = countArray[index+1][0]; countArray[index][1] = countArray[index+1][1]; countArray[index+1][0] = attribute; countArray[index+1][1] = quantity; // Increment index index++; } } } while (isOrdered==false); } /* DEFINE CONVERSION ARRAYS: */ /** Defines conversion and reconversion arrays. @param countArray The 2-D array sorted by the <TT>orderCcountArray</TT> method.*/ protected void defConvertArrays(int[][] countArray) { // Dimension arrays conversionArray = new int[numCols+1][2]; reconversionArray = new short[numCols+1]; // Assign values for(int index=1;index<countArray.length;index++) { conversionArray[countArray[index][0]][0] = index; conversionArray[countArray[index][0]][1] = countArray[index][1]; reconversionArray[index] = (short) countArray[index][0]; } // Diagnostic ouput if desired //outputConversionArrays(); } /* RECAST INPUT DATA. */ /** Recasts the contents of the data array so that each record is ordered according to conversion array. <P>Proceed as follows: 1) For each record in the data array. Create an empty new itemSet array. 2) Place into this array attribute/column numbers that correspond to the appropriate equivalents contained in the conversion array. 3) Reorder this itemSet and return into the data array. */ public void recastInputData() { short[] itemSet; int attribute; // Step through data array using loop construct for(int rowIndex=0;rowIndex<dataArray.length;rowIndex++) { itemSet = new short[dataArray[rowIndex].length]; // For each element in the itemSet replace with attribute number // from conversion array for(int colIndex=0;colIndex<dataArray[rowIndex].length;colIndex++) { attribute = dataArray[rowIndex][colIndex]; itemSet[colIndex] = (short) conversionArray[attribute][0]; } // Sort itemSet and return to data array sortItemSet(itemSet); dataArray[rowIndex] = itemSet; } } /* RECAST INPUT DATA AND REMOVE UNSUPPORTED SINGLE ATTRIBUTES. */ /** Recasts the contents of the data array so that each record is ordered according to ColumnCounts array and excludes non-supported elements. <P> Proceed as follows: 1) For each record in the data array. Create an empty new itemSet array. 2) Place into this array any column numbers in record that are supported at the index contained in the conversion array. 3) Assign new itemSet back into to data array */ public void recastInputDataAndPruneUnsupportedAtts() { short[] itemSet; int attribute; // Step through data array using loop construct for(int rowIndex=0;rowIndex<dataArray.length;rowIndex++) { // Check for empty row if (dataArray[rowIndex]!= null) { itemSet = null; // For each element in the current record find if supported with // reference to the conversion array. If so add to "itemSet". for(int colIndex=0;colIndex<dataArray[rowIndex].length;colIndex++) { attribute = dataArray[rowIndex][colIndex]; // Check support if (conversionArray[attribute][1] >= minSupport) { itemSet = reallocInsert(itemSet, (short) conversionArray[attribute][0]); } } // Return new item set to data array dataArray[rowIndex] = itemSet; } } // Set isPrunedFlag (used with GUI interface) isPrunedFlag=true; // Reset number of one item sets field numOneItemSets = getNumSupOneItemSets(); } /* GET NUM OF SUPPORTE ONE ITEM SETS */ /** Gets number of supported single item sets (note this is not necessarily the same as the number of columns/attributes in the input set). @return Number of supported 1-item sets */ protected int getNumSupOneItemSets() { int counter = 0; // Step through conversion array incrementing counter for each // supported element found for (int index=1;index < conversionArray.length;index++) { if (conversionArray[index][1] >= minSupport) counter++; } // Return return(counter); } /* RESIZE INPUT DATA */ /** Recasts the input data sets so that only N percent is used. @param percentage the percentage of the current input data that is to form the new input data set (number between 0 and 100). */ public void resizeInputData(double percentage) { // Redefine number of rows numRows = (int) ((double) numRows*(percentage/100.0)); System.out.println("Recast input data, new num rows = " + numRows); // Dimension and populate training set. short[][] trainingSet = new short[numRows][]; for (int index=0;index<numRows;index++) trainingSet[index] = dataArray[index]; // Assign training set label to input data set label. dataArray = trainingSet; // Determine new minimum support threshold value minSupport = (numRows * support)/100.0; } /* ----------------------------------------------- */ /* */ /* ITEM SET INSERT AND ADD METHODS */ /* */ /* ----------------------------------------------- */ /* APPEND */ /** Concatenates two itemSets --- resizes given array so that its length is increased by size of second array and second array added. @param itemSet1 The first item set. @param itemSet2 The item set to be appended. @return the combined item set */ protected short[] append(short[] itemSet1, short[] itemSet2) { // Test for empty sets, if found return other if (itemSet1 == null) return(copyItemSet(itemSet2)); else if (itemSet2 == null) return(copyItemSet(itemSet1)); // Create new array short[] newItemSet = new short[itemSet1.length+itemSet2.length]; // Loop through itemSet 1 int index1; for(index1=0;index1<itemSet1.length;index1++) { newItemSet[index1]=itemSet1[index1]; } // Loop through itemSet 2 for(int index2=0;index2<itemSet2.length;index2++) { newItemSet[index1+index2]=itemSet2[index2]; } // Return return(newItemSet); } /* REALLOC INSERT */ /** Resizes given item set so that its length is increased by one and new element inserted. @param oldItemSet the original item set @param newElement the new element/attribute to be inserted @return the combined item set */ protected short[] reallocInsert(short[] oldItemSet, short newElement) { // No old item set if (oldItemSet == null) { short[] newItemSet = {newElement}; return(newItemSet); } // Otherwise create new item set with length one greater than old // item set int oldItemSetLength = oldItemSet.length; short[] newItemSet = new short[oldItemSetLength+1]; // Loop int index1; for (index1=0;index1 < oldItemSetLength;index1++) { if (newElement < oldItemSet[index1]) { newItemSet[index1] = newElement; // Add rest for(int index2 = index1+1;index2<newItemSet.length;index2++) newItemSet[index2] = oldItemSet[index2-1]; return(newItemSet); } else newItemSet[index1] = oldItemSet[index1]; } // Add to end newItemSet[newItemSet.length-1] = newElement; // Return new item set return(newItemSet); } /* REALLOC 1 */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -