📄 assocrulemining.java
字号:
// ordered.
for(int index=1;index<=endIndex;index++) {
conversionArray[countArray[index][0]][0] = index;
conversionArray[countArray[index][0]][1] = countArray[index][1];
reconversionArray[index] = (short) countArray[index][0];
}
// Last elements unchanged (classes)
for(int index=endIndex+1;index<countArray.length;index++) {
conversionArray[countArray[index][0]][0] = index;
conversionArray[countArray[index][0]][1] = countArray[index][1];
reconversionArray[index] = (short) index;
}
// Diagnostic output if desired
//outputConversionArrays();
}
/* RECAST INPUT DATA. */
/**
 * Recasts the contents of the (input) data array so that each record is
 * expressed, and ordered, according to the conversion array.
 *
 * <p>For every non-null record in the data array:
 * <ol>
 * <li>a new item set of the same length as the record is created;</li>
 * <li>each attribute number is replaced by the value stored in column 0 of
 * that attribute's conversion array entry;</li>
 * <li>the item set is passed to {@code sortItemSet} and then stored back
 * into the data array in place of the original record.</li>
 * </ol>
 *
 * <p>Null (empty) records are skipped and left unchanged, matching the way
 * {@code recastInputDataAndPruneUnsupportedAtts} and
 * {@code horizontal2vertical} treat them.
 */
public void recastInputData() {
    // Step through data array using loop construct
    for (int rowIndex = 0; rowIndex < dataArray.length; rowIndex++) {
        short[] record = dataArray[rowIndex];
        // Check for empty row: there is nothing to convert, and reading
        // its length would raise a NullPointerException.
        if (record == null) {
            continue;
        }
        short[] itemSet = new short[record.length];
        // For each element in the record substitute the attribute number
        // held in the conversion array
        for (int colIndex = 0; colIndex < record.length; colIndex++) {
            int attribute = record[colIndex];
            itemSet[colIndex] = (short) conversionArray[attribute][0];
        }
        // Sort itemSet and return to data array
        sortItemSet(itemSet);
        dataArray[rowIndex] = itemSet;
    }
}
/* RECAST INPUT DATA AND REMOVE UNSUPPORTED SINGLE ATTRIBUTES. */
/**
 * Recasts the contents of the data array using the conversion array and
 * drops every attribute that is not supported.
 *
 * <p>For each non-null record a replacement item set is built: an attribute
 * is kept only when column 1 of its conversion array entry is at least
 * {@code minSupport}, in which case the value in column 0 of that entry is
 * added to the item set through {@code reallocInsert}. The replacement is
 * then stored back into the data array; a record in which no attribute is
 * supported therefore becomes {@code null}. Records that are already
 * {@code null} are left untouched.
 *
 * <p>Afterwards {@code isPrunedFlag} is set and {@code numOneItemSets} is
 * refreshed from {@link #getNumSupOneItemSets()}.
 */
public void recastInputDataAndPruneUnsupportedAtts() {
    for (int row = 0; row < dataArray.length; row++) {
        short[] record = dataArray[row];
        // Nothing to prune in an empty row
        if (record == null) {
            continue;
        }
        // Stays null unless at least one attribute turns out to be supported
        short[] supported = null;
        for (int col = 0; col < record.length; col++) {
            int attribute = record[col];
            boolean isSupported = conversionArray[attribute][1] >= minSupport;
            if (isSupported) {
                short converted = (short) conversionArray[attribute][0];
                supported = reallocInsert(supported, converted);
            }
        }
        dataArray[row] = supported;
    }
    // Set isPrunedFlag (used with GUI interface)
    isPrunedFlag = true;
    // Reset number of one item sets field
    numOneItemSets = getNumSupOneItemSets();
}
/* GET NUM OF SUPPORTED ONE ITEM SETS */
/**
 * Counts the supported single item sets (note this is not necessarily the
 * same as the number of columns/attributes in the input set).
 *
 * <p>An entry of the conversion array counts as supported when its column 1
 * value is at least {@code minSupport}. Entry 0 is never inspected; the scan
 * starts at index 1.
 *
 * @return number of supported 1-item sets
 */
protected int getNumSupOneItemSets() {
    int supportedCount = 0;
    int position = 1;
    while (position < conversionArray.length) {
        if (conversionArray[position][1] >= minSupport) {
            supportedCount++;
        }
        position++;
    }
    return supportedCount;
}
/* RESIZE INPUT DATA */
/**
 * Recasts the input data set so that only the first N percent of its rows
 * is used.
 *
 * <p>The new row count is {@code numRows * percentage / 100}, truncated to
 * an int. The data array is replaced by an array holding references to the
 * first that-many records, {@code numRows} is updated, and
 * {@code minSupport} is recalculated from {@code support} for the new row
 * count.
 *
 * <p>The request is validated before any field is modified, so a rejected
 * call leaves the object unchanged (previously {@code numRows} was already
 * overwritten when the copy failed).
 *
 * @param percentage the percentage of the current input data that is to form
 *        the new input data set (number between 0 and 100).
 * @throws IllegalArgumentException if the resulting row count is negative
 *         or exceeds the number of records currently held.
 */
public void resizeInputData(double percentage) {
    int availableRows = (dataArray == null) ? 0 : dataArray.length;
    int newNumRows = (int) ((double) numRows * (percentage / 100.0));
    // Reject the request up front instead of failing half way through with
    // a NegativeArraySizeException / ArrayIndexOutOfBoundsException.
    if (newNumRows < 0 || newNumRows > availableRows) {
        throw new IllegalArgumentException("Percentage " + percentage
                + " gives " + newNumRows + " rows but only " + availableRows
                + " are available; expected a value between 0 and 100");
    }
    // Redefine number of rows
    numRows = newNumRows;
    System.out.println("Recast input data, new num rows = " + numRows);
    // Dimension and populate training set (row references are shared with
    // the old array, not deep-copied).
    short[][] trainingSet = new short[numRows][];
    if (numRows > 0) {
        System.arraycopy(dataArray, 0, trainingSet, 0, numRows);
    }
    // Assign training set label to input data set label.
    dataArray = trainingSet;
    // Determine new minimum support threshold value
    minSupport = (numRows * support)/100.0;
}
/**
 * Reconverts the given item set according to the contents of the
 * reconversion array.
 *
 * <p>When there is no reconversion array the argument itself is returned;
 * a {@code null} item set yields {@code null}. Otherwise a new array of the
 * same length is returned in which every item has been replaced by the
 * reconversion array element at that item's index. The argument is not
 * modified.
 *
 * @param itemSet the given item set.
 * @return the reconverted item set.
 */
protected short[] reconvertItemSet(short[] itemSet) {
    // Either nothing to convert with, or nothing to convert: in both cases
    // the argument (possibly null) is handed straight back.
    if (reconversionArray == null || itemSet == null) {
        return itemSet;
    }
    short[] reconverted = new short[itemSet.length];
    int position = 0;
    for (short item : itemSet) {
        reconverted[position] = reconversionArray[item];
        position++;
    }
    return reconverted;
}
/**
 * Reconverts a single item if a reconversion array is present.
 *
 * @param item the given item (attribute).
 * @return the reconversion array element at index {@code item}, or
 *         {@code item} itself when there is no reconversion array.
 */
protected short reconvertItem(short item) {
    return (reconversionArray == null) ? item : reconversionArray[item];
}
/* ---------------------------------------------------------- */
/* */
/* CONVERT FROM HORIZONTAL TO VERTICAL FORMAT */
/* */
/* ---------------------------------------------------------- */
/* HORIZONTAL TO VERTICAL */
/**
 * Converts the input data from horizontal format to vertical format.
 *
 * <p>The data set is stored in a 2-D array of short integers. In the
 * horizontal form the first index (the tid index) is the
 * row/record/transaction number and the second (the data index) addresses
 * the attribute numbers of that record. In the vertical form produced here,
 * row {@code a - 1} holds, in the order they are encountered, the tid
 * indexes of the records that contain attribute {@code a}. Tid indexes are
 * stored after a cast to {@code short}.
 *
 * <p>On completion the data array refers to the new structure,
 * {@code numCols} takes the old value of {@code numRows}, and
 * {@code numRows} becomes the length of the new array.
 *
 * <p>WARNINGS: (1) Assumes that no input data reordering or pruning has
 * been implemented, (2) Original dataset is deleted.
 */
public void horizontal2vertical() {
    // Next free position in each vertical record. Java zero-initialises the
    // array, so every marker starts at 0. Sized numCols+1 as attribute
    // numbers start at 1 (there is no attribute 0).
    int[] nextFreeSlot = new int[numCols + 1];
    // Dimension new array
    short[][] verticalData = h2vDimNewDataArry();
    // Walk the old dataset and cast the horizontal data to vertical format
    for (int tid = 0; tid < dataArray.length; tid++) {
        short[] record = dataArray[tid];
        // Records without attributes contribute nothing
        if (record == null) {
            continue;
        }
        for (short attribute : record) {
            int column = attribute - 1;
            int slot = nextFreeSlot[column];
            verticalData[column][slot] = (short) tid;
            nextFreeSlot[column] = slot + 1;
        }
    }
    // Replace old array reference
    dataArray = verticalData;
    // Reassign dimensions
    numCols = numRows;
    numRows = dataArray.length;
}
/* HORIZONTAL TO VERTICAL DIMENSION NEW DATA ARRAY */
/**
 * Dimensions the new data array used when converting from horizontal to
 * vertical format.
 *
 * <p>The result of {@code countSingles()} is read from index 1 onwards.
 * For each entry, column 0 (minus one) selects the row of the new array and
 * column 1 gives that row's length; presumably these are the attribute
 * number and the number of records in which the attribute appears (confirm
 * against {@code countSingles}). The new array has one row fewer than the
 * count array has entries.
 *
 * @return the newly dimensioned 2-D array.
 */
private short[][] h2vDimNewDataArry() {
    // Count singles on old array
    int[][] singleCounts = countSingles();
    // One row per count entry, excluding the unused entry at index 0
    short[][] verticalData = new short[singleCounts.length - 1][];
    for (int entry = 1; entry < singleCounts.length; entry++) {
        int targetRow = singleCounts[entry][0] - 1;
        verticalData[targetRow] = new short[singleCounts[entry][1]];
    }
    return verticalData;
}
/* -------------------------------------------------------------- */
/* */
/* SEGMENT DATA */
/* */
/* -------------------------------------------------------------- */
/* Set of methods used for data segmentation experiments. */
/* SEGMENT DATA SET */
/**
 * Horizontally segments the input data set into N segments and stores them
 * to disk using file names made up of the input file name plus the segment
 * number.
 *
 * <p>The input file is opened under its short name (the part of
 * {@code fileName} after the last '/'). Segments 1 to N-1 each cover
 * {@code calcRowsPerSegment(numSegments)} records; the last segment runs
 * from where the previous one ended up to {@code numRows}, so it may have
 * slightly more records than the others.
 *
 * <p>The input file is closed even when writing a segment fails.
 *
 * @param numSegments the number of segments into which the data is to be
 *        decomposed.
 * @throws IOException if reading the input or writing a segment fails.
 */
public void segmentDataSet(int numSegments) throws IOException {
    // Calculate number of rows per segment
    int rowsPerSegment = calcRowsPerSegment(numSegments);
    // Determine file name (strip any leading directory part)
    String shortFileName = fileName.substring(fileName.lastIndexOf('/') + 1);
    // Open input data file
    openFileName(shortFileName);
    try {
        // Loop through input data for N-1 segments
        int startRecord = 0;
        int endRecord = rowsPerSegment;
        for (int segIndex = 1; segIndex < numSegments; segIndex++) {
            String outputFileName = shortFileName + segIndex;
            // Step through input data file
            ouputSegmentToFile(outputFileName, startRecord, endRecord);
            // Increment counters
            startRecord = endRecord;
            endRecord = endRecord + rowsPerSegment;
        }
        // Process last segment (may have slightly more records than
        // previous segments)
        String outputFileName = shortFileName + numSegments;
        ouputSegmentToFile(outputFileName, startRecord, numRows);
    } finally {
        // Close input file on both the normal and the failure path so the
        // underlying handle is not leaked.
        fileInput.close();
    }
}
/* READ, SEGMENT AND PARTITION DATA SEGMENTS */
/**
 * Reads the input data segment by segment, storing each segment in memory
 * (in the data array), where it is then partitioned and written to file.
 *
 * <p>The input file is opened under its short name (the part of
 * {@code fileName} after the last '/'), which is also the "root" of the
 * names passed to {@code partitionDataArray}: the root followed by the
 * segment number. Segments 1 to N-1 each hold
 * {@code calcRowsPerSegment(numSegments)} rows; the last segment holds the
 * remaining {@code numRows - rowsPerSegment * (numSegments - 1)} rows.
 *
 * <p>The input file is closed even when reading or partitioning fails.
 *
 * @param numSegments the number of segments into which the data is to be
 *        decomposed.
 * @throws IOException if reading the input or writing a partition fails.
 */
public void readSegAndPartData(int numSegments) throws IOException {
    // Calculate number of rows per segment
    int rowsPerSegment = calcRowsPerSegment(numSegments);
    // Determine "root" file name (which will have segment and partition
    // numbers appended to it).
    String shortFileName = fileName.substring(fileName.lastIndexOf('/') + 1);
    // Open input data file
    openFileName(shortFileName);
    try {
        // Process all but last segment
        for (int segIndex = 1; segIndex < numSegments; segIndex++) {
            readSegmentIntoDataArray(rowsPerSegment);
            partitionDataArray(shortFileName + segIndex);
        }
        // Process last segment, which takes whatever rows remain
        int rowsInLastSeg = numRows - (rowsPerSegment * (numSegments - 1));
        readSegmentIntoDataArray(rowsInLastSeg);
        partitionDataArray(shortFileName + numSegments);
    } finally {
        // Close file on both the normal and the failure path so the
        // underlying handle is not leaked.
        fileInput.close();
    }
}

/**
 * Replaces the data array with a fresh one of the given size and fills it
 * by reading that many lines from the open input file, passing each line
 * and its row index to {@code processInputLine}.
 *
 * @param rowCount the number of lines to read, i.e. rows in the segment.
 * @throws IOException if reading from the input file fails.
 */
private void readSegmentIntoDataArray(int rowCount) throws IOException {
    // Define data array
    dataArray = new short[rowCount][];
    // Read segment from input file and store in data array
    for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) {
        String line = fileInput.readLine();
        processInputLine(line, rowIndex);
    }
}
/* PARTITION DATA ARRAY */
/** Vertically partitions, one partition per attribute, and stores to disk
using file names made up of the input file name plus column
numbers.
@param fName the name of the file data is to be stored in. */
protected void partitionDataArray(String fName) throws IOException {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -