📄 logfileqsort.java
字号:
// hash table for storing all known transaction id strings
Hashtable transactionIDHash = new Hashtable();
long transactionIDCounter = 1; // note: numbering really starts w/ "1" !
// read file data into memory structure
MiningVector miningVector;
String transactionIDString;
while(processedStream.next())
{
miningVector = processedStream.read();
// extract transaction id
// transaction id is converted to a number using a hash table (all known tids are simply numbered)
transactionIDString = new String(miningVector.toVector().elementAt(0).toString());
// if transaction id is found in hash table, extract corresponding value
// otherwise, t-id and new value are added to the hashtable
if (transactionIDHash.containsKey(transactionIDString))
{
globalBlockSortList.transactionIDNumber = new Long(transactionIDHash.get(transactionIDString).toString()).longValue();
} else
{
transactionIDHash.put(transactionIDString, new Long(transactionIDCounter));
globalBlockSortList.transactionIDNumber = transactionIDCounter;
transactionIDCounter++;
}
// get key of item id
globalBlockSortList.itemIDkey = miningVector.getValue(1);
globalBlockSortList.next = new GlobalBlockListClass(); // note: this yields a single unused element, but avoids an "if (ptr==null)" in the while loop
globalBlockSortList = globalBlockSortList.next;
globalBlockValidEntries++;
}
globalBlockSortList = anchor;
anchor = null;
// reserve continuous sort buffer in memory
globalBlockSortArray = new GlobalBlockBaseClass[globalBlockValidEntries];
// copy contents of list to sort buffer
for (int i=0;i<globalBlockValidEntries;i++)
{
globalBlockSortArray[i] = new GlobalBlockBaseClass();
globalBlockSortArray[i].itemIDkey = globalBlockSortList.itemIDkey;
globalBlockSortArray[i].transactionIDNumber = globalBlockSortList.transactionIDNumber;
globalBlockSortList = globalBlockSortList.next;
}
// this is the sorting part: a stable merge sort implementation is fed
// w/ the data structure and a fitting comparator
long start = ( new java.util.Date() ).getTime();
Arrays.sort(globalBlockSortArray, new GlobalBlockComparator());
long end = ( new java.util.Date() ).getTime();
sortTimeTaken = end - start;
// read cursor (cursor for providing data elements)
globalBlockCursor = 0;
} // end of initialisation
// are there more vectors available in the block?
if(globalBlockCursor<globalBlockValidEntries)
{
double instance[] = new double[3];
// convert data from sorting block back to mining vectors
// this is simple for the item-id, because it's a "stored" type and we have stored the key in memory
// but a the transaction id needs to be generated anew, because it's an "unstored" type
// (an besides there's no true reference available anyways)
MiningAttribute attributeTransactionID = processedStream.metaDataProcessed.getMiningAttribute("transactionId");
Category transactionCat = new Category( ""+globalBlockSortArray[globalBlockCursor].transactionIDNumber );
double d0 = ((CategoricalAttribute)attributeTransactionID).getKey( transactionCat );
instance[0] = Category.isMissingValue(d0) ? ((CategoricalAttribute)attributeTransactionID).addCategory( transactionCat ): d0;
instance[1] = globalBlockSortArray[globalBlockCursor].itemIDkey;
// numerical attribute is not yet initialised, still
instance[2] = Category.MISSING_VALUE;
nextMiningVector = new MiningVector(instance);
nextMiningVector.setMetaData(processedStream.metaDataProcessed);
nextTransactionID = globalBlockSortArray[globalBlockCursor].transactionIDNumber;
globalBlockCursor++;
return true;
} else
return false;
}
/**
* next() implementation based on an algorithm that decomposes the data and processes it in equally sized chunks.<BR>
* The data is processed sequentially in non-overlapping blocks, which induces
* a certain amount of errors ("artificial" transactions); the advantage of
* this algorithm is the linear runtime (vs. L) and constant amount of memory
* required, so it can be applied to sources of any size.
*
* @throws MiningException
* @return boolean
*/
private boolean decompBlockNext() throws MiningException
{
// check if block contains data
if (!decompBlockDataPresent)
{
decompBlockDataPresent = true;
// uninitialised means we have to ensure, the input stream is ready for reading (start of stream)
if (!decompBlockInitialised)
{
processedStream.reset();
decompBlockInitialised = true;
}
decompBlockEntries = 0;
decompBlockCursor = 0;
Hashtable transactionIDHash = new Hashtable();
// initialise TID counter w/ a value that will not clash w/ any earlier TID number
int transactionIDCounter = decompBlockMaxTransactionNumber;
// System.out.println("memory " + Runtime.getRuntime().totalMemory()+ " free = " + Runtime.getRuntime().freeMemory());
// read a block from file
MiningVector miningVector;
int i = 0;
String transactionIDString;
while ((decompBlockEntries<decompBlockBlockSize)&&(processedStream.next()))
{
decompBlockEntries++;
miningVector = processedStream.read();
// extract transaction id
// transaction id is converted to a number using a hash table (all known tids are simply numbered)
transactionIDString = new String(miningVector.toVector().elementAt(0).toString());
if (transactionIDHash.containsKey(transactionIDString)) {
decompBlockSortArray[i].transactionIDNumber = new Long(transactionIDHash.get(transactionIDString).toString()).intValue();
}
else {
transactionIDCounter++;
transactionIDHash.put(transactionIDString, new Long(transactionIDCounter));
decompBlockSortArray[i].transactionIDNumber = transactionIDCounter;
// in case of debugging "new" transaction id is checked versus global hashtable
if(decompBlockDebug)
{
if (decompBlockDebugHashtable.containsKey(transactionIDString))
{
// in this case a wrong association happened
decompBlockDebugWrongSingle++;
// check if "wrong" session was already referenced
if (!((Boolean)decompBlockDebugHashtable.get(transactionIDString)).booleanValue())
{
decompBlockDebugHashtable.remove(transactionIDString);
decompBlockDebugHashtable.put(transactionIDString, new Boolean(true));
} else
decompBlockDebugWrongMulti++;
}
else
// also add transaction id to global hashtable
decompBlockDebugHashtable.put(transactionIDString, new Boolean(false));
}
}
// copy key of item
decompBlockSortArray[i].itemIDkey = miningVector.getValue(1);
i++;
}
// remember new "highest" transaction number
decompBlockMaxTransactionNumber = transactionIDCounter;
// reached end of file (we were unable to read any more records)
if (decompBlockEntries==0)
return false;
// last block is typically incomplete, so padding is required as sorting algorithm does not accept comparator and limiting indices at once
if(decompBlockEntries<decompBlockBlockSize)
for (i=decompBlockEntries;i<decompBlockBlockSize;i++)
decompBlockSortArray[i].transactionIDNumber = java.lang.Integer.MAX_VALUE;
// sort data
long start = ( new java.util.Date() ).getTime();
Arrays.sort(decompBlockSortArray, new DecompBlockComparator());
long end = ( new java.util.Date() ).getTime();
sortTimeTaken += end - start;
}
// initialisation part complete; return data
double instance[] = new double[3];
// convert data from sorting block back to mining vectors
MiningAttribute attributeTransactionID = processedStream.metaDataProcessed.getMiningAttribute("transactionId");
Category transactionCat = new Category( ""+decompBlockSortArray[decompBlockCursor].transactionIDNumber );
double d0 = ((CategoricalAttribute)attributeTransactionID).getKey( transactionCat );
instance[0] = Category.isMissingValue(d0) ? ((CategoricalAttribute)attributeTransactionID).addCategory( transactionCat ): d0;
instance[1] = decompBlockSortArray[decompBlockCursor].itemIDkey;
// numerical attribute is not yet initialised, still
instance[2] = Category.MISSING_VALUE;
nextMiningVector = new MiningVector(instance);
nextMiningVector.setMetaData(processedStream.metaDataProcessed);
nextTransactionID = decompBlockSortArray[decompBlockCursor].transactionIDNumber;
decompBlockCursor++;
// check if current entry was the last available
if(decompBlockCursor>=decompBlockEntries)
{
decompBlockDataPresent = false;
}
return true;
}
/**
* next() implementation based on an algorithm that decomposes the data and processes it in a floating window.<BR>
* The data is processed in a list (finite specified length, uses variable )
*
* @throws MiningException
* @return boolean
*/
private boolean decompNext() throws MiningException
{
MiningVector miningVector;
DecompHashClass miningVectorStorageData;
DecompListClass miningVectorListEntry;
// read at least one entry into list
// initially, list is filled up w/ until number of elements equals decompWindowSize
// after init/ list size is always decompWindowSize-1, so one entry is read per cycle until p/.next() returns false
while ((decompListEntries <= decompWindowSize)&&(processedStream.next()))
{
long start = ( new java.util.Date() ).getTime();
// add entry to list
// read mining vector
miningVector = processedStream.read();
decompListEntries++;
// create new entry for list
miningVectorListEntry = new DecompListClass();
miningVectorListEntry.nextElement = null;
miningVectorListEntry.itemIDkey = miningVector.getValue(1);
// extract tid string
String transactionIDString = miningVector.toVector().elementAt(0).toString();
// identical - check which one is faster later
// transactionIDString = ((CategoricalAttribute)miningVector.getMetaData().getMiningAttribute(0)).getCategory(miningVector.getValue(0)).getDisplayValue();
// copy pointer to string - string is required for "reverse"-lookup in hashtable
miningVectorListEntry.transactionIDString = transactionIDString;
// if transaction id is found in hash table, extract corresponding object for storage
// otherwise, t-id and new value are added to the hashtable
if (decompHashtable.containsKey(transactionIDString))
{
// retrieves stored object
miningVectorStorageData = (DecompHashClass)decompHashtable.get(transactionIDString);
miningVectorListEntry.transactionIDNumber = miningVectorStorageData.transactionIDNumber;
// update decompListLastElement if applicable
if (decompListLastElement == miningVectorStorageData.lastElement)
decompListLastElement = miningVectorListEntry;
// set correct next element for current entry
miningVectorListEntry.nextElement = miningVectorStorageData.lastElement.nextElement;
// add miningVectorListEntry to list (list cannot be empty)
// update [miningVectorStorageData.lastElement]
miningVectorStorageData.lastElement.nextElement = miningVectorListEntry; // entry is going to be the "new" last entry in the list
// update miningVectorStorageData.lastElement
miningVectorStorageData.lastElement = miningVectorListEntry;
// decompHashtable.put(transactionIDString, miningVectorStorageData);
// - Optimize
// we have to try to move all entries of the current transaction to the end of the list;
// however, if entries from the current transaction have been "supplied" already,
// they can only be appended to where they belong
// also, if current entry is already at the bottom of the list, there's nothing to do as well
if ((nextTransactionID != miningVectorListEntry.transactionIDNumber)&&(decompListLastElement != miningVectorListEntry)&&(decompOptimize))
{
// fetch control entry of next transaction in list
DecompHashClass nextControlEntry = (DecompHashClass)decompHashtable.get(miningVectorStorageData.lastElement.nextElement.transactionIDString);
// in case this is not the first transaction "part" in the list, the list needs to be corrected
if (miningVectorStorageData.previousTransactionIDString != null)
{
// "exclude" the part with the current transaction entries from the list
DecompHashClass preceedingControlEntry = (DecompHashClass)decompHashtable.get(miningVectorStorageData.previousTransactionIDString);
preceedingControlEntry.lastElement.nextElement = miningVectorStorageData.lastElement.nextElement;
// update previousTransactionIDString of previously following transaction in list
nextControlEntry.previousTransactionIDString = miningVectorStorageData.previousTransactionIDString;
} else
{
// the current part was the first in the list, so the list anchor needs to be updated
decompListFirstElement = miningVectorStorageData.lastElement.nextElement;
// no more preceeding transactions in the list
nextControlEntry.previousTransactionIDString = null;
}
// update previousTransactionIDString of current control entry
miningVectorStorageData.previousTransactionIDString = decompListLastElement.transactionIDString;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -