// File: logfileqsort.java
// (Web-viewer residue, not part of the source: "Font size:" control.)
// mark the new end of the list and add the "floating" part to the end of the list
miningVectorStorageData.lastElement.nextElement = null;
decompListLastElement.nextElement = miningVectorStorageData.firstElement;
// update entry marking the end of the list
decompListLastElement = miningVectorStorageData.lastElement;
}
} else // if (decompHashtable.containsKey(transactionIDString))
{
// create new control entry for hash table
miningVectorStorageData = new DecompHashClass();
miningVectorListEntry.transactionIDNumber = decompTransactionIDCounter;
miningVectorStorageData.transactionIDNumber = decompTransactionIDCounter;
// append miningVectorListEntry to list
if (decompListLastElement != null) // list empty?
{
// remember the transaction ID number of the previously saved transaction
miningVectorStorageData.previousTransactionIDString = decompListLastElement.transactionIDString;
decompListLastElement.nextElement = miningVectorListEntry;
decompListLastElement = miningVectorListEntry;
} else
{
// no previous transaction
miningVectorStorageData.previousTransactionIDString = null;
decompListFirstElement = miningVectorListEntry;
decompListLastElement = miningVectorListEntry;
}
// add control entry to hash table (w/ initialised ptrs)
miningVectorStorageData.firstElement = miningVectorListEntry;
miningVectorStorageData.lastElement = miningVectorListEntry;
decompHashtable.put(transactionIDString, miningVectorStorageData);
decompTransactionIDCounter++;
}
long end = ( new java.util.Date() ).getTime();
sortTimeTaken += end - start;
} // while ((decompListEntries <= decompWindowSize)&&(processedStream.next()))
// ------------------- end of init part ------------------------------------
// if list is empty, this means there are no more entries available
// (local list-"buffer" is empty and p/.next() returned false)
if (decompListEntries == 0)
return false;
double instance[] = new double[3];
// deliver first entry in list
// convert data from sorting block back to mining vectors
MiningAttribute attributeTransactionID = processedStream.metaDataProcessed.getMiningAttribute("transactionId");
Category transactionCat = new Category( ""+decompListFirstElement.transactionIDNumber);
double d0 = ((CategoricalAttribute)attributeTransactionID).getKey( transactionCat );
instance[0] = Category.isMissingValue(d0) ? ((CategoricalAttribute)attributeTransactionID).addCategory( transactionCat ): d0;
instance[1] = decompListFirstElement.itemIDkey;
// numerical attribute is not yet initialised, still
instance[2] = Category.MISSING_VALUE;
nextMiningVector = new MiningVector(instance);
nextMiningVector.setMetaData(processedStream.metaDataProcessed);
nextTransactionID = decompListFirstElement.transactionIDNumber;
// delete entry - update structures
decompListEntries--;
// Update Hashtable (update first element, if applicable delete whole hashtable entry)
// retrieve entry from hashtable
miningVectorStorageData = (DecompHashClass)decompHashtable.get(decompListFirstElement.transactionIDString);
// - Update Hashtable
// check if current entry was last one of transaction recorded in hashtable
if (miningVectorStorageData.firstElement != miningVectorStorageData.lastElement)
{
// no, only update ptr to first entry
miningVectorStorageData.firstElement = miningVectorStorageData.firstElement.nextElement;
} else
{
// before deleting hash table entry update control entry referencing it
if (miningVectorStorageData.lastElement.nextElement != null)
{
DecompHashClass nextControlEntry = (DecompHashClass)decompHashtable.get(miningVectorStorageData.lastElement.nextElement.transactionIDString);
nextControlEntry.previousTransactionIDString = null;
}
// now eliminate whole hashtable entry
decompHashtable.remove(decompListFirstElement.transactionIDString);
}
// - Update decompListFirstElement
decompListFirstElement = decompListFirstElement.nextElement;
return true;
}
/**
 * Advances the sorted stream by one mining vector.
 *
 * This override is only a dispatcher: it forwards the call to the
 * next-function of the sorting method currently selected, so the interface
 * is the same no matter which algorithm is in use.
 *
 * Before dispatching, the "next mining vector updated" flag is raised. The
 * wall-clock time spent in the selected algorithm is added to
 * totalTimeTaken; nothing is added when the call ends with an exception.
 *
 * @throws MiningException if the selected sorting method is unknown; the
 *         declaration also covers exceptions raised by the algorithm called
 * @return the value returned by the selected algorithm's next-function
 */
public boolean next() throws MiningException
{
    nextMiningVectorUpdated = true;
    long startedAt = System.currentTimeMillis();

    boolean hasNext;
    switch (sortingMethod)
    {
        case METHOD_TRIVIAL:
            hasNext = trivialNext();
            break;
        case METHOD_GLOBALBLOCK:
            hasNext = globalBlockNext();
            break;
        case METHOD_DECOMPBLOCK:
            hasNext = decompBlockNext();
            break;
        case METHOD_DECOMP:
            hasNext = decompNext();
            break;
        default:
            throw new MiningException("Function next(): Unknown sorting method requested (" + sortingMethod + ").");
    }

    // account for the time spent in the selected algorithm
    long elapsed = System.currentTimeMillis() - startedAt;
    totalTimeTaken += elapsed;
    return hasNext;
}
/**
* Resets the (sorted) data stream.
*
* The underlying processed stream is reset first; afterwards the per-session
* counters are put back to their start values and the currently selected
* sorting method is applied again.
*
* @throws MiningException declared by this method; presumably propagated from
*         processedStream.reset() or setSortingMethod() - confirm against those
*/
public void reset() throws MiningException
{
// rewind the underlying stream before touching any local state
processedStream.reset();
// initialise new session
itemIndexCurrent = 1;
transactionIDLastSeen = -1;
// NOTE(review): presumably re-initialises the state of the selected sorting
// algorithm - confirm against setSortingMethod()
setSortingMethod(sortingMethod);
}
/**
* Recognize log file. Not supported by this class: according to the original
* author's note, recognition is done autonomously (triggered by the
* preprocessor class used). Calling this function always results in an
* exception.
*
* @return nothing - the method never returns normally
* @throws MiningException always thrown, with the message "Not implemented"
*/
public MiningDataSpecification recognize() throws MiningException
{
// unconditionally rejected; there is no code path that returns a value
throw new MiningException("Not implemented");
}
/**
* Open logfile for reading.
* This function does nothing: according to the original author's note, the
* preprocessor used by this class handles the underlying log stream
* autonomously.
*
* @throws MiningException never thrown by this implementation (the body is
*         empty); only declared in the signature
*/
public void open() throws MiningException
{
// intentionally empty
}
/**
* Close logfile.
* This function does nothing: according to the original author's note, the
* preprocessor used by this class handles the underlying log stream
* autonomously.
*
* @throws MiningException never thrown by this implementation (the body is
*         empty); only declared in the signature
*/
public void close() throws MiningException
{
// intentionally empty
}
/**
* Not implemented for this stream: the function always throws an exception.
*
* @throws MiningException always thrown, with the message
*         "findPhysicalModel() is not implemented."
*/
public void findPhysicalModel() throws MiningException
{
// unconditionally rejected
throw new MiningException("findPhysicalModel() is not implemented.");
}
/**
* Not implemented for this stream: the function always throws an exception.
*
* @throws MiningException always thrown, with the message
*         "getPhysicalToLogicalModelTransformation() is not implemented."
* @return nothing - the method never returns normally (declared type is
*         org.omg.cwm.analysis.transformation.TransformationMap)
*/
public org.omg.cwm.analysis.transformation.TransformationMap getPhysicalToLogicalModelTransformation() throws MiningException
{
// unconditionally rejected; there is no code path that returns a value
throw new MiningException("getPhysicalToLogicalModelTransformation() is not implemented.");
}
/**
 * Lists the names of the stream methods this class supports.
 * The enumeration contains exactly one entry, "reset".
 *
 * @return enumeration over the supported stream method names
 */
public Enumeration getSupportedStreamMethods()
{
    // a one-element, read-only list is all that is needed here
    return java.util.Collections.enumeration(
        java.util.Collections.singletonList("reset"));
}
/**
 * Manual test driver: parses a web-server log, sorts it with the
 * decomposition method and prints the first vectors plus timing statistics.
 *
 * Changes compared with the previous version:
 * - the log file may be passed as the first command-line argument; without
 *   arguments the built-in sample log is used, as before;
 * - each vector is read once per iteration (the result of a second,
 *   immediately following read() call was discarded);
 * - parse actions whose handles were never used are no longer stored in
 *   local variables (the actions themselves are still registered).
 *
 * @param args optional; args[0], when present, is the path of the log file
 */
public static void main(String[] args)
{
    try {
        // Other sample inputs that can be passed as args[0]:
        // "config\\config.properties"
        // "data\\logs\\NCSA Combined Log File Format.log"
        // "data\\logs\\NCSA Common Log File Format.log"
        // "data\\logs\\Extended Log File Format.log"
        // "data\\logs\\IIS Log File Format.log"
        // "data\\logs\\Intershop Log File Format.log"
        // "data\\logs\\Shop Log File Format.log"
        // "data\\logs\\UNRECOGNIZED.log"
        String fileName = "data\\logs\\WebShopNCSA.log";
        if (args != null && args.length > 0) {
            fileName = args[0];
        }
        LogFileQSort converter = new LogFileQSort(fileName);

        // configure "parsing tree"
        LogFileQParse.LogFileQParseAction extrquote = converter.getPreprocessor().addParseActionExtract(null, true, true, true, LogFileQParse.PARSE_EXTRACT,1,'\"',1,'\"');
        LogFileQParse.LogFileQParseAction extrtransac = converter.getPreprocessor().addParseActionPerl(extrquote, true, true, false, LogFileQParse.PARSE_PERL_MATCHEXTRACT, "/[0-9]{4,10}\\-[0-9]{8}/");
        LogFileQParse.LogFileQParseAction extritem21 = converter.getPreprocessor().addParseActionPerl(extrtransac, false, false, true, LogFileQParse.PARSE_PERL_MATCHEXTRACT, "/\\.cgi\\?id=[0-9]{2,20}&p=3/");
        // the returned handle is not needed: nothing is chained below this action
        converter.getPreprocessor().addParseActionExtract(extritem21, true, false, true, LogFileQParse.PARSE_EXTRACT,1,'=',1,'&');
        // NOTE(review): the '.' before "html" is unescaped and therefore matches
        // any character - confirm whether "\\.html" was intended
        LogFileQParse.LogFileQParseAction extritem11 = converter.getPreprocessor().addParseActionPerl(extritem21, false, false, true, LogFileQParse.PARSE_PERL_MATCHEXTRACT, "/3\\-[0-9]{2,20}\\-[1-9].html/");
        // the returned handle is not needed: nothing is chained below this action
        converter.getPreprocessor().addParseActionExtract(extritem11, true, false, true, LogFileQParse.PARSE_EXTRACT, 1,'-',1,'-');

        // for IIS format
        /* LogFileQParse.LogFileQParseAction extrtransac = converter.getPreprocessor().addParseActionPrefix(null, true, true, false, LogFileQParse.PARSE_PREFIX,',');
        LogFileQParse.LogFileQParseAction extrquote = converter.getPreprocessor().addParseActionExtract(extrtransac, false, false, true, LogFileQParse.PARSE_EXTRACT,1,'/',1,',');
        */

        int method = METHOD_DECOMP;
        converter.setSortingMethod(method);
        //converter.decompOptimize = false;
        converter.setBlockSize(10000);
        converter.setItemIDType( CategoricalAttribute.INTEGER ); // items as integers

        System.out.println("Analysing...");
        // overall timer - time taken for parsing AND sorting is measured
        long start = System.currentTimeMillis();

        MiningDataSpecification metaData = converter.getMetaData();
        CategoricalAttribute itemId = (CategoricalAttribute) metaData.getMiningAttribute(1);
        System.out.println("log meta Data: " + metaData);

        long i = 0;
        while (converter.next())
        {
            i++;
            // read every vector exactly once, even when it is not printed
            MiningVector miningVector = converter.read();
            if (i < 100) { // do not output long logs to screen
                System.out.print(miningVector);
                // Check and show item type
                double ival = miningVector.getValue(1);
                Category cat = itemId.getCategory(ival);
                String itype = "string";
                if (cat.getValue() instanceof Integer) {
                    itype = "int";
                }
                System.out.println(" => item = " + cat + " itype = " + itype);
            }
        }
        if (i >= 100) { // do not output ultra-long logs to screen
            System.out.print("\nLast Vector:\n" + converter.read());
        }

        long programTime = System.currentTimeMillis() - start;
        System.out.println("\nLog reading was finished.\n");
        System.out.println("Lines Total: " + converter.processedStream.numLinesCount + ", invalid lines: " + converter.processedStream.numLinesInvalid);
        System.out.println("\nSorting took " + converter.sortTimeTaken + " ms, reading and parsing took "+(converter.totalTimeTaken - converter.sortTimeTaken) + " ms, total runtime = " + programTime + " ms.\n");
        if (LogFileQSort.decompBlockDebug) {
            System.out.println("Wrong (artificial) transactions: " + converter.decompBlockDebugWrongSingle + ", appearing in multiple blocks: " + converter.decompBlockDebugWrongMulti);
        }
        if (method == METHOD_DECOMP) {
            System.out.println("TID Counter: " + (converter.decompTransactionIDCounter-1));
        }
    }
    catch (MiningException ex) {
        // demo driver: report the failure on the console and end normally
        ex.printStackTrace();
    }
}
}
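// (Web-viewer residue, not part of the source.) Keyboard shortcuts:
//   Copy code:           Ctrl + C
//   Search code:         Ctrl + F
//   Fullscreen mode:     F11
//   Toggle theme:        Ctrl + Shift + D
//   Show shortcuts:      ?
//   Increase font size:  Ctrl + =
//   Decrease font size:  Ctrl + -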