// LogFileSequentialPreprocess.java
{
// check for the "cs-uri" field
// if present, only one field is used
if (metaData.getMiningAttribute(FIELD_NAME_URI) != null) {
// get the index number, so that values can be accessed directly
vecNumItemID = metaData.getAttributeIndex(metaData.getMiningAttribute(FIELD_NAME_URI));
}
// no cs-uri; try the cs-uri-stem & cs-uri-query pair
else if ((metaData.getMiningAttribute(FIELD_NAME_URI_STEM) != null) &&
(metaData.getMiningAttribute(FIELD_NAME_URI_QUERY) != null))
{
vecNumItemID = metaData.getAttributeIndex(metaData.getMiningAttribute(FIELD_NAME_URI_STEM));
vecNumItemID2 = metaData.getAttributeIndex(metaData.getMiningAttribute(FIELD_NAME_URI_QUERY));
}
else if (metaData.getMiningAttribute(FIELD_NAME_URI_STEM) != null)
{
vecNumItemID = metaData.getAttributeIndex(metaData.getMiningAttribute(FIELD_NAME_URI_STEM));
}
else
{
// no useful attribute found ...
throw new MiningException("Cannot extract item ID from meta data.");
}
}
}
/**
* Sets the field number(s) of the item ID (URI) in the mining vector.
*
* @param vecNum field number of the item ID in the mining vector.
* Can be set to "DEFAULT_FIELD_NUM" for autodetection.
* @param vecNum2 second field number of the item ID in the mining vector.
* Set to "DEFAULT_FIELD_NUM" if only one field is required.
* Note that the values of fields one and two are concatenated before the evaluation expression is applied.
*
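* <p>Example (a sketch only, following the commented-out variant in {@code main()};
* the attribute name "cs-uri" is an assumption about the recognized meta data):
* <pre>{@code
* int uriIndex = metaData.getAttributeIndex(metaData.getMiningAttribute("cs-uri"));
* converter.setItemIDParameters(uriIndex, DEFAULT_FIELD_NUM);
* }</pre>
*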
* @throws MiningException
*/
public void setItemIDParameters(int vecNum, int vecNum2) throws MiningException
{
vecNumItemID = vecNum;
vecNumItemID2 = vecNum2;
setItemIDParameters();
}
/**
* Extracts a resource string from a mining vector.
*
* @param miningVector mining vector to read from
* @param elementNum1 number of the first field to be extracted
* @param elementNum2 number of the second field; set to "DEFAULT_FIELD_NUM" if only one field is used
* @throws MiningException if a required element is missing
* @return String resource string (concatenation of the values of fields 1 and 2)
*/
private String extractBaseString(MiningVector miningVector, int elementNum1, int elementNum2) throws MiningException
{
String theString;
try
{
theString = miningVector.toVector().elementAt(elementNum1).toString();
} catch (NullPointerException e)
{
System.out.println("Warning: Bad line / Missing element " + elementNum1 + " in log file.");
throw new MiningException("Warning: Bad line / Missing element " + elementNum1 + " in log file.");
}
if (elementNum2!=DEFAULT_FIELD_NUM)
{
try
{
theString = theString + miningVector.toVector().elementAt(elementNum2).toString();
} catch (NullPointerException e)
{
System.out.println("Warning: Bad line / Missing element " + elementNum2 + " in log file.");
throw new MiningException("Warning: Bad line / Missing element " + elementNum2 + " in log file.");
}
}
return theString;
}
/**
* Extracts the item ID (URI) from the mining vector.
*
* @param miningVector mining vector
* @param parseParemNum index of the parser parameter set to use
* @return itemID
*
* @throws MiningException if the item ID is missing, filtered out, or has an invalid format
*/
private String extractItemID(MiningVector miningVector, int parseParemNum) throws MiningException
{
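// Layout of a parser parameter set (vecExtractItemIDX[parseParemNum]), as evaluated below:
//   element 0: split pattern; the second fragment of the split is the item ID
//              (equal to DEFAULT_VEC_EXTRACT_NONE[0] means "no further parsing")
//   element 1: optional second split pattern, of which the first fragment is kept ("" = skip)
//   element 2: optional "kill" filter pattern; matching resource strings are discarded
//   element 3: optional "match" filter pattern; item IDs that do not match are rejected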
// retrieve resource string from mining vector
String itemString = extractBaseString(miningVector, vecNumItemID, vecNumItemID2);
// further parsing necessary?
if (((String)vecExtractItemIDX[parseParemNum].elementAt(0)).equals(DEFAULT_VEC_EXTRACT_NONE[0]))
return itemString;
// "kill" filter active?
if(vecExtractItemIDX[parseParemNum].size()>2)
if ((perl.match((String)vecExtractItemIDX[parseParemNum].elementAt(2), itemString)))
throw new MiningException("Note: Item ID was filtered.");
if (!(perl.match((String)vecExtractItemIDX[parseParemNum].elementAt(0), itemString)))
throw new MiningException("Warning: Missing item id in resource string.");
// extract item id from string
Vector tmpVec = new Vector(3);
perl.split(tmpVec, (String)vecExtractItemIDX[parseParemNum].elementAt(0), itemString);
itemString = tmpVec.elementAt(1).toString();
// further parsing necessary?
if (((String)vecExtractItemIDX[parseParemNum].elementAt(1)).equals(""))
return itemString;
Vector tmpVec2 = new Vector(3);
perl.split(tmpVec2, (String)vecExtractItemIDX[parseParemNum].elementAt(1), tmpVec.elementAt(1).toString());
itemString = tmpVec2.elementAt(0).toString();
// "match" filter active?
if(vecExtractItemIDX[parseParemNum].size()>3)
if (!(perl.match((String)vecExtractItemIDX[parseParemNum].elementAt(3), itemString)))
throw new MiningException("Note: Item ID has invalid format.");
return itemString;
}
/**
* Creates a stripped-down version of the input stream that contains only the
* elements required for sequential analysis.
*
* @param path path of the new file
* @throws MiningException if the file cannot be created or written
*/
public void dump( String path ) throws MiningException
{
open();
// counters for statistics
numLinesInvalid = 0;
numLinesCount = 0;
try
{
FileWriter writer = new FileWriter( path );
BufferedWriter buffer = new BufferedWriter( writer, 524288 );
// analogous to "metaData.createArffDescription()":
// create header for arff file
String description = "@relation 'SequentialAnalysisFile'\n";
description = description + "@attribute transactionId string\n@attribute itemId string\n@attribute itemIndex real\n";
buffer.write( description + "\n" );
buffer.write( "@data" + "\n" );
MiningVector miningVector;
setTransactionIDParameters();
setItemIDParameters();
while( this.next() )
{
miningVector = this.read();
buffer.write( miningVector.toString() + "\n" );
}
buffer.close();
}
catch (IOException ex)
{
throw new MiningException( ex.getMessage() );
}
System.out.println("Lines total: " + numLinesCount + ", invalid: " + numLinesInvalid + ".");
}
/**
* Advances the cursor to the next valid mining vector.
*
* @return true if another valid mining vector could be read, false otherwise
* @throws MiningException if the cursor position cannot be advanced
*/
public boolean next() throws MiningException
{
// call next function of superclass
while (super.next())
{
// some statistics
numLinesCount++;
// retrieve mining vector from superclass
MiningVector miningVector = super.read();
// ignore lines where required fields are missing
if (miningVector.isMissing(vecNumItemID) || miningVector.isMissing(vecNumTransactionID))
{
numLinesInvalid++;
continue;
}
if ((vecNumItemID2!=DEFAULT_FIELD_NUM) && (miningVector.isMissing(vecNumItemID2)))
{
numLinesInvalid++;
continue;
}
int paramSetIdx = 0;
if (vecExtractNumX==0)
throw new MiningException("No extraction parameters configured via addExtractIDParameters()!");
while (paramSetIdx < vecExtractNumX)
{
try {
double instance[] = new double[3];
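// instance layout follows metaDataProcessed: [0] transactionId, [1] itemId, [2] itemIndex (not yet initialised here)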
// extract transaction id (session id) from original mining vector
String transactionIDStr = extractTransactionID(miningVector, paramSetIdx);
MiningAttribute attributeTransactionID = metaDataProcessed.
getMiningAttribute("transactionId");
Category transactionCat = new Category(transactionIDStr);
double d0 = ( (CategoricalAttribute) attributeTransactionID).getKey(
transactionCat);
instance[0] = Category.isMissingValue(d0) ?
( (CategoricalAttribute) attributeTransactionID).addCategory(
transactionCat) : d0;
// extract item id (uri / product id) from original mining vector
String itemIDStr = extractItemID(miningVector, paramSetIdx);
MiningAttribute attributeItemID = metaDataProcessed.
getMiningAttribute("itemId");
Category itemCat = new Category(itemIDStr);
double d1 = ( (CategoricalAttribute) attributeItemID).getKey(itemCat);
instance[1] = Category.isMissingValue(d1) ?
( (CategoricalAttribute) attributeItemID).addCategory(itemCat) :
d1;
// numerical attribute is not yet initialised
instance[2] = Category.MISSING_VALUE;
cursorVectorProcessed = new MiningVector(instance);
cursorVectorProcessed.setMetaData(metaDataProcessed);
// System.out.println(cursorVectorProcessed.toString());
// Creation of new mining vector successful!
return true;
}
catch (MiningException e) {
paramSetIdx++;
}
} // while (paramSetIdx < vecExtractNumX)
numLinesInvalid++;
}
return false;
}
/**
* Reads current mining vector.
*
* @return mining vector at current cursor position
* @throws MiningException
*/
public MiningVector read() throws MiningException
{
return cursorVectorProcessed;
}
/**
* Moves to a position in the source relative to the current cursor position.
*
* @param position number of (valid) lines to advance
* @throws MiningException if position is not positive
* @return true if the last advance succeeded, false if the end of the stream was reached
*/
public boolean move( int position ) throws MiningException
{
// check parameter (position must be >0)
if (position <=0)
throw new MiningException("Cannot move backwards or advance zero lines in file stream.");
// moving is realised using calls to next()
boolean nextResult = false;
for(int i=0;i<position;i++)
{
nextResult = next();
}
return nextResult;
}
public static void main(String[] args)
{
try
{
// String fileName = "config\\config.properties";
// String fileName = "data\\logs\\NCSA Combined Log File Format.log";
// String fileName = "data\\logs\\NCSA Common Log File Format.log";
String fileName = "data\\logs\\WebShopNCSA.log";
// String fileName = "data\\logs\\Extended Log File Format.log";
// String fileName = "data\\logs\\IIS Log File Format.log";
// String fileName = "data\\logs\\Intershop Log File Format.log";
// String fileName = "data\\logs\\Shop Log File Format.log";
// String fileName = "data\\logs\\UNRECOGNIZED.log";
LogFileSequentialPreprocess converter = new LogFileSequentialPreprocess( fileName );
MiningDataSpecification metaData = converter.recognize();
System.out.println(metaData);
converter.reset();
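// DEFAULT_FIELD_NUM makes the converter autodetect the field indices from the recognized meta data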
converter.setTransactionIDParameters(DEFAULT_FIELD_NUM, DEFAULT_FIELD_NUM);
converter.setItemIDParameters(DEFAULT_FIELD_NUM, DEFAULT_FIELD_NUM);
/* int tidIndex = metaData.getAttributeIndex(metaData.getMiningAttribute("c-sid"));
converter.setTransactionIDParameters(tidIndex, DEFAULT_FIELD_NUM );
converter.setItemIDParameters(DEFAULT_FIELD_NUM, DEFAULT_FIELD_NUM);
*/
converter.addExtractIDParameters(DEFAULT_VEC_EXTRACT_NONE, DEFAULT_VEC_EXTRACT_NONE);
converter.dump("data\\logs\\SequentialAnalysisFile.arff");
converter.close();
System.out.println("Log reading was finished.");
}
catch (MiningException ex)
{
ex.printStackTrace();
}
}
}