📄 logfilerecognizer.java
字号:
// Matches every line that begins with '#', followed by word characters and then ':' (a directive line).
regularExpr = "^#\\w*:";
// Set the field separator so that a directive line is divided into two substrings: directive name and directive value.
processor.setFieldSeparator( "[#:]" ); // ""[#:]""
processor.addAction(regularExpr, new MatchAction()
{
@SuppressWarnings("unchecked")
public void processMatch(MatchActionInfo info)
{
logFileDirectives.put( (String)info.fields.get(1) , (String)info.fields.get(2) );
}
});
FileInputStream fis = new FileInputStream( file );
processor.processMatches(fis, System.out);
fis.close();
//LogFileUtils.print( logFileDirectives );
return logFileDirectives;
}
/**
 * Adds an integer-typed numeric attribute with a lower bound of 0 to the
 * given mining data specification.
 * <p>
 * The attribute is generic for this scenario: only its name, data type and
 * lower bound are configured here.
 *
 * @param attrMDS  mining data specification that receives the new attribute
 * @param attrName name of the attribute to be added
 */
private void addGenericNumericAttribute(MiningDataSpecification attrMDS, String attrName)
{
    final NumericAttribute numericAttr = new NumericAttribute( attrName );

    // Configure the attribute before it is registered with the specification.
    numericAttr.setDataType( NumericAttribute.INTEGER );
    numericAttr.setLowerBound( 0 );

    attrMDS.addMiningAttribute( numericAttr );
}
/**
 * Adds a categorical attribute with unbounded categories to the given
 * mining data specification.
 *
 * @param attrMDS  mining data specification that receives the new attribute
 * @param attrName name of the attribute to be added
 */
private void addCategoricAttribute(MiningDataSpecification attrMDS, String attrName)
{
    final CategoricalAttribute categoricalAttr = new CategoricalAttribute( attrName );

    // Unbounded categories are used on purpose: unstored categories
    // (setUnstoredCategories(true)) caused problems when LogFileStream reads
    // a line (change by Kelvin Jor).
    categoricalAttr.setUnboundedCategories( true );

    attrMDS.addMiningAttribute( categoricalAttr );
}
/**
 * Normalizes a W3C ELFF field identifier.
 * <p>
 * Implementations of the W3C ELFF standard typically use the scheme
 * "prefix(Header)" for HTTP header field identifiers instead of the form
 * "prefix-identifier", and some identifiers are ambiguous because prefixes
 * are optional. This method:
 * <ul>
 * <li>rewrites "prefix(Header)" to "prefix-header" ('(' becomes '-',
 *     ')' becomes a blank, and leading/trailing blanks are trimmed),</li>
 * <li>lower-cases the name independently of the JVM default locale,</li>
 * <li>maps "sc-bytes" to "bytes" to avoid ambiguity.</li>
 * </ul>
 *
 * @param attrName name of the attribute as found in the log file; must not be null
 * @return normalized name of the attribute
 */
private String remapElffName(String attrName)
{
    // change alternative scheme "prefix(Header)" to "prefix-identifier"
    String withoutParentheses = attrName.replace( '(', '-' ).replace( ')', ' ' ).trim();

    // Locale.ROOT: with the default locale a Turkish/Azeri JVM would turn 'I'
    // into a dotless 'ı' ("TIME-TAKEN" -> "tıme-taken"), so the field would no
    // longer be recognized by name later on.
    String normalized = withoutParentheses.toLowerCase( java.util.Locale.ROOT );

    // remap to avoid ambiguity
    if( normalized.equalsIgnoreCase( "sc-bytes" ) )
    {
        return "bytes";
    }
    return normalized;
}
/**
 * Builds the meta data that corresponds to the recognized log file.
 * <p>
 * The relation name is composed of the result of
 * {@code getLogFileNameNoExtension()} followed by the description in
 * parentheses. The trimmed {@code fieldsNames} string is split with
 * {@code perl.split} (default pattern - presumably whitespace, confirm
 * against the ORO documentation) and one attribute is created per name.
 * Side effect: {@code fieldsNames} is replaced by its trimmed value.
 *
 * @return meta data of the log file
 */
public MiningDataSpecification buildMiningDataSpecification( )
{
    MiningDataSpecification spec = new MiningDataSpecification();
    spec.setRelationName( getLogFileNameNoExtension() + " (" + this.getDescription() + ")" );

    fieldsNames = fieldsNames.trim();
    Vector columnNames = new Vector();
    perl.split( columnNames, fieldsNames );

    for( Object rawName : columnNames )
    {
        // ensure the name conforms to the naming scheme
        String column = remapElffName( (String)rawName );

        // a few interesting identifiers are mapped manually to numeric
        // attributes; everything else is treated as categorical
        boolean numeric = column.equalsIgnoreCase( "sc-status" )
                || column.equalsIgnoreCase( "bytes" )
                || column.equalsIgnoreCase( "cs-bytes" )
                || column.equalsIgnoreCase( "time-taken" )
                || column.equalsIgnoreCase( "sc-win32-status" );

        if( numeric )
        {
            addGenericNumericAttribute( spec, column );
        }
        else
        {
            addCategoricAttribute( spec, column );
        }
    }
    return spec;
}
/**
 * Creates the field-name string for an unrecognized log file.
 * <p>
 * The result consists of the names "column0", "column1", ... up to
 * {@code logFileUnrecognizedFields - 1}; every name, including the last one,
 * is followed by a single blank. The string is empty when
 * {@code logFileUnrecognizedFields} is not positive.
 *
 * @return String instance containing the generated log file field names
 */
private String buildLogFileFieldsUnrecognized() {
    // A StringBuilder avoids re-copying the whole string on every iteration;
    // the local name deliberately differs from the fieldsNames field.
    StringBuilder generatedNames = new StringBuilder();
    for( int i = 0; i < logFileUnrecognizedFields; i++ )
    {
        generatedNames.append( "column" ).append( i ).append( ' ' );
    }
    return generatedNames.toString();
}
/**
 * Inspects the log file directives to determine the log file format.
 * <p>
 * When the directives contain the fields directive
 * ({@code DIRECTIVE_FIELDS_EXTENDED_LOG_FILE_FORMAT}), its value is split
 * with the pattern {@code "/ /"} and the resulting names are searched for
 * the marker field names of the ZSoft meta format and of the shop format
 * (in that order). On a successful recognition the fields
 * {@code logFileSeparator}, {@code fieldsNames} and {@code description}
 * are set as a side effect.
 *
 * @param logFileDirectives Log file directives and their values; may be null.
 * @return int containing the log file type;
 *         {@code NO_DIRECTIVES_LOG_FILE_FORMAT} when the directives are
 *         null, lack the fields directive, or the split yields no names.
 */
private int checkLogFileDirectives( Hashtable logFileDirectives )
{
    // no directives at all, or no "#Fields:" directive
    if( logFileDirectives == null
            || !logFileDirectives.containsKey( DIRECTIVE_FIELDS_EXTENDED_LOG_FILE_FORMAT ) )
    {
        return NO_DIRECTIVES_LOG_FILE_FORMAT;
    }

    String fieldsDirective = (String)logFileDirectives.get( DIRECTIVE_FIELDS_EXTENDED_LOG_FILE_FORMAT );
    Vector declaredFields = new Vector();
    perl.split( declaredFields, "/ /", fieldsDirective );

    // the log file is corrupted
    if( declaredFields.isEmpty() )
    {
        return NO_DIRECTIVES_LOG_FILE_FORMAT;
    }

    String separator;
    String formatDescription;
    int formatType;
    if( search( declaredFields, LogFileRecognizer.ZSOFT_META_LOG_FILE_FORMAT_FIELDNAME ) )
    {
        // the fields list contains the marker of the meta log file format
        separator = "/[;]+/";
        formatDescription = "ZSoft Meta Log File Format";
        formatType = ZSoft_Meta_Log_File_Format;
    }
    else if( search( declaredFields, SHOP_LOG_FILE_FORMAT_FIELDNAME ) )
    {
        // the fields list contains the marker of the shop log file format
        separator = "/[ ]+/";
        formatDescription = "Shop Log File Format";
        formatType = Shop_Log_File_Format;
    }
    else
    {
        // no marker found: plain extended log file format
        separator = "/[\t]+/";
        formatDescription = "W3C Extended Log File Format";
        formatType = Extended_Log_File_Format;
    }

    logFileSeparator = separator;
    fieldsNames = fieldsDirective;
    description = formatDescription;
    return formatType;
}
/**
 * Checks whether the beginning of the log file contains at least one match
 * of the given pattern.
 * <p>
 * At most {@code bytesToRecognize} bytes are read from the start of the
 * file and handed to a {@code MatchActionProcessor}; every match increments
 * the {@code logFileCounter} field, which is reset to 0 at the start of the
 * call. Output of the processor goes to {@code System.out}.
 *
 * @param pattern Pattern to find in the log file.
 * @param file Log file.
 * @return boolean containing true if at least one match of the pattern was found.
 * @throws MalformedPatternException Thrown if the pattern cannot be registered with the processor.
 * @throws IOException Thrown if there is a problem while reading from the log file.
 * @throws FileNotFoundException Thrown if the log file cannot be found.
 */
private boolean checkSpecificPatternsInLogFile( String pattern, File file ) throws MalformedPatternException, FileNotFoundException, IOException
{
    MatchActionProcessor processor = new MatchActionProcessor();
    logFileCounter = 0;
    processor.addAction(pattern, new MatchAction()
    {
        public void processMatch(MatchActionInfo info)
        {
            logFileCounter++;
        }
    });

    FileInputStream fis = new FileInputStream( file );
    byte[] buf = new byte[ bytesToRecognize ];
    int filled = 0;
    try
    {
        // A single read() may return fewer bytes than requested, so keep
        // reading until the buffer is full or the end of the file is reached.
        while( filled < buf.length )
        {
            int count = fis.read( buf, filled, buf.length - filled );
            if( count < 0 )
            {
                break;
            }
            filled += count;
        }
    }
    finally
    {
        // release the file handle even when reading fails
        fis.close();
    }

    // Only the bytes actually read are examined; the unused tail of the
    // buffer (NUL bytes) must not be fed to the matcher.
    ByteArrayInputStream bais = new ByteArrayInputStream( buf, 0, filled );
    processor.processMatches( bais, System.out );
    bais.close();
    return logFileCounter > 0;
}
/**
 * Searches for a string in the vector.
 * <p>
 * Null elements and elements that are not strings are skipped instead of
 * raising an exception.
 *
 * @param v The vector to search in; a null vector contains nothing.
 * @param s The string to search for; null is never found.
 * @return boolean containing true if the vector includes a string equal to
 *         {@code s}, false otherwise.
 */
private static boolean search(Vector v, String s)
{
    if( v == null || s == null )
    {
        return false;
    }
    // contains() compares with s.equals(element), which is safe for null
    // and non-String elements.
    return v.contains( s );
}
/**
 * Accessor for the {@code bytesToRecognize} field, i.e. the number of bytes
 * that are read from a log file in order to recognize it.
 *
 * @return number of bytes required to recognize a log file
 */
public int getBytesToRecognize()
{
    return this.bytesToRecognize;
}
/**
 * Accessor for the {@code logFileType} field.
 *
 * @return log file type
 */
public int getLogFileType()
{
    return this.logFileType;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -