📄 logfilestream.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2004 Prudential Systems AG
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Valentine Stepanenko (valentine.stepanenko@zsoft.ru)
* @author Michael Krautmacher (michael@krautmacher.com)
* @version 1.1
*/
package com.prudsys.pdm.Input.Records.Log;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Enumeration;
import java.util.Vector;
import org.apache.oro.text.perl.Perl5Util;
import org.apache.oro.text.regex.MalformedPatternException;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.CategoryProperty;
import com.prudsys.pdm.Core.MiningAttribute;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Input.MiningFileStream;
import com.prudsys.pdm.Input.MiningVector;
/**
* Stream that reads from webserver logfiles. They are recognized
* automatically. In addition, filters for logfile columns
* can be defined.
*/
public class LogFileStream extends MiningFileStream
{
    /** Size in bytes of the buffer used when reading the logfile (1 MB). */
    private static final int READ_BUFFER_SIZE = 1048576;

    /** Size in bytes of the buffer used when dumping to ARFF (512 KB). */
    private static final int WRITE_BUFFER_SIZE = 524288;

    /** Current raw logfile line, as returned by the last readLine(). */
    private String logfileRecord;

    /** Buffered reader over the logfile; null until open() succeeds. */
    private BufferedReader buffer;

    /** Perl regex utility used to split records and match filters. */
    private Perl5Util perl;

    /** Recognizer holding the detected logfile format and its separator. */
    private LogFileRecognizer logFileMetaData;

    /** True while the underlying reader is open. */
    private boolean isOpen;

    /**
     * Per-attribute regex filters; a line whose i-th column matches
     * filter[i] is skipped by next(). Entries may be null (no filter).
     */
    private String[] filter;

    /** True once the filter array has been created or set by the caller. */
    private boolean filterInitialised;

    /**
     * Constructor for given logfile.
     *
     * @param file logfile to read
     */
    public LogFileStream( String file )
    {
        perl = new Perl5Util();
        fileName = file;
        filterInitialised = false;
    }

    /**
     * Returns supported stream methods.
     *
     * @return supported stream methods
     */
    @SuppressWarnings("unchecked")
    public Enumeration getSupportedStreamMethods()
    {
        Vector suppmeth = new Vector();
        suppmeth.addElement("recognize");
        suppmeth.addElement("reset");
        return suppmeth.elements();
    }

    /**
     * Recognizes the logfile format and builds the corresponding meta data.
     *
     * @return meta data corresponding to logfile
     * @throws MiningException cannot recognize logfile
     */
    public MiningDataSpecification recognize() throws MiningException
    {
        try
        {
            logFileMetaData = new LogFileRecognizer( fileName );
            logFileMetaData.recognize(); // returned format id is not needed here
            metaData = logFileMetaData.buildMiningDataSpecification();
        }
        catch (IOException ex)
        {
            throw new MiningException( ex.getMessage() );
        }
        catch (MalformedPatternException mpex)
        {
            throw new MiningException( mpex.getMessage() );
        }
        return metaData;
    }

    /**
     * Opens the logfile for reading. If the stream is already open it is
     * reset instead. The format is recognized first when no meta data exists.
     *
     * @throws MiningException cannot open logfile
     */
    public void open() throws MiningException
    {
        if (isOpen)
        {
            reset();
            return;
        }
        try
        {
            if( metaData == null )
            {
                recognize();
            }
            buffer = new BufferedReader( new FileReader( fileName ), READ_BUFFER_SIZE );
            isOpen = true;
        }
        catch (IOException ex)
        {
            throw new MiningException( ex.getMessage() );
        }
    }

    /**
     * Closes the logfile. Safe to call even if the stream was never opened.
     *
     * @throws MiningException cannot close logfile
     */
    public void close() throws MiningException
    {
        try
        {
            // Guard against close() before open(): buffer is still null then.
            if( buffer != null )
            {
                buffer.close();
            }
            isOpen = false;
        }
        catch(IOException ex)
        {
            throw new MiningException( ex.getMessage() );
        }
    }

    /**
     * Advances the cursor by one position. Blank lines, comment lines
     * (starting with '#') and lines matching the filter condition
     * are skipped.
     *
     * @return true if a vector was read, false at end of file
     * @throws MiningException cannot advance cursor position
     */
    public boolean next() throws MiningException
    {
        try
        {
            do
            {
                logfileRecord = buffer.readLine();
                if( logfileRecord == null )
                {
                    return false; // end of file reached
                }
            }
            while( logfileRecord.equalsIgnoreCase("") || logfileRecord.startsWith( "#" ) || filter() );
            return true;
        }
        catch(IOException ex)
        {
            throw new MiningException( ex.getMessage() );
        }
    }

    /**
     * Sets the cursor position before the first data set by reopening
     * the underlying reader.
     *
     * @throws MiningException operation failed
     */
    public void reset() throws MiningException
    {
        try
        {
            if( buffer != null )
            {
                buffer.close();
            }
            buffer = new BufferedReader( new FileReader( fileName ), READ_BUFFER_SIZE );
        }
        catch( IOException ex )
        {
            throw new MiningException( ex.getMessage() );
        }
    }

    /**
     * Moves the cursor to the specified position.
     * <p>
     * NOTE(review): not implemented — this always reports success without
     * actually moving the cursor. Kept as-is for interface compatibility;
     * callers must not rely on positioning.
     *
     * @param position target position of cursor
     * @return true if next vector exists, otherwise false
     * @throws MiningException cannot move cursor
     */
    public boolean move( int position ) throws MiningException
    {
        return true;
    }

    /**
     * Reads the mining vector at the current cursor position, i.e. the one
     * produced by the last successful call to next().
     *
     * @return mining vector at current cursor position
     * @throws MiningException read failed
     */
    public MiningVector read() throws MiningException
    {
        return cursorVector;
    }

    /**
     * Parses the current logfile line into a mining vector (stored in
     * cursorVector), unless one of its columns matches the filter.
     *
     * @return true if the line is filtered out and should be skipped,
     *         false if it was converted into a vector
     */
    private boolean filter()
    {
        // The filter array may never have been set by the caller; create an
        // all-null one (nothing filtered) to avoid a NullPointerException.
        if( !filterInitialised )
        {
            this.filter = new String[ metaData.getAttributesNumber() ];
            filterInitialised = true;
        }
        Vector vector = new Vector( 30 );
        String separator = logFileMetaData.getLogFileSeparator();
        perl.split( vector, separator, logfileRecord );
        int n = metaData.getAttributesNumber();
        double[] instance = new double[ n ];
        int k = vector.size();
        // Get values for all attributes.
        for( int i = 0; i < n; i++ )
        {
            MiningAttribute attribute = metaData.getMiningAttribute( i );
            String attributeValue;
            if( i > k - 1 )
            {
                // Line has fewer columns than attributes: treat the rest as missing.
                attributeValue = "-";
            }
            else
            {
                attributeValue = (String) vector.get( i );
                // Null or blank column values are normalized to the missing marker.
                if( attributeValue == null || attributeValue.trim().length() <= 0 )
                {
                    attributeValue = "-";
                }
            }
            // A column matching its filter pattern skips the whole line.
            if( filter[i] != null && perl.match( "/" + filter[i] + "/", attributeValue ) )
            {
                return true;
            }
            if( attribute instanceof CategoricalAttribute )
            {
                // "-" (possibly padded) and blank values count as missing.
                if( attributeValue.trim().length() <= 0 || attributeValue.trim().equalsIgnoreCase( "-" ) )
                {
                    instance[i] = Category.MISSING_VALUE;
                }
                else
                {
                    Category cat = new Category( attributeValue, attributeValue, new CategoryProperty() );
                    double d = ((CategoricalAttribute) attribute).getKey( cat );
                    if( Double.isNaN( d ) )
                    {
                        // Unknown categories are added to the attribute on the fly.
                        instance[i] = ((CategoricalAttribute) attribute).addCategory( cat );
                    }
                    else
                    {
                        instance[i] = d;
                    }
                }
            }
            else if( attribute instanceof NumericAttribute )
            {
                if( attributeValue.trim().length() <= 0 )
                {
                    instance[i] = Category.MISSING_VALUE;
                }
                else
                {
                    try
                    {
                        instance[i] = Double.parseDouble( attributeValue );
                    }
                    catch( NumberFormatException ex )
                    {
                        // Non-numeric tokens (e.g. "-") become missing values.
                        instance[i] = Category.MISSING_VALUE;
                    }
                }
            }
        }
        // Store the parsed line as the current cursor vector.
        cursorVector = new MiningVector( instance );
        cursorVector.setMetaData( metaData );
        return false;
    }

    /**
     * Creates a dump of the mining input stream in ARFF format.
     *
     * @param path path for dump file
     * @throws MiningException cannot create dump file
     */
    public void dump( String path ) throws MiningException
    {
        if( isOpen )
        {
            reset();
        }
        else
        {
            open();
        }
        try
        {
            BufferedWriter out = new BufferedWriter( new FileWriter( path ), WRITE_BUFFER_SIZE );
            try
            {
                out.write( metaData.createArffDescription() + "\n" );
                out.write( "@data" + "\n" );
                while( next() )
                {
                    out.write( read().toString() + "\n" );
                }
            }
            finally
            {
                // Always release the file handle, even when a write or next() fails.
                out.close();
            }
        }
        catch (IOException ex)
        {
            throw new MiningException( ex.getMessage() );
        }
    }

    /**
     * Sets a new filter array for the logfile. filter[i] is a regular
     * expression applied to column i; lines with a matching column
     * are skipped by next().
     *
     * @param filter new filter array for logfile
     */
    public void filter( String[] filter )
    {
        this.filter = filter;
        filterInitialised = true;
    }

    /**
     * Main method for tests.
     *
     * @param args arguments (ignored)
     */
    public static void main( String[] args )
    {
        try
        {
            String fileName = "data\\logs\\NCSA Combined Log File Format.log";
            LogFileStream converter = new LogFileStream( fileName );
            MiningDataSpecification metaData = converter.recognize();
            String[] filter = new String[ metaData.getAttributesNumber() ];
            if( filter.length > 8 )
            {
                filter[8] = "navob_agb_over-27.gif";
            }
            converter.filter( filter );
            converter.open();
            while( converter.next() )
            {
                MiningVector miningVector = converter.read();
                System.out.println( miningVector );
            }
            converter.reset();
            converter.dump( "data\\logs\\log.arff" );
            converter.close();
            System.out.println( "File " + converter.logFileMetaData.getName() + " is an " + converter.logFileMetaData.getDescription() + " log file.");
            System.out.println( "Log reading was finished." );
        }
        catch (MiningException ex)
        {
            ex.printStackTrace();
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -