📄 OKCLoader.java
package chen.macroweka.core.converters;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.FastVector;
import weka.core.Attribute;
import weka.core.converters.*;
import java.io.*;
/**
* Loads a data set in xmdv format: an .okc data file, optionally
* accompanied by a .meta file carrying the header information.
*/
public class OKCLoader extends AbstractLoader
implements BatchLoader, IncrementalLoader
{
/**
* Holds the determined structure (header) of the data set.
*/
protected Instances m_structure = null;
/**
* Holds the meta information of the data set: the .meta file that
* accompanies the data file (m_sourceDataFile).
*/
protected File m_sourceMetaFile = null;
/**
* Holds the source of the data set: the .okc data file.
*/
private File m_sourceDataFile = null;
/**
* Reader for the .meta file.
*/
private transient Reader m_metaReader = null;
/**
* Reader for the .okc data file.
*/
private transient Reader m_dataReader = null;
/**
* Tokenizer for the data file, kept across calls to getNextInstance().
*/
private transient StreamTokenizer m_dataTokenizer = null;
/**
* Holds the filestem.
*/
private String m_fileStem;
/**
* Number of attributes (dimensions) in the data.
*/
private int m_numAttribs;
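/**
* Number of instances (rows) in the data.
*/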
private int m_numInstances;
/**
* Returns a string describing this Loader.
*
* @return a description of the Loader suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo()
{
return "Reads a source that is in xmdv file format (.okc file " +
"containing data with meta information in .meta file).";
}
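/*
* A sketch of the .okc layout this loader assumes (inferred from
* readHeaderFromOKC below; the official xmdv format may differ in detail):
*
*   <numAttribs> <numInstances>
*   <attribute name>              one line per dimension
*   <min> <max> <cardinality>     one triple per dimension (skipped)
*   <v1> <v2> ... <vN>            one row of values per instance
*/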
/**
* Resets the Loader and sets the source of the data set. The supplied file
* name is reduced to its stem, from which the .meta and .okc file names
* are derived.
*
* @param file the source file
* @throws java.io.IOException if the .okc data file cannot be found
*/
public void setSource(File file) throws IOException
{
reset();
if ( file == null ) {
throw new IOException( "Source file object is null!" );
}
String fname = file.getName();
String fileStem;
String path = file.getParent();
if ( path != null ) {
path += File.separator;
} else {
path = "";
}
if ( fname.indexOf( '.' ) < 0 ) {
fileStem = fname;
fname += ".meta";
} else {
fileStem = fname.substring( 0, fname.indexOf( '.' ) );
fname = fileStem + ".meta";
}
m_fileStem = fileStem;
file = new File( path + fname );
m_sourceMetaFile = file;
try {
BufferedReader br = new BufferedReader( new FileReader( file ) );
m_metaReader = br;
} catch (FileNotFoundException ex) {
// the .meta file is optional; the header can be read from the .okc file
}
m_sourceDataFile = new File( path + fileStem + ".okc" );
try {
BufferedReader br = new BufferedReader( new FileReader( m_sourceDataFile ) );
m_dataReader = br;
} catch (FileNotFoundException ex) {
throw new IOException( "File not found : " + ( path + fname ) );
}
}
/**
* Resets the loader so that the structure is determined afresh.
*/
public void reset()
{
m_structure = null;
m_dataTokenizer = null;
}
/**
* Determines and returns (if possible) the structure (header) of the data
* set as an empty set of Instances.
*
* @return the structure of the data set as an empty set of Instances
* @throws java.io.IOException if no source has been specified
*/
public Instances getStructure() throws IOException
{
if ( m_sourceDataFile == null ) {
throw new IOException( "No source has been specified" );
}
if ( m_structure == null ) {
if ( m_metaReader != null ) {
StreamTokenizer st = new StreamTokenizer( m_metaReader );
initTokenizer( st );
readHeaderFromMeta( st );
}
if ( m_structure == null ) {
// readHeaderFromMeta is still a stub, so fall back to the header
// embedded at the top of the .okc file itself
StreamTokenizer st = new StreamTokenizer( m_dataReader );
initTokenizer( st );
readHeaderFromOKC( st );
}
}
return m_structure;
}
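/**
* Reads the header section of the .okc file: the attribute and instance
* counts, one attribute name per dimension, and the per-dimension
* min/max/cardinality triples (which are read but ignored).
*/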
private void readHeaderFromOKC(StreamTokenizer tokenizer)
throws IOException
{
ConverterUtils.getFirstToken( tokenizer );
if ( tokenizer.ttype == StreamTokenizer.TT_EOF ) {
ConverterUtils.errms( tokenizer, "premature end of file" );
}
// read number of dimensions
if ( tokenizer.ttype != StreamTokenizer.TT_EOL ) {
m_numAttribs = Integer.parseInt( tokenizer.sval.trim() );
}
// read number of data items
ConverterUtils.getToken( tokenizer );
if ( tokenizer.ttype != StreamTokenizer.TT_EOL ) {
m_numInstances = Integer.parseInt( tokenizer.sval.trim() );
}
FastVector attribDefs = new FastVector();
for ( int i = 0; i < m_numAttribs; i++ ) {
ConverterUtils.getToken( tokenizer );
if ( tokenizer.ttype == StreamTokenizer.TT_EOL ) {
ConverterUtils.errms( tokenizer, "premature end of line. Expected "
+ "attribute name." );
}
String attribName = tokenizer.sval;
attribDefs.addElement( new Attribute( attribName ) );
}
m_structure = new Instances( m_fileStem, attribDefs, 0 );
// treat the last attribute as the class attribute
m_structure.setClassIndex( m_structure.numAttributes() - 1 );
// ignore dim min, max and cardinality
for ( int i = 0; i < m_numAttribs; i++ ) {
ConverterUtils.getToken( tokenizer );
if ( tokenizer.ttype == StreamTokenizer.TT_EOL ) {
ConverterUtils.errms( tokenizer, "premature end of line. Expected "
+ "attribute min value." );
}
ConverterUtils.getToken( tokenizer );
if ( tokenizer.ttype == StreamTokenizer.TT_EOL ) {
ConverterUtils.errms( tokenizer, "premature end of line. Expected "
+ "attribute max value." );
}
ConverterUtils.getToken( tokenizer );
if ( tokenizer.ttype == StreamTokenizer.TT_EOL ) {
ConverterUtils.errms( tokenizer, "premature end of line. Expected "
+ "attribute cardinality value." );
}
}
}
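/**
* Configures the tokenizer: spaces, tabs, commas, colons and line breaks
* act as delimiters, '#' starts a comment, and single or double quotes
* delimit quoted strings.
*/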
private void initTokenizer(StreamTokenizer tokenizer)
{
tokenizer.resetSyntax();
tokenizer.whitespaceChars( 0, ( ' ' - 1 ) );
tokenizer.wordChars( ' ', '\u00FF' );
tokenizer.whitespaceChars( ' ', ' ' );
tokenizer.whitespaceChars( '\n', '\n' );
tokenizer.whitespaceChars( ',', ',' );
tokenizer.whitespaceChars( ':', ':' );
tokenizer.commentChar( '#' );
tokenizer.whitespaceChars( '\t', '\t' );
tokenizer.quoteChar( '"' );
tokenizer.quoteChar( '\'' );
tokenizer.eolIsSignificant( false );
}
private void readHeaderFromMeta(StreamTokenizer st)
{
// not implemented yet; getStructure() falls back to the .okc header
}
/**
* Returns the full data set in batch mode.
*
* @return the data set as a set of Instances
* @throws java.io.IOException if no source has been specified or reading fails
*/
public Instances getDataSet() throws IOException
{
if ( m_sourceDataFile == null ) {
throw new IOException( "No source has been specified" );
}
if ( getRetrieval() == INCREMENTAL ) {
throw new IOException( "Cannot mix getting Instances in both incremental and batch modes" );
}
setRetrieval( BATCH );
if ( m_structure == null ) {
getStructure();
}
StreamTokenizer st = new StreamTokenizer( m_dataReader );
initTokenizer( st );
Instances result = new Instances( m_structure );
for ( int i = 0; i < m_numInstances; i++ ) {
Instance current = getInstance( st );
if ( current == null ) {
break; // file ended before the declared instance count was reached
}
result.add( current );
}
return result;
}
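/**
* Reads one data row and converts it into an Instance.
*
* @return the parsed Instance, or null once the end of the file is reached
* @throws java.io.IOException if a value cannot be parsed
*/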
private Instance getInstance(StreamTokenizer tokenizer) throws IOException
{
double[] instance = new double[m_structure.numAttributes()];
ConverterUtils.getFirstToken( tokenizer );
if ( tokenizer.ttype == StreamTokenizer.TT_EOF ) {
return null; // end of data
}
int counter = 0;
for ( int i = 0; i < m_numAttribs; i++ ) {
if ( i > 0 ) {
ConverterUtils.getToken( tokenizer );
}
String val = tokenizer.sval.trim();
if ( i == m_numAttribs - 1 ) {
// remove trailing period
if ( val.charAt( val.length() - 1 ) == '.' ) {
val = val.substring( 0, val.length() - 1 );
}
}
if ( m_structure.attribute( counter ).isNominal() ) {
int index = m_structure.attribute( counter )
.indexOfValue( val );
if ( index == -1 ) {
ConverterUtils.errms( tokenizer, "nominal value not declared in "
+ "header :" + val + " column " + i );
}
instance[counter++] = (double) index;
} else if ( m_structure.attribute( counter ).isNumeric() ) {
try {
instance[counter++] = Double.parseDouble( val );
} catch (NumberFormatException e) {
ConverterUtils.errms( tokenizer, "number expected" );
}
} else {
// only nominal and numeric attributes can occur in .okc data
ConverterUtils.errms( tokenizer, "unsupported attribute type" );
}
}
return new Instance( 1.0, instance );
}
/**
* Reads the data set incrementally, one Instance per call.
*
* @return the next Instance, or null when the end of the data is reached
* @throws java.io.IOException if no source has been specified or reading fails
*/
public Instance getNextInstance() throws IOException
{
if ( m_sourceDataFile == null ) {
throw new IOException( "No source has been specified" );
}
if ( getRetrieval() == BATCH ) {
throw new IOException( "Cannot mix getting Instances in both incremental and batch modes" );
}
setRetrieval( INCREMENTAL );
if ( m_structure == null ) {
getStructure();
}
// reuse one tokenizer across calls so no buffered characters are lost
if ( m_dataTokenizer == null ) {
m_dataTokenizer = new StreamTokenizer( m_dataReader );
initTokenizer( m_dataTokenizer );
}
Instance nextI = getInstance( m_dataTokenizer );
if ( nextI != null ) {
nextI.setDataset( m_structure );
}
return nextI;
}
}
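/*
* Minimal usage sketch (hypothetical file name; assumes the Weka 3.4-era
* API this class is written against):
*
*   OKCLoader loader = new OKCLoader();
*   loader.setSource( new File( "data/cars.okc" ) );
*   Instances batch = loader.getDataSet();            // batch mode
*
*   // or, on a fresh loader, incremental mode:
*   // loader.getStructure();
*   // Instance inst;
*   // while ( ( inst = loader.getNextInstance() ) != null ) { ... }
*/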