// Source file: MiningArffStream.java (web-extraction header removed)
}
}
else
{
try {
instance[i] = Double.parseDouble(token);
}
catch(NumberFormatException ex) {
instance[i] = Category.MISSING_VALUE;
}
}
if ( Category.isMissingValue(instance[i]) )
{
missingValues = true;
}
}
token = getNextToken();
if( !token.equalsIgnoreCase( "endofline" ) )
{
if( !token.equalsIgnoreCase( "endoffile" ) )
{
tokenizerException( "end of line expected" );
}
}
// Add instance to dataset
cursorVector = new MiningVector(instance);
cursorVector.setMetaData( metaData );
cursorPosition++;
return true;
}
else
{
double[] m_ValueBuffer = new double[ metaData.getAttributesNumber() ];
int[] m_IndicesBuffer = new int[ metaData.getAttributesNumber() ];
int valIndex, numValues = 0, maxIndex = -1;
do
{
token = getNextToken();
if (tokenizer.ttype == '}')
{
break;
}
// Is index valid?
try
{
m_IndicesBuffer[numValues] = Integer.valueOf( token ).intValue();
}
catch(NumberFormatException e)
{
tokenizerException( "index number expected" );
}
if( m_IndicesBuffer[numValues] <= maxIndex )
{
tokenizerException( "indices have to be ordered" );
}
if( ( m_IndicesBuffer[numValues] < 0 ) || ( m_IndicesBuffer[numValues] >= metaData.getAttributesNumber() ) )
{
tokenizerException( "index out of bounds" );
}
maxIndex = m_IndicesBuffer[numValues];
// Get value;
token = getNextToken();
//<<09/03/2005, Frank J. Xu
//do not handle this kind of error, except throw exceptions.
//Bug: 124,It shows error message: "end of line expected, read token[4,9], line 75]" when
//running source node with \\etifs2\BI\96 Sample Data\arff\iris-error.arff. The
//first data vector contains an empty data i.e. 5.1,3.5,1.4,0.2,,0
if(token.equalsIgnoreCase( "endofline" ) )
{
int startIndex = tokenizer.toString().lastIndexOf(" ")-4;
int endIndex = tokenizer.toString().length();
String errorMsg = tokenizer.toString().substring(startIndex, endIndex);
throw new MiningDataException("Invalid characters in" + errorMsg);
}
//>>09/03/2005, Frank J. Xu
MiningAttribute attribute = metaData.getMiningAttribute( m_IndicesBuffer[numValues] );
if(attribute instanceof CategoricalAttribute)
{
Category cat = new Category( token.toString(), token, new CategoryProperty() );
m_ValueBuffer[numValues] = ((CategoricalAttribute)attribute).getKey( cat );
}
else m_ValueBuffer[numValues] = Double.parseDouble( token );
if( m_ValueBuffer[numValues] == Category.MISSING_VALUE )
{
missingValues = true;
}
numValues++;
}
while( true );
token = getNextToken();
if( !token.equalsIgnoreCase( "endofline" ) )
{
if( !token.equalsIgnoreCase( "endoffile" ) )
{
tokenizerException( "end of line expected" );
}
}
// Add instance to dataset
double[] tempValues = new double[numValues];
int[] tempIndices = new int[numValues];
System.arraycopy(m_ValueBuffer, 0, tempValues, 0, numValues);
System.arraycopy(m_IndicesBuffer, 0, tempIndices, 0, numValues);
// Add instance to dataset
cursorVector = new MiningSparseVector( 1, tempValues, tempIndices );
cursorVector.setMetaData( metaData );
cursorPosition++;
return true;
}
}
// -----------------------------------------------------------------------
// Methods of reading from the stream
// -----------------------------------------------------------------------
/**
 * Returns the data vector at the current cursor position. The cursor is
 * advanced by next(); this method does not move it.
 *
 * @return data vector at current cursor position
 * @exception MiningException if a mining source access error occurs
 */
public MiningVector read() throws MiningException
{
    // The vector was materialized by the last successful next() call.
    return cursorVector;
}
// -----------------------------------------------------------------------
// Methods of writing into the stream
// -----------------------------------------------------------------------
// Writer for appending vectors to the backing ARFF file; created by
// updateRemoveAllVectors() (directly or via updateSetMetaData()).
private PrintWriter writer;
/**
 * Replaces this stream's meta data and discards all stored vectors.
 *
 * @param metaData new meta data to set
 * @exception MiningException if an error occurs
 */
public void updateSetMetaData(MiningDataSpecification metaData) throws MiningException
{
    this.metaData = metaData;
    // Rewriting the file header implicitly drops every stored vector.
    updateRemoveAllVectors();
}
/**
 * Removes all mining vectors from this stream by recreating the backing
 * ARFF file with only the meta data header and the "@data" marker.
 * Note that meta data is not affected by this operation since it is
 * fixed for any stream.
 *
 * @exception MiningException if the file cannot be (re)written
 */
public void updateRemoveAllVectors() throws MiningException
{
    try {
        // Close any previous writer before truncating the file.
        if (writer != null) writer.close();
        writer = new PrintWriter( new FileWriter( fileName ) );
        writer.println( metaData.createArffDescription() );
        writer.println("@data");
    }
    catch (Exception ex) {
        // Preserve the underlying failure detail in the message so the
        // error is diagnosable (the original dropped the cause entirely).
        throw new MiningException("Can't write to file: " + fileName
            + ": " + ex.getMessage());
    }
}
/**
 * Appends a new mining vector to this stream as one comma-separated ARFF
 * data line. Only for updatable input streams. Before using this method
 * the first time, updateSetMetaData or updateRemoveAllVectors must be
 * called so the writer exists.
 *
 * @param vector mining vector to add
 * @exception MiningException if the stream was not opened for writing
 */
public void updateAppendVector(MiningVector vector) throws MiningException
{
    // Fail with a clear message instead of a NullPointerException when
    // the writer was never initialized.
    if (writer == null) {
        throw new MiningException("Stream not opened for writing: call "
            + "updateSetMetaData or updateRemoveAllVectors first");
    }
    MiningDataSpecification mds = vector.getMetaData();
    int nVec = vector.getValues().length;
    // Build the line with a StringBuilder: repeated string concatenation
    // in a loop is O(n^2).
    StringBuilder line = new StringBuilder();
    for (int i = 0; i < nVec; i++) {
        double value = vector.getValue(i);
        String stVal;
        if (Category.isMissingValue(value)) {
            // Missing values are written as '?' in ARFF; check first so we
            // never look up a category for a missing value.
            stVal = "?";
        }
        else if (mds == null || mds.getMiningAttribute(i) instanceof NumericAttribute) {
            stVal = String.valueOf(value);
        }
        else {
            stVal = String.valueOf(
                ((CategoricalAttribute) mds.getMiningAttribute(i))
                    .getCategory(vector.getValue(i)));
        }
        line.append(stVal);
        if (i < nVec - 1) {
            line.append(',');
        }
    }
    writer.println(line.toString());
}
/**
 * Closes the mining ARFF stream by closing the reader (via the
 * superclass) and the writer, if one was opened.
 *
 * @exception MiningException if a mining source access error occurs
 */
public void close() throws MiningException
{
    super.close();
    try
    {
        if (writer != null) {
            writer.close();
            // Drop the reference so a later updateRemoveAllVectors()
            // starts from a clean state.
            writer = null;
        }
    }
    catch( Exception ex)
    {
        // Keep the underlying failure detail (the original swallowed it).
        throw new MiningException( "Can't close ARFF stream from file: "
            + fileName + ": " + ex.getMessage() );
    }
}
/**
 * Reads the next token from the tokenizer and maps it to a string.
 * End of line / end of file are reported as the sentinel strings
 * "endofline" / "endoffile". The '?' (missing value) marker and quoted
 * strings are re-flagged as TT_WORD so callers treat them as values.
 *
 * @return next token, trimmed of surrounding whitespace
 * @exception MiningException if reading from the tokenizer fails or an
 *            unexpected character is encountered
 */
protected String getNextToken() throws MiningException
{
    String token;
    int c;
    try
    {
        c = tokenizer.nextToken();
    }
    catch( IOException ex )
    {
        throw new MiningDataException( ex.getMessage() );
    }
    switch( c )
    {
        case StreamTokenizer.TT_EOL: token = "endofline"; break;
        case StreamTokenizer.TT_EOF: token = "endoffile"; break;
        case '{' : token = "{"; break;
        case '}' : token = "}"; break;
        case '?' : tokenizer.ttype = StreamTokenizer.TT_WORD; token = "?"; break;
        case '\'' : tokenizer.ttype = StreamTokenizer.TT_WORD; token = tokenizer.sval; break;
        case '"' : tokenizer.ttype = StreamTokenizer.TT_WORD; token = tokenizer.sval; break;
        default : token = tokenizer.sval; break;
    }
    // tokenizer.sval is null for ordinary characters that hit the default
    // (or quote) branches without a word value; the original then threw a
    // NullPointerException from trim(). Raise a descriptive error instead.
    if (token == null)
    {
        tokenizerException( "unexpected token" );
    }
    return token.trim();
}
/**
 * Re-initializes the tokenizer syntax tables for the given parsing mode.
 * In meta data mode the blank character is whitespace; in data mode the
 * blank is part of a word. In both modes the comma separates tokens,
 * '%' starts a comment, quotes delimit strings and the braces are
 * returned as ordinary characters. End-of-line is significant.
 *
 * @param a_SyntaxMode SYNTAX_MODE_META_DATA for header parsing,
 *                     anything else for data parsing
 */
protected void resetTokenizerSyntax(int a_SyntaxMode)
{
    if (tokenizer == null)
    {
        return;
    }
    tokenizer.resetSyntax();
    boolean parsingMetaData = (a_SyntaxMode == SYNTAX_MODE_META_DATA);
    if (parsingMetaData)
    {
        // Header mode: ' ' and everything below it is whitespace.
        tokenizer.whitespaceChars( 0, ' ' );
        tokenizer.wordChars( ' ' + 1, '\u00FF' );
    }
    else
    {
        // Data mode: ' ' itself belongs to word characters.
        tokenizer.whitespaceChars( 0, ' ' - 1 );
        tokenizer.wordChars( ' ', '\u00FF' );
    }
    tokenizer.whitespaceChars( ',', ',' );
    tokenizer.commentChar( '%' );
    tokenizer.quoteChar( '"' );
    tokenizer.quoteChar( '\'' );
    tokenizer.ordinaryChar( '{' );
    tokenizer.ordinaryChar( '}' );
    tokenizer.eolIsSignificant( true );
}
/**
 * Creates a tokenizer over the given reader (buffered) and configures
 * its syntax for the requested parsing mode.
 *
 * @param reader source to tokenize
 * @param a_SyntaxMode parsing mode, forwarded to resetTokenizerSyntax
 */
protected void initTokenizer( Reader reader, int a_SyntaxMode)
{
    tokenizer = new StreamTokenizer( new BufferedReader( reader ) );
    resetTokenizerSyntax( a_SyntaxMode );
}
/**
 * Throws a MiningDataException whose message combines the caller's text
 * with the tokenizer state (last token read and line number).
 *
 * @param message the error message to report
 * @throws MiningException always, carrying the combined message
 */
protected void tokenizerException( String message ) throws MiningException
{
    String details = message + ", read " + tokenizer.toString();
    throw new MiningDataException( details );
}
/**
 * Returns a representation of this stream in ARFF format: the meta data
 * header, the "@data" marker, then one line per vector.
 * Attention: changes the cursor position.
 *
 * @return representation of mining input stream as string
 */
public String createArffDescription() {
    // StringBuilder instead of repeated concatenation: the original was
    // O(n^2) in the number of vectors.
    StringBuilder description = new StringBuilder();
    // Meta data:
    description.append( metaData.createArffDescription() ).append("\n");
    // Data:
    description.append("@data").append("\n");
    // Try to reset the stream so serialization starts at the beginning:
    try {
        reset();
    }
    catch (MiningException ex) {
        description.append("Warning: can't reset cursor. ")
                   .append("Start reading at current position").append("\n");
    }
    // Read data. BUG FIX: the original `i++` sat outside the unbraced
    // while loop, so the error message always reported vector 0.
    int i = 0;
    try {
        while( next() ) {
            description.append( read() ).append("\n");
            i++;
        }
    }
    catch (Exception ex) {
        description.append("Error: can't read vector ").append(i);
    }
    return description.toString();
}
// -----------------------------------------------------------------------
// Test
// -----------------------------------------------------------------------
/**
 * Smoke test of the ARFF stream: loads a sample file and prints it.
 *
 * @param args arguments (ignored)
 */
public static void main(String[] args)
{
    try
    {
        MiningArffStream mas = new MiningArffStream( "data/arff/labor.arff", null );
        System.out.println( mas );
    }
    catch (Exception ex)
    {
        ex.printStackTrace();
    }
}
}