📄 fileparser.java
字号:
package ai.decision.algorithm;
import java.io.*;
import java.util.*;
/**
* The FileParser class contains methods that are used to
* parse decision tree metadata and example files.
*
* <p>
* Metadata files have the following format:
*
* <p><pre>
*
* <infofile> :== CONCLUSION =
* <feature>
*
* FEATURES =
* <feature>,
* ...
*
* TRAINDATA = "<string>"
*
* <feature> :== "<string>"
* = { '<string>', ... }
* </pre>
*
* <p>
* Data files have the following format:
*
* <p><pre>
* <datafile> :== <featureset> \n ...
*
* <featureset> :== <string>,...
* </pre>
*
* <p>
* <b>Change History:</b>
*
* <p><pre>
* Name: Date: Change:
* =============================================================
* J. Kelly May-04-2000 Created.
* </pre>
*
* Copyright 2000 University of Alberta.
*
* <!--
* This file is part of the Decision Tree Applet.
*
* The Decision Tree Applet is free software; you can redistribute it
* and/or modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* The Decision Tree Applet is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with the Decision Tree Applet; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* -->
*/
public class FileParser
{
// Instance data members
// Line-oriented reader over the file currently being read.
// startDataParse() installs it; startMetaParse() uses it while
// loading the metadata file and then resets it to null.
LineNumberReader m_lineReader;
// Text buffer: the metadata file's lines concatenated into one
// string by startMetaParse().
String m_fileText;
// True once the opening '{' of an attribute domain has been
// stripped by extractAttributeValue(); cleared again when the
// matching '}' is found.
boolean m_openBracket;
// True when the next item to be parsed is an attribute domain.
// Set by extractAttributeName(), cleared by
// extractAttributeValue() when the domain's closing '}' is found.
boolean m_attributeDomain;
// True while there are remaining attributes to be parsed in the
// current section of the file. Set by moveToTargetAttribute() and
// moveToAttributes(); cleared by extractAttributeValue() when no
// ',' follows a domain's closing '}'.
boolean m_attributesRemain;
// Constructors
/**
 * Creates a new FileParser with no file attached.
 *
 * <p>
 * No field is initialised explicitly, so the reader and text
 * buffer start out as <code>null</code> and all parse-state flags
 * start out as <code>false</code>.
 */
public FileParser()
{
    super();
}
// Public methods
/**
 * Reads the whole metadata (configuration) file into memory and
 * prepares the parser for a fresh metadata parse.
 *
 * <p>
 * The lines of the file are concatenated, without their line
 * terminators and without any separator, into a single string
 * that is stored in the text buffer. The stream is read to its
 * end but is not closed here; closing it is left to the caller.
 *
 * <p>
 * On success the parse-state flags are reset, so a parser that
 * was abandoned half-way through an earlier metadata file (for
 * example after a syntax error) can safely be reused. On failure
 * the previously loaded text and flags are left untouched.
 *
 * @param metaInputStream A stream attached to the metadata
 *        file for a given data set.
 *
 * @throws IOException If a problem occurs while reading
 *         the configuration file.
 */
public void startMetaParse( InputStream metaInputStream )
throws IOException
{
    // A local reader is enough: the metadata text ends up in
    // m_fileText, so no reader needs to outlive this method.
    LineNumberReader metaReader = new LineNumberReader(
        new InputStreamReader( metaInputStream ) );

    // Since the configuration files are (generally) small and
    // need to be parsed as a whole, the entire file is read at
    // once and glued together into one long string.
    StringBuffer metaText = new StringBuffer();

    try {
        String line;

        while( (line = metaReader.readLine()) != null ) {
            metaText.append( line );
        }
    }
    finally {
        // Whether or not reading succeeded, never leave a
        // (possibly half-consumed) metadata reader behind in
        // the line reader field.
        m_lineReader = null;
    }

    m_fileText = metaText.toString();

    // New text means a new parse: drop any state left over from
    // a previous, possibly aborted, parse.
    m_openBracket      = false;
    m_attributeDomain  = false;
    m_attributesRemain = false;
}
/**
 * Attaches the parser to a data file.
 *
 * <p>
 * Only the reader is set up here; nothing is read from the
 * stream yet. Data files can be extremely long, so they are
 * meant to be read and parsed one line at a time.
 *
 * @param dataInputStream A stream attached to the data file
 *        for a given dataset.
 *
 * @throws IOException If a problem occurs while
 *         opening the data file.
 */
public void startDataParse( InputStream dataInputStream )
throws IOException
{
    // Decode the raw bytes into characters first ...
    InputStreamReader decoder = new InputStreamReader( dataInputStream );

    // ... then layer line-by-line reading on top of that.
    m_lineReader = new LineNumberReader( decoder );
}
/**
 * Positions the parser, within the in-memory configuration
 * file data, at the place where the target attribute name
 * should be located, and marks that there are attributes left
 * to parse in this section.
 *
 * <p>
 * The positioning itself is delegated to
 * <code>stripString</code> with the section keyword
 * <code>CONCLUSION</code>.
 *
 * @throws InvalidMetaFileException If a syntax
 *         error is found.
 */
public void moveToTargetAttribute()
throws InvalidMetaFileException
{
    final String sectionKeyword = "CONCLUSION";

    stripString( sectionKeyword );
    this.m_attributesRemain = true;
}
/**
 * Positions the parser, within the in-memory configuration
 * file data, at the place where the general attribute
 * information should be located, and marks that there are
 * attributes left to parse in this section.
 *
 * <p>
 * The positioning itself is delegated to
 * <code>stripString</code> with the section keyword
 * <code>FEATURES</code>.
 *
 * @throws InvalidMetaFileException If a syntax
 *         error is found.
 */
public void moveToAttributes()
throws InvalidMetaFileException
{
    final String sectionKeyword = "FEATURES";

    stripString( sectionKeyword );
    this.m_attributesRemain = true;
}
/**
 * Positions the parser, within the in-memory configuration
 * file data, at the place where the training data file path
 * should be located.
 *
 * <p>
 * The positioning itself is delegated to
 * <code>stripString</code> with the section keyword
 * <code>TRAINDATA</code>. Unlike the attribute sections, no
 * parse-state flag is changed here.
 *
 * @throws InvalidMetaFileException If a syntax
 *         error is found.
 */
public void moveToDataFilePath()
throws InvalidMetaFileException
{
    final String sectionKeyword = "TRAINDATA";

    stripString( sectionKeyword );
}
/**
 * Extracts the next attribute name from the configuration
 * file text.
 *
 * <p>
 * Attribute names are enclosed in double quotes and must be
 * followed by an '=' sign. The quoted name is pulled out with
 * <code>extractString</code> and the '=' sign is then stripped.
 * On success the parser is flagged as expecting an attribute
 * domain next, so that <code>extractAttributeValue</code> can be
 * called.
 *
 * @return A String containing the name of the attribute that is
 *         extracted, or <code>null</code> if there are no
 *         attributes left to parse in the current section.
 *
 * @throws InvalidMetaFileException If a syntax error is
 *         found.
 */
public String extractAttributeName()
throws InvalidMetaFileException
{
    // Nothing left to do once all attribute names are parsed.
    if( !m_attributesRemain ) {
        return null;
    }

    // The name sits between a pair of double quotes.
    String name = extractString( '"', '"', true );

    // A well-formed name is followed by '='; after it, the next
    // thing to parse is the set of attribute values.
    boolean equalsFound = stripCharacter( '=' );

    if( equalsFound ) {
        m_attributeDomain = true;
        return name;
    }

    throw new InvalidMetaFileException( "Syntax error in " +
        "configuration file: No '=' sign after attribute name." );
}
/**
 * Extracts the next attribute value from the configuration
 * file text.
 *
 * <p>
 * Attribute values are enclosed in single quotes, separated by
 * commas, and the whole list is wrapped in curly braces. On the
 * first call for a domain the opening '{' is stripped. After
 * each value either a ',' (more values follow) or the closing
 * '}' (domain finished) is stripped. When the domain is finished
 * a further ',' is looked for; if there is none, the parser is
 * flagged as having no attributes left in this section.
 *
 * @return A String containing the name of the next
 *         attribute value, or <code>null</code> if there are no
 *         more attribute values.
 *
 * @throws InvalidMetaFileException If the opening '{' is
 *         missing, or if a value is followed by neither
 *         ',' nor '}'.
 */
public String extractAttributeValue()
throws InvalidMetaFileException
{
    // Have we already finished parsing values?
    if( !m_attributeDomain ) {
        return null;
    }

    // First value of this domain: the opening curly brace has
    // not been stripped out yet.
    if( !m_openBracket ) {
        if( !stripCharacter( '{' ) ) {
            throw new InvalidMetaFileException( "Syntax error " +
                "in configuration file: Missing '{' before attribute values." );
        }

        m_openBracket = true;
    }

    // Extract the value name from the file text.
    String valueName = extractString( '\'', '\'', true );

    // A ',' right after the value means more values follow.
    if( stripCharacter( ',' ) ) {
        return valueName;
    }

    // Otherwise this must have been the last value of the domain.
    if( !stripCharacter( '}' ) ) {
        throw new InvalidMetaFileException( "Syntax error in " +
            "configuration file: No ',' or '}' after attribute value." );
    }

    m_openBracket     = false;   // bracket matched
    m_attributeDomain = false;   // finished this domain

    // A ',' after the closing brace announces another attribute;
    // without it there are no more attributes to parse.
    if( !stripCharacter( ',' ) ) {
        m_attributesRemain = false;
    }

    return valueName;
}
/**
* Extracts and returns a string from the current
* file text. Double-quote characters are used as
* delimiters. The string and the quotes are removed
* from the file text.
*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -