📄 c45loader.java
字号:
}
return result;
}
/**
 * Reads the data set incrementally: returns the next instance from the data
 * source, or null once no more instances are available.
 *
 * If the structure has not been determined yet, getStructure is called
 * first. Incremental and batch retrieval cannot be mixed on the same loader.
 * When the data is exhausted the loader is reset on a best-effort basis; a
 * failure of that reset is only printed, never propagated.
 *
 * @return the next instance in the data set as an Instance object or null
 * if there are no more instances to be read
 * @exception IOException if no source has been set, if batch retrieval is
 * already in use, or if there is an error during parsing
 */
public Instance getNextInstance() throws IOException {
  if (m_sourceFile == null) {
    throw new IOException("No source has been specified");
  }
  if (getRetrieval() == BATCH) {
    throw new IOException("Cannot mix getting Instances in both incremental and batch modes");
  }

  setRetrieval(INCREMENTAL);
  if (m_structure == null) {
    getStructure();
  }

  // A fresh tokenizer is created over the shared data reader on every call.
  StreamTokenizer dataTokenizer = new StreamTokenizer(m_dataReader);
  initTokenizer(dataTokenizer);

  Instance next = getInstance(dataTokenizer);
  if (next == null) {
    // End of data: reset the loader, reporting but not propagating failures.
    try {
      reset();
    } catch (Exception ex) {
      ex.printStackTrace();
    }
    return null;
  }

  next.setDataset(m_structure);
  return next;
}
/**
 * Reads one data row using the supplied tokenizer and converts it into an
 * Instance whose values follow the attribute order of m_structure.
 *
 * Columns flagged in m_ignore are consumed but not stored. A '?' token is
 * stored as a missing value. A trailing period on the last column of the row
 * is stripped before the value is interpreted.
 *
 * @param tokenizer the tokenizer to use
 * @return an Instance or null if there are no more instances to read
 * @exception IOException if the row ends before all columns were read, if a
 * nominal value is not declared in the header, or if a numeric value cannot
 * be parsed
 */
private Instance getInstance(StreamTokenizer tokenizer)
  throws IOException {
  double [] instance = new double[m_structure.numAttributes()];

  ConverterUtils.getFirstToken(tokenizer);
  if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
    return null;
  }

  // counter indexes the kept attributes; i indexes every column in the file
  int counter = 0;
  for (int i = 0; i < m_numAttribs; i++) {
    if (i > 0) {
      ConverterUtils.getToken(tokenizer);
    }
    if (m_ignore[i]) {
      continue;
    }

    // Check if value is missing.
    if (tokenizer.ttype == '?') {
      instance[counter++] = Instance.missingValue();
      continue;
    }

    String val = tokenizer.sval;
    if (val == null) {
      // The tokenizer carries no text (e.g. end of line or end of file was
      // hit in the middle of a row); fail with a parse error instead of a
      // NullPointerException further down.
      throw new IOException("premature end of line or file, expected a value"
                            + " for column " + i + ", read "
                            + tokenizer.toString());
    }

    // remove trailing period on the last column; endsWith is also safe when
    // the value is an empty (quoted) string
    if (i == m_numAttribs - 1 && val.endsWith(".")) {
      val = val.substring(0, val.length() - 1);
    }

    if (m_structure.attribute(counter).isNominal()) {
      int index = m_structure.attribute(counter).indexOfValue(val);
      if (index == -1) {
        ConverterUtils.errms(tokenizer, "nominal value not declared in "
                             +"header :"+val+" column "+i);
      }
      instance[counter++] = (double)index;
    } else if (m_structure.attribute(counter).isNumeric()) {
      try {
        instance[counter++] = Double.valueOf(val).doubleValue();
      } catch (NumberFormatException e) {
        ConverterUtils.errms(tokenizer, "number expected");
      }
    } else {
      // Neither nominal nor numeric: an internal inconsistency. Signal it to
      // the caller rather than terminating the whole JVM.
      throw new IllegalStateException("Shouldn't get here: attribute '"
                                      + m_structure.attribute(counter).name()
                                      + "' is neither nominal nor numeric");
    }
  }
  return new Instance(1.0, instance);
}
/**
 * Strips a single trailing period from the supplied value, if there is one.
 *
 * @param val the value to clean up; must not be null, may be empty
 * @return val without its final '.', or val unchanged if it does not end
 * with a period
 */
private String removeTrailingPeriod(String val) {
  // endsWith copes with the empty string, whereas charAt(length() - 1)
  // would throw StringIndexOutOfBoundsException for it
  if (val.endsWith(".")) {
    return val.substring(0, val.length() - 1);
  }
  return val;
}
/**
 * Reads header (from the names file) using the supplied tokenizer and sets
 * m_structure, m_numAttribs and m_ignore accordingly.
 *
 * The tokens up to the first end of line are taken as the class values.
 * Every following line is read as an attribute name followed by its type:
 * a type starting with "ignore" or "label" marks the column as ignored, a
 * type starting with "continuous" declares a continuous attribute, and
 * anything else is read as the list of values of a nominal attribute. If
 * exactly one class value was given and it equals the name of a declared
 * attribute, that attribute becomes the class (c5 names file style);
 * otherwise a nominal attribute called "Class" is appended and used as class.
 *
 * @param tokenizer the tokenizer to use
 * @exception IOException if the header ends prematurely or an error occurs
 * while reading
 */
private void readHeader(StreamTokenizer tokenizer) throws IOException {
  FastVector attribDefs = new FastVector();
  FastVector ignores = new FastVector();

  ConverterUtils.getFirstToken(tokenizer);
  if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
    ConverterUtils.errms(tokenizer,"premature end of file");
  }

  // Read the class values
  FastVector classVals = new FastVector();
  while (tokenizer.ttype != StreamTokenizer.TT_EOL) {
    if (tokenizer.sval == null) {
      // No text in the current token (e.g. the file ended before the line
      // did); report it instead of failing with a NullPointerException.
      throw new IOException("premature end of file or unexpected token. "
                            + "Expected class values, read "
                            + tokenizer.toString());
    }
    String val = tokenizer.sval.trim();
    if (val.length() > 0) {
      val = removeTrailingPeriod(val);
      classVals.addElement(val);
    }
    ConverterUtils.getToken(tokenizer);
  }

  // read the attribute names and types
  int counter = 0; // position of the current attribute line in the names file
  while (tokenizer.ttype != StreamTokenizer.TT_EOF) {
    ConverterUtils.getFirstToken(tokenizer);
    if (tokenizer.ttype != StreamTokenizer.TT_EOF) {
      String attribName = tokenizer.sval;
      ConverterUtils.getToken(tokenizer);
      if (tokenizer.ttype == StreamTokenizer.TT_EOL) {
        ConverterUtils.errms(tokenizer, "premature end of line. Expected "
                             +"attribute type.");
      }
      if (tokenizer.sval == null) {
        // Same guard as above for a file that stops right after the name.
        throw new IOException("premature end of file or unexpected token. "
                              + "Expected attribute type, read "
                              + tokenizer.toString());
      }

      String temp = tokenizer.sval.toLowerCase().trim();
      if (temp.startsWith("ignore") || temp.startsWith("label")) {
        ignores.addElement(new Integer(counter));
      } else if (temp.startsWith("continuous")) {
        attribDefs.addElement(new Attribute(attribName));
      } else {
        // read the values of the attribute; the current token is already
        // the first value
        FastVector attribVals = new FastVector();
        while (tokenizer.ttype != StreamTokenizer.TT_EOL &&
               tokenizer.ttype != StreamTokenizer.TT_EOF) {
          String val = tokenizer.sval.trim();
          if (val.length() > 0) {
            val = removeTrailingPeriod(val);
            attribVals.addElement(val);
          }
          ConverterUtils.getToken(tokenizer);
        }
        attribDefs.addElement(new Attribute(attribName, attribVals));
      }
      counter++;
    }
  }

  // look to see if the single class value is really an attribute name
  // (ala c5 names file style); classIndex stays -1 if it is not
  int classIndex = -1;
  if (classVals.size() == 1) {
    String className = (String)classVals.elementAt(0);
    for (int j = 0; j < attribDefs.size(); j++) {
      if (((Attribute)attribDefs.elementAt(j)).name().equals(className)) {
        classIndex = j;
        break;
      }
    }
  }

  boolean classIsSeparate = (classIndex < 0);
  if (classIsSeparate) {
    attribDefs.addElement(new Attribute("Class", classVals));
  }

  m_structure = new Instances(m_fileStem, attribDefs, 0);
  try {
    if (classIsSeparate) {
      m_structure.setClassIndex(m_structure.numAttributes()-1);
    } else {
      m_structure.setClassIndex(classIndex);
    }
  } catch (Exception ex) {
    ex.printStackTrace();
  }

  // total number of columns in a data row: kept attributes plus ignored ones
  m_numAttribs = m_structure.numAttributes() + ignores.size();
  m_ignore = new boolean[m_numAttribs];
  for (int j = 0; j < ignores.size(); j++) {
    m_ignore[((Integer)ignores.elementAt(j)).intValue()] = true;
  }
}
/**
 * Initializes the stream tokenizer with the syntax used by this loader:
 * comma, colon and control characters separate tokens, '|' starts a comment,
 * single and double quotes delimit quoted strings, and ends of line are
 * reported as tokens.
 *
 * @param tokenizer the tokenizer to initialize
 */
private void initTokenizer(StreamTokenizer tokenizer) {
  // start from an empty syntax table
  tokenizer.resetSyntax();

  // everything below the space character separates tokens
  final int lastControlChar = ' ' - 1;
  tokenizer.whitespaceChars(0, lastControlChar);

  // space itself is a word character, so values may contain blanks
  tokenizer.wordChars(' ', '\u00FF');

  // field separators, set after wordChars so they override the word range
  final char[] separators = {',', ':', '\t'};
  for (int k = 0; k < separators.length; k++) {
    tokenizer.whitespaceChars(separators[k], separators[k]);
  }

  tokenizer.commentChar('|');
  tokenizer.quoteChar('"');
  tokenizer.quoteChar('\'');
  tokenizer.eolIsSignificant(true);
}
/**
 * Main method for testing this class. Prints the structure of the data set
 * followed by every instance, read incrementally; prints a usage message to
 * standard error when no argument is given.
 *
 * @param args should contain {@code <filestem>[.names | data]}
 */
public static void main (String [] args) {
  if (args.length == 0) {
    System.err.println("Usage:\n\tC45Loader <filestem>[.names | data]\n");
    return;
  }

  File inputfile = new File(args[0]);
  try {
    C45Loader cta = new C45Loader();
    cta.setSource(inputfile);
    System.out.println(cta.getStructure());

    // getNextInstance returns null once the data is exhausted
    for (Instance current = cta.getNextInstance();
         current != null;
         current = cta.getNextInstance()) {
      System.out.println(current);
    }
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -