📄 c45loader.java
字号:
} catch (Exception ex) { ex.printStackTrace(); } return result; }

  /**
   * Reads the data set incrementally: returns the next instance, or null
   * when there are no more instances to read. If the structure has not yet
   * been determined, getStructure() is called first.
   *
   * Incremental and batch retrieval cannot be mixed on the same loader.
   * When the end of the data is reached the loader is reset.
   *
   * @return the next instance in the data set as an Instance object or null
   * if there are no more instances to be read
   * @exception IOException if no source has been specified, if batch
   * retrieval is already in use, or if there is an error during parsing
   */
  public Instance getNextInstance() throws IOException {
    if (m_sourceFile == null) {
      throw new IOException("No source has been specified");
    }
    if (getRetrieval() == BATCH) {
      throw new IOException("Cannot mix getting Instances in both incremental and batch modes");
    }
    setRetrieval(INCREMENTAL);

    if (m_structure == null) {
      getStructure();
    }

    StreamTokenizer st = new StreamTokenizer(m_dataReader);
    initTokenizer(st);

    Instance nextI = getInstance(st);
    if (nextI != null) {
      nextI.setDataset(m_structure);
    } else {
      // End of data: put the loader back into its initial state.
      try {
        reset();
      } catch (Exception ex) {
        ex.printStackTrace();
      }
    }
    return nextI;
  }

  /**
   * Reads an instance using the supplied tokenizer.
   *
   * One token is consumed per column declared in the header, including
   * ignored columns, whose tokens are read and discarded. A '?' token is
   * stored as a missing value. A trailing period is stripped from the value
   * in the last column only.
   *
   * Parse errors are reported through ConverterUtils.errms, which is
   * expected to throw an IOException (its definition is outside this
   * excerpt -- confirm).
   *
   * @param tokenizer the tokenizer to use
   * @return an Instance or null if there are no more instances to read
   * @exception IOException if an error occurs
   */
  private Instance getInstance(StreamTokenizer tokenizer) throws IOException {
    double[] instance = new double[m_structure.numAttributes()];

    ConverterUtils.getFirstToken(tokenizer);
    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
      return null;
    }

    // counter indexes the kept attributes; i indexes the columns in the file.
    int counter = 0;
    for (int i = 0; i < m_numAttribs; i++) {
      if (i > 0) {
        ConverterUtils.getToken(tokenizer);
      }
      if (m_ignore[i]) {
        continue;
      }

      // Check if value is missing.
      if (tokenizer.ttype == '?') {
        instance[counter++] = Instance.missingValue();
        continue;
      }

      String val = tokenizer.sval;
      if (val == null) {
        // No string token here (the row ended early): report it as a parse
        // error instead of failing later with a NullPointerException.
        ConverterUtils.errms(tokenizer, "premature end of line. Expected a value for column " + i);
      }
      if (i == m_numAttribs - 1) {
        val = removeTrailingPeriod(val);
      }

      Attribute att = m_structure.attribute(counter);
      if (att.isNominal()) {
        int index = att.indexOfValue(val);
        if (index == -1) {
          ConverterUtils.errms(tokenizer,
              "nominal value not declared in header :" + val + " column " + i);
        }
        instance[counter++] = (double) index;
      } else if (att.isNumeric()) {
        try {
          instance[counter++] = Double.parseDouble(val);
        } catch (NumberFormatException e) {
          ConverterUtils.errms(tokenizer, "number expected");
        }
      } else {
        // readHeader only creates nominal and numeric attributes, so this
        // indicates an inconsistent structure. Fail with an exception the
        // caller can handle rather than terminating the whole JVM.
        throw new IOException("Unsupported attribute type for attribute '"
            + att.name() + "' (column " + i + ")");
      }
    }

    return new Instance(1.0, instance);
  }

  /**
   * Removes a single trailing period, if there is one. Empty strings are
   * returned unchanged.
   *
   * @param val the string to work on
   * @return the processed string
   */
  private String removeTrailingPeriod(String val) {
    if (val.endsWith(".")) {
      return val.substring(0, val.length() - 1);
    }
    return val;
  }

  /**
   * Reads header (from the names file) using the supplied tokenizer.
   *
   * The first line supplies the class values. Every following line supplies
   * an attribute name followed by its type: "ignore"/"label" (the column is
   * skipped when reading data), "continuous" (numeric attribute), or a list
   * of nominal values. If the first line holds exactly one entry and that
   * entry is the name of a declared attribute (c5 names file style), that
   * attribute becomes the class; otherwise a nominal attribute named
   * "Class" is appended and used as the class.
   *
   * On return m_structure, m_numAttribs and m_ignore are set.
   *
   * @param tokenizer the tokenizer to use
   * @exception IOException if an error occurs
   */
  private void readHeader(StreamTokenizer tokenizer) throws IOException {
    FastVector attribDefs = new FastVector();
    FastVector ignores = new FastVector();

    ConverterUtils.getFirstToken(tokenizer);
    if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
      ConverterUtils.errms(tokenizer, "premature end of file");
    }

    // Read the class values. Stop at end of file as well as end of line so
    // that a names file without a final line break does not dereference a
    // null token.
    FastVector classVals = new FastVector();
    while (tokenizer.ttype != StreamTokenizer.TT_EOL
           && tokenizer.ttype != StreamTokenizer.TT_EOF) {
      String val = tokenizer.sval.trim();
      if (val.length() > 0) {
        classVals.addElement(removeTrailingPeriod(val));
      }
      ConverterUtils.getToken(tokenizer);
    }

    // Read the attribute names and types; counter is the column position.
    int counter = 0;
    while (tokenizer.ttype != StreamTokenizer.TT_EOF) {
      ConverterUtils.getFirstToken(tokenizer);
      if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
        break;
      }

      String attribName = tokenizer.sval;
      ConverterUtils.getToken(tokenizer);
      if (tokenizer.ttype == StreamTokenizer.TT_EOL) {
        ConverterUtils.errms(tokenizer,
            "premature end of line. Expected attribute type.");
      }
      if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
        ConverterUtils.errms(tokenizer,
            "premature end of file. Expected attribute type.");
      }

      String temp = tokenizer.sval.toLowerCase().trim();
      if (temp.startsWith("ignore") || temp.startsWith("label")) {
        ignores.addElement(new Integer(counter));
      } else if (temp.startsWith("continuous")) {
        attribDefs.addElement(new Attribute(attribName));
      } else {
        // Nominal attribute: the current token is its first value; collect
        // values up to the end of the line.
        FastVector attribVals = new FastVector();
        while (tokenizer.ttype != StreamTokenizer.TT_EOL
               && tokenizer.ttype != StreamTokenizer.TT_EOF) {
          String val = tokenizer.sval.trim();
          if (val.length() > 0) {
            attribVals.addElement(removeTrailingPeriod(val));
          }
          ConverterUtils.getToken(tokenizer);
        }
        attribDefs.addElement(new Attribute(attribName, attribVals));
      }
      counter++;
    }

    // Look to see if the single class entry is an attribute name
    // (ala c5 names file style).
    int classAttIndex = -1;
    if (classVals.size() == 1) {
      String candidate = (String) classVals.elementAt(0);
      for (int j = 0; j < attribDefs.size(); j++) {
        if (((Attribute) attribDefs.elementAt(j)).name().equals(candidate)) {
          classAttIndex = j;
          break;
        }
      }
    }

    boolean appendClassAttribute = (classAttIndex == -1);
    if (appendClassAttribute) {
      attribDefs.addElement(new Attribute("Class", classVals));
    }

    m_structure = new Instances(m_fileStem, attribDefs, 0);
    try {
      if (appendClassAttribute) {
        m_structure.setClassIndex(m_structure.numAttributes() - 1);
      } else {
        m_structure.setClassIndex(classAttIndex);
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    }

    // Total number of columns in the file: kept attributes plus ignored ones.
    m_numAttribs = m_structure.numAttributes() + ignores.size();
    m_ignore = new boolean[m_numAttribs];
    for (int k = 0; k < ignores.size(); k++) {
      m_ignore[((Integer) ignores.elementAt(k)).intValue()] = true;
    }
  }

  /**
   * Initializes the stream tokenizer.
   *
   * Control characters, ',' and ':' separate tokens, '|' starts a comment,
   * double and single quotes delimit quoted strings, and end of line is
   * reported as a token. Every other character from ' ' to '\u00FF' is a
   * word character.
   *
   * @param tokenizer the tokenizer to initialize
   */
  private void initTokenizer(StreamTokenizer tokenizer) {
    tokenizer.resetSyntax();
    tokenizer.whitespaceChars(0, (' ' - 1));
    // ' ' and '.' stay word characters: tokens may contain blanks and
    // decimal points. Callers trim blanks / strip the trailing period
    // themselves where needed.
    tokenizer.wordChars(' ', '\u00FF');
    tokenizer.whitespaceChars(',', ',');
    tokenizer.whitespaceChars(':', ':');
    tokenizer.commentChar('|');
    tokenizer.whitespaceChars('\t', '\t');
    tokenizer.quoteChar('"');
    tokenizer.quoteChar('\'');
    tokenizer.eolIsSignificant(true);
  }

  /**
   * Main method for testing this class. Prints the structure followed by
   * every instance, read incrementally.
   *
   * @param args should contain <filestem>[.names | data]
   */
  public static void main(String[] args) {
    if (args.length == 0) {
      System.err.println("Usage:\n\tC45Loader <filestem>[.names | data]\n");
      return;
    }

    File inputfile = new File(args[0]);
    try {
      C45Loader cta = new C45Loader();
      cta.setSource(inputfile);
      System.out.println(cta.getStructure());
      for (Instance temp = cta.getNextInstance();
           temp != null;
           temp = cta.getNextInstance()) {
        System.out.println(temp);
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -