📄 ArffLoader.java
          FastVector atts = attributes;
          attributes = new FastVector();

          // Now, read attributes until we hit end of declaration of relational value
          getFirstToken();
          if (m_Tokenizer.ttype == StreamTokenizer.TT_EOF) {
            errorMessage("premature end of file");
          }
          do {
            if (Attribute.ARFF_ATTRIBUTE.equalsIgnoreCase(m_Tokenizer.sval)) {
              attributes = parseAttribute(attributes);
            } else if (Attribute.ARFF_END_SUBRELATION.equalsIgnoreCase(m_Tokenizer.sval)) {
              getNextToken();
              if (!attributeName.equalsIgnoreCase(m_Tokenizer.sval)) {
                errorMessage("declaration of subrelation " + attributeName
                  + " must be terminated by " + "@end " + attributeName);
              }
              break;
            } else {
              errorMessage("declaration of subrelation " + attributeName
                + " must be terminated by " + "@end " + attributeName);
            }
          } while (true);

          // Make relation and restore original set of attributes
          Instances relation = new Instances(attributeName, attributes, 0);
          attributes = atts;
          attributes.addElement(new Attribute(attributeName, relation, attributes.size()));
        } else {
          errorMessage("no valid attribute type or invalid enumeration");
        }
      } else {
        // Attribute is nominal.
        attributeValues = new FastVector();
        m_Tokenizer.pushBack();

        // Get values for nominal attribute.
        if (m_Tokenizer.nextToken() != '{') {
          errorMessage("{ expected at beginning of enumeration");
        }
        while (m_Tokenizer.nextToken() != '}') {
          if (m_Tokenizer.ttype == StreamTokenizer.TT_EOL) {
            errorMessage("} expected at end of enumeration");
          } else {
            attributeValues.addElement(m_Tokenizer.sval);
          }
        }
        attributes.addElement(new Attribute(attributeName, attributeValues, attributes.size()));
      }

      getLastToken(false);
      getFirstToken();
      if (m_Tokenizer.ttype == StreamTokenizer.TT_EOF)
        errorMessage("premature end of file");

      return attributes;
    }

    /**
     * Reads and skips all tokens before next end of line token.
     *
     * @throws IOException in case something goes wrong
     */
    protected void readTillEOL() throws IOException {
      while (m_Tokenizer.nextToken() != StreamTokenizer.TT_EOL) {}

      m_Tokenizer.pushBack();
    }

    /**
     * Returns the header format
     *
     * @return the header format
     */
    public Instances getStructure() {
      return new Instances(m_Data, 0);
    }

    /**
     * Returns the data that was read
     *
     * @return the data
     */
    public Instances getData() {
      return m_Data;
    }
  } // end of inner class ArffReader

  /**
   * Returns a string describing this Loader
   *
   * @return a description of the Loader suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "Reads a source that is in arff (attribute relation file format) "
      + "format. ";
  }

  /**
   * Get the file extension used for arff files
   *
   * @return the file extension
   */
  public String getFileExtension() {
    return FILE_EXTENSION;
  }

  /**
   * Gets all the file extensions used for this type of file
   *
   * @return the file extensions
   */
  public String[] getFileExtensions() {
    return new String[]{FILE_EXTENSION, FILE_EXTENSION_COMPRESSED};
  }

  /**
   * Returns a description of the file type.
   *
   * @return a short file description
   */
  public String getFileDescription() {
    return "Arff data files";
  }

  /**
   * Resets the Loader ready to read a new data set
   *
   * @throws IOException if something goes wrong
   */
  public void reset() throws IOException {
    m_structure = null;
    setRetrieval(NONE);

    if (m_File != null && (new File(m_File)).isFile()) {
      setFile(new File(m_File));
    } else if (m_URL != null && !m_URL.equals("http://")) {
      setURL(m_URL);
    }
  }

  /**
   * Resets the Loader object and sets the source of the data set to be
   * the supplied url.
   *
   * @param url the source url.
   * @throws IOException if an error occurs
   */
  public void setSource(URL url) throws IOException {
    m_structure = null;
    setRetrieval(NONE);

    setSource(url.openStream());

    m_URL = url.toString();
  }

  /**
   * get the File specified as the source
   *
   * @return the source file
   */
  public File retrieveFile() {
    return new File(m_File);
  }

  /**
   * sets the source File
   *
   * @param file the source file
   * @throws IOException if an error occurs
   */
  public void setFile(File file) throws IOException {
    m_File = file.getAbsolutePath();
    setSource(file);
  }

  /**
   * Resets the Loader object and sets the source of the data set to be
   * the supplied File object.
   *
   * @param file the source file.
   * @throws IOException if an error occurs
   */
  public void setSource(File file) throws IOException {
    m_structure = null;
    setRetrieval(NONE);

    if (file == null)
      throw new IOException("Source file object is null!");

    try {
      if (file.getName().endsWith(FILE_EXTENSION_COMPRESSED))
        setSource(new GZIPInputStream(new FileInputStream(file)));
      else
        setSource(new FileInputStream(file));
    } catch (FileNotFoundException ex) {
      throw new IOException("File not found");
    }

    m_sourceFile = file;
    m_File = file.getAbsolutePath();
  }

  /**
   * Set the url to load from
   *
   * @param url the url to load from
   * @throws IOException if the url can't be set.
   */
  public void setURL(String url) throws IOException {
    m_URL = url;
    setSource(new URL(url));
  }

  /**
   * Return the current url
   *
   * @return the current url
   */
  public String retrieveURL() {
    return m_URL;
  }

  /**
   * Resets the Loader object and sets the source of the data set to be
   * the supplied InputStream.
   *
   * @param in the source InputStream.
   * @throws IOException always thrown.
   */
  public void setSource(InputStream in) throws IOException {
    m_File = (new File(System.getProperty("user.dir"))).getAbsolutePath();
    m_URL = "http://";

    m_sourceReader = new BufferedReader(new InputStreamReader(in));
  }

  /**
   * Determines and returns (if possible) the structure (internally the
   * header) of the data set as an empty set of instances.
   *
   * @return the structure of the data set as an empty set of Instances
   * @throws IOException if an error occurs
   */
  public Instances getStructure() throws IOException {
    if (m_sourceReader == null) {
      throw new IOException("No source has been specified");
    }

    if (m_structure == null) {
      try {
        m_ArffReader = new ArffReader(m_sourceReader, 1);
        m_structure = m_ArffReader.getStructure();
      } catch (Exception ex) {
        throw new IOException("Unable to determine structure as arff (Reason: " + ex.toString() + ").");
      }
    }

    return new Instances(m_structure, 0);
  }

  /**
   * Return the full data set. If the structure hasn't yet been determined
   * by a call to getStructure then method should do so before processing
   * the rest of the data set.
   *
   * @return the structure of the data set as an empty set of Instances
   * @throws IOException if there is no source or parsing fails
   */
  public Instances getDataSet() throws IOException {
    if (m_sourceReader == null) {
      throw new IOException("No source has been specified");
    }

    if (getRetrieval() == INCREMENTAL) {
      throw new IOException("Cannot mix getting Instances in both incremental and batch modes");
    }
    setRetrieval(BATCH);

    if (m_structure == null) {
      getStructure();
    }

    // Read all instances
    Instance inst;
    while ((inst = m_ArffReader.readInstance(m_structure)) != null)
      m_structure.add(inst);

    Instances readIn = new Instances(m_structure);

    return readIn;
  }

  /**
   * Read the data set incrementally---get the next instance in the data
   * set or returns null if there are no more instances to get.
   * If the structure hasn't yet been determined by a call to getStructure
   * then method should do so before returning the next instance in the
   * data set.
   *
   * @param structure the dataset header information, will get updated in
   * case of string or relational attributes
   * @return the next instance in the data set as an Instance object or null
   * if there are no more instances to be read
   * @throws IOException if there is an error during parsing
   */
  public Instance getNextInstance(Instances structure) throws IOException {
    m_structure = structure;

    if (getRetrieval() == BATCH) {
      throw new IOException("Cannot mix getting Instances in both incremental and batch modes");
    }
    setRetrieval(INCREMENTAL);

    Instance current = m_ArffReader.readInstance(m_structure);
    if (current == null) {
      try {
        reset();
      } catch (Exception ex) {
        ex.printStackTrace();
      }
    }
    return current;
  }

  /**
   * Main method.
   *
   * @param args should contain the name of an input file.
   */
  public static void main(String[] args) {
    runFileLoader(new ArffLoader(), args);
  }
}
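For reference, the following is a minimal usage sketch of the loader shown above, covering both batch loading via getDataSet() and incremental loading via getStructure()/getNextInstance(). It assumes the class is Weka's standard ArffLoader from the weka.core.converters package; the demo class name and the input file "iris.arff" are hypothetical placeholders, not part of the original source.

import java.io.File;
import java.io.IOException;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ArffLoader;   // assumed package of the class shown above

public class ArffLoaderDemo {             // hypothetical demo class
  public static void main(String[] args) throws IOException {
    // Batch mode: read the entire data set with a single call.
    ArffLoader batchLoader = new ArffLoader();
    batchLoader.setFile(new File("iris.arff"));          // hypothetical input file
    Instances all = batchLoader.getDataSet();
    System.out.println("Batch read: " + all.numInstances() + " instances");

    // Incremental mode: fetch the header first, then pull one instance at a time.
    // A second loader is used because getDataSet() and getNextInstance() refuse
    // to be mixed on the same loader (see the retrieval-mode checks above).
    ArffLoader incLoader = new ArffLoader();
    incLoader.setFile(new File("iris.arff"));            // hypothetical input file
    Instances header = incLoader.getStructure();
    int count = 0;
    Instance inst;
    while ((inst = incLoader.getNextInstance(header)) != null) {
      count++;                                           // process each instance here
    }
    System.out.println("Incrementally read: " + count + " instances");
  }
}

Note that, per the javadoc above, getNextInstance(Instances) may update the passed header when string or relational attributes are present, which is why the same header object is reused across calls in the incremental loop.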