⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 datasetreaderint.java

📁 bayes network classifier toolbox 贝叶斯网络分类工具箱
💻 JAVA
字号:
/** *  JBNC - Bayesian Network Classifiers Toolbox <p> * *  Latest release available at http://sourceforge.net/projects/jbnc/ <p> * *  Copyright (C) 1999-2003 Jarek Sacha <p> * *  This program is free software; you can redistribute it and/or modify it *  under the terms of the GNU General Public License as published by the Free *  Software Foundation; either version 2 of the License, or (at your option) *  any later version. <p> * *  This program is distributed in the hope that it will be useful, but WITHOUT *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for *  more details. <p> * *  You should have received a copy of the GNU General Public License along with *  this program; if not, write to the Free Software Foundation, Inc., 59 Temple *  Place - Suite 330, Boston, MA 02111-1307, USA. <br> *  http://www.fsf.org/licenses/gpl.txt */package jbnc.dataset;import java.io.BufferedReader;import java.io.FileReader;import java.util.TreeSet;import java.util.Vector;/** *  Functions for reading data sets with test cases. Variables are assumed to be *  'descrete' or 'ignore'. * * @author     Jarek Sacha * @since      June 1, 1999 * @see        jbnc.dataset.Dataset * @see        jbnc.dataset.NamesReader */public final class DatasetReaderInt extends DatasetReader {    /**     *  Reads a data file with cases - comma delimited, no header. This function     *  is typically used to read files in c4.5 format, description of attributes     *  needs to be read from the '.names' first using     *  jbnc.dataset.NamesReader.open().     *     * @param  names          Descriptions of columns in the file.     * @param  fileName       Description of Parameter     * @return                Vector of vectors representing cases. Each case     *      attribute is allocated in type defined by 'names' parameters.     * @exception  Exception  Description of Exception     */    public Vector open(String fileName,                       AttributeSpecs[] names) throws Exception {        int lineCount = 0;        int missingCount = 0;        try {            // Open file            BufferedReader in =                    new BufferedReader(new FileReader(fileName));            Vector v = new Vector();            String s;            // Get cases            while ((s = in.readLine()) != null) {                ++lineCount;                // Read line                ParseLine p = new ParseLine(s);                Vector raw = p.read();                if (raw == null) {                    continue;                }                // Verify and convert                int[] l = convertCaseInt(raw, names);                if (l != null) {                    v.add(l);                } else {                    ++missingCount;                }            }            lineCount = -1;            in.close();            if (missingCount > 0) {                System.out.println("Discarded " + missingCount + " cases with missing values.");            }            return v;        } catch (Exception e) {            if (lineCount > 0) {                System.out.println("Error in line " + lineCount);                System.out.println(e.toString());            }            throw e;        }    }    /**     *  Reads a data file with cases - comma delimited, with header. First line in     *  the file gives names of attributes (columns). In current implementation     *  the attributes are assumed to be discrete.     *     * @param  fileName       Name of the file to read data from.     * @param  className      Name of the attribute representing class. If it is     *      null, it is assumed that the last column represents class.     * @return                Vector of vectors representing cases. Each case     *      attribute is allocated in type defined by 'names' parameters.     * @exception  Exception  Description of Exception     */    public Dataset open(String fileName,                        String className) throws Exception {        int lineCount = 0;        try {            // Open file            BufferedReader in =                    new BufferedReader(new FileReader(fileName));            // Get attribute names            String s;            Vector header = null;            while ((s = in.readLine()) != null) {                ++lineCount;                // Read line                ParseLine p = new ParseLine(s);                header = p.read();                if (header != null) {                    break;                }            }            if (header == null) {                throw new Exception("Data file is empty.");            }            int headerLine = lineCount;            int headerSize = header.size();            int lastCol = headerSize - 1;            Vector cases = new Vector();            // Get cases            while ((s = in.readLine()) != null) {                ++lineCount;                // Read line                ParseLine p = new ParseLine(s);                Vector l = p.read();                if (l == null) {                    continue;                }                if (l.size() != header.size()) {                    throw new Exception("Number of tokens in line #" + lineCount                            + " does not match number of tokens in header in line#"                            + headerLine                            + " (" + l.size() + " != " + header.size() + ")");                }                cases.add(l);            }            lineCount = -1;            in.close();            if (className != null) {                // Reorder columns so that class attribute is the last one in the row                // Find which column is the class in                int classCol = -1;                for (int col = 0; col < headerSize; ++col) {                    if (className.equals(header.get(col))) {                        classCol = col;                        break;                    }                }                if (classCol == -1) {                    throw new Exception("Cannot find class name '"                            + className + "' in the header {" + header + "}.");                }                if (classCol != lastCol) {                    // Swap class name in the header                    Object oc = header.get(classCol);                    header.set(classCol, header.get(lastCol));                    header.set(lastCol, oc);                    // Swap attributes in each row                    for (int row = 0; row < cases.size(); ++row) {                        Vector thisCase = (Vector) cases.get(row);                        Object o = thisCase.get(classCol);                        thisCase.set(classCol, thisCase.get(lastCol));                        thisCase.set(lastCol, o);                    }                }            }            // Create enumerations of values for each attribute            TreeSet[] ss = new TreeSet[headerSize];            for (int col = 0; col < headerSize; ++col) {                ss[col] = new TreeSet();            }            for (int row = 0; row < cases.size(); ++row) {                Vector thisCase = (Vector) cases.get(row);                for (int col = 0; col < headerSize; ++col) {                    ss[col].add(thisCase.get(col));                }            }            // Create attribute descriptions            AttributeSpecs[] names = new AttributeSpecs[headerSize];            for (int i = 0; i < headerSize; ++i) {                Object[] a = ss[i].toArray();                String[] states = new String[a.length];                for (int j = 0; j < a.length; ++j) {                    states[j] = (String) a[j];                }                names[i] = new AttributeSpecs();                names[i].setType(AttributeType.DISCRETE);                names[i].setName((String) header.get(i));                names[i].setStates(states);            }            DatasetInt dataset = new DatasetInt();            dataset.names = names;            dataset.cases = new Vector();            // Create indexed cases            for (int row = 0; row < cases.size(); ++row) {                int[] r = new int[headerSize];                Vector thisCase = (Vector) cases.get(row);                for (int col = 0; col < headerSize; ++col) {                    int index = names[col].getState((String) thisCase.get(col));                    r[col] = index;                }                dataset.cases.add(r);            }            return dataset;        } catch (Exception e) {            if (lineCount > 0) {                System.out.println("Error in line " + lineCount);                System.out.println(e.toString());            }            throw e;        }    }    /**     *  LOCAL     *     * @param  rawData        Description of Parameter     * @param  names          Description of Parameter     * @return                Description of the Returned Value     * @exception  Exception  Description of Exception     */    /**     *  Convert case from a raw format.     *     * @param  rawData        Description of Parameter     * @param  names          Description of Parameter     * @return                Description of the Returned Value     * @exception  Exception  Description of Exception     */    protected int[] convertCaseInt(Vector rawData,                                   AttributeSpecs[] names) throws Exception {        // Verify size        int size = names.length;        if (rawData.size() != size) {            throw new Exception("DatasetReaderInt.convertLine: found "                    + rawData.size() + ", expecting " + size);        }        int[] v = new int[names.length];        for (int i = 0; i < size; ++i) {            AttributeSpecs a = names[i];            String s = (String) rawData.get(i);            if (getDiscardIncompleteCases() && s.equals("?")) {                return null;            }            try {                AttributeType type = a.getType();                if (type == AttributeType.IGNORE) {                    v[i] = -1;                } else if (type == AttributeType.CONTINUOUS) {                    throw new Exception("DatasetReaderInt.convertLine: internal error: "                            + "Discreate dataset can not have values of type "                            + "CONTINUOUS.");                } else if (type == AttributeType.DISCRETE) {                    v[i] = a.getState(s);                } else {                    throw new Exception("DatasetReaderInt.convertLine: internal error: "                            + "incorrect attribute code");                }            } catch (NumberFormatException e) {                throw new Exception("DatasetReaderInt.convertLine: cannot convert token #"                        + (i + 1) + " to a number (" + e.getMessage() + ")");            } catch (AttributeSpecs.AttributeException e) {                throw new Exception("DatasetReaderInt.convertLine: token #" + (i + 1)                        + " discrete attribute declaration. Have '" + s                        + "', expecting one of '" + a.toString() + "'");            }        }        return v;    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -