📄 dataset.java
字号:
/**
 * JBNC - Bayesian Network Classifiers Toolbox <p>
 *
 * Latest release available at http://sourceforge.net/projects/jbnc/ <p>
 *
 * Copyright (C) 1999-2003 Jarek Sacha <p>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version. <p>
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details. <p>
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307, USA. <br>
 * http://www.fsf.org/licenses/gpl.txt
 */
package jbnc.dataset;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Vector;

/**
 * Represents a data set: a list of cases plus the description of the
 * attributes that make up each case. Can be loaded from and saved to a pair
 * of C4.5-style files (a names file and a data file).
 *
 * @author Jarek Sacha
 * @since June 1, 1999
 * @see jbnc.dataset.NamesReader
 * @see jbnc.dataset.DatasetReader
 */
public class Dataset {

    /**
     * Vector of vectors representing cases. Each case attribute is allocated in
     * type defined by 'names'.
     */
    public Vector cases = null;

    /**
     * Description of each of the attributes in a case. Typically the last
     * attribute in a case represent the class this case belongs to.
     */
    public AttributeSpecs[] names = null;

    /**
     * Flag handed to the {@link DatasetReader} when a data file is opened
     * with {@link #openC45(String, String)}.
     */
    protected boolean discardIncompleteCases = false;

    /** Set everything to <tt>null</tt> . */
    public Dataset() {
        cases = null;
        names = null;
    }

    // TODO: Remove unneeded constructor
    //
    //    /**
    //     * Set names to null. Initialize dataset with the array <tt>data</tt> .
    //     *
    //     * @param data Array of cases <tt>data[case][variable]</tt>
    //     * @exception Exception Description of Exception
    //     */
    //    public Dataset(int[][] data) throws Exception {
    //        names = null;
    //        if (data == null || data.length == 0) {
    //            cases = null;
    //            return;
    //        }
    //
    //        cases = new Vector();
    //        int caseSize = data[0].length;
    //        for (int c = 0; c < data.length; ++c) {
    //            if (caseSize != data[c].length) {
    //                throw new Exception("Rows of the array are not of equal size.");
    //            }
    //
    //            Vector v = new Vector();
    //            for (int i = 0; i < caseSize; ++i) {
    //                v.add(new Integer(data[c][i]));
    //            }
    //
    //            cases.add(v);
    //        }
    //    }

    /**
     * Sets the flag that is passed to the data file reader on the next call
     * to {@link #openC45(String, String)}.
     *
     * @param discardIncompleteCases new value of the flag.
     */
    public void setDiscardIncompleteCases(boolean discardIncompleteCases) {
        this.discardIncompleteCases = discardIncompleteCases;
    }

    /**
     * Returns the flag that is passed to the data file reader.
     *
     * @return current value of <tt>discardIncompleteCases</tt>.
     */
    public boolean getDiscardIncompleteCases() {
        return this.discardIncompleteCases;
    }

    /**
     * Loads the data set from <tt>fileStem + ".names"</tt> and
     * <tt>fileStem + ".data"</tt>.
     *
     * @param fileStem common prefix of the names and data file names.
     * @exception Exception propagated from {@link #openC45(String, String)}.
     */
    public void openC45(String fileStem) throws Exception {
        openC45(fileStem + ".names", fileStem + ".data");
    }

    /**
     * Clears this data set, then reads the attribute descriptions from
     * <tt>namesFile</tt> and the cases from <tt>dataFile</tt>.
     *
     * @param namesFile name of the file with attribute descriptions.
     * @param dataFile name of the file with cases.
     * @exception Exception propagated from the names or data reader.
     */
    public void openC45(String namesFile, String dataFile) throws Exception {
        clear();

        names = NamesReader.open(namesFile);

        DatasetReader datasetReader = new DatasetReader();
        datasetReader.setDiscardIncompleteCases(discardIncompleteCases);
        cases = datasetReader.open(dataFile, names);
    }

    /**
     * Saves the data set to <tt>fileStem + ".names"</tt> and
     * <tt>fileStem + ".data"</tt>.
     *
     * @param fileStem common prefix of the names and data file names.
     * @exception Exception propagated from {@link #saveC45(String, String)}.
     */
    public void saveC45(String fileStem) throws Exception {
        saveC45(fileStem + ".names", fileStem + ".data");
    }

    /**
     * Saves the cases to <tt>dataFile</tt> and then the attribute
     * descriptions to <tt>namesFile</tt>.
     *
     * @param namesFile name of the output file for attribute descriptions.
     * @param dataFile name of the output file for cases.
     * @exception Exception if either file cannot be written.
     */
    public void saveC45(String namesFile, String dataFile) throws Exception {
        saveCasesC45(dataFile);
        saveNamesC45(namesFile);
    }

    /**
     * Writes all cases to a file, one case per line, attribute values
     * separated by commas. Values of <tt>DISCRETE</tt> attributes are stored
     * as <tt>Integer</tt> state indexes and are written as the corresponding
     * state name; values of all other attributes are written using their
     * <tt>toString()</tt>.
     *
     * @param fileName name of the output file.
     * @exception Exception if there are no cases, there is no attribute
     *      description, or the file cannot be written.
     */
    public void saveCasesC45(String fileName) throws Exception {
        if (cases == null || cases.size() == 0) {
            throw new Exception("No cases to save.");
        }
        if (names == null || names.length == 0) {
            throw new Exception("Attribute description is empty");
        }

        // Open output file
        PrintWriter out = new PrintWriter(
                new BufferedWriter(
                new FileWriter(fileName)));
        try {
            // Write cases
            int nbCases = cases.size();
            for (int c = 0; c < nbCases; ++c) {
                Vector thisCase = (Vector) cases.get(c);
                for (int i = 0; i < names.length; ++i) {
                    String v;
                    if (names[i].getType() == AttributeType.DISCRETE) {
                        int index = ((Integer) thisCase.get(i)).intValue();
                        v = names[i].getState(index);
                    } else {
                        v = thisCase.get(i).toString();
                    }

                    out.print(v);
                    if (i < (names.length - 1)) {
                        out.print(",");
                    }
                }
                out.println("");
            }

            failOnWriteError(out, fileName);
        } finally {
            // Close output file even when writing fails part way through.
            out.close();
        }
    }

    /**
     * Writes the attribute descriptions to a file. The first line lists the
     * states of the last attribute (the class); each following line describes
     * one of the remaining attributes as <tt>name : description.</tt>
     *
     * @param fileName name of the output file.
     * @exception Exception if there is no attribute description, an attribute
     *      has an unrecognized type or no states to list, or the file cannot
     *      be written.
     */
    public void saveNamesC45(String fileName) throws Exception {
        if (names == null || names.length == 0) {
            throw new Exception("Attribute description is empty");
        }

        // Index of the class attribute; everything before it is a regular attribute.
        int nbAttrib = names.length - 1;

        // Open output file
        PrintWriter out = new PrintWriter(
                new BufferedWriter(
                new FileWriter(fileName)));
        try {
            // Write class
            printStateList(out, names[nbAttrib].getStates(), "the class attribute");

            for (int i = 0; i < nbAttrib; ++i) {
                out.print(names[i].getName() + " : ");
                AttributeType type = names[i].getType();
                if (type == AttributeType.CONTINUOUS) {
                    out.println("continuous.");
                } else if (type == AttributeType.DISCRETE) {
                    printStateList(out, names[i].getStates(),
                            "attribute '" + names[i].getName() + "'");
                } else if (type == AttributeType.DISCRETE_N) {
                    out.println("discrete " + names[i].getStates().length + ".");
                } else if (type == AttributeType.IGNORE) {
                    out.println("ignore.");
                } else {
                    throw new Exception("Unrecognized attribute type.");
                }
            }

            failOnWriteError(out, fileName);
        } finally {
            // Close output file even when an attribute could not be written.
            out.close();
        }
    }

    /** Sets both <tt>names</tt> and <tt>cases</tt> to <tt>null</tt>. */
    public void clear() {
        names = null;
        cases = null;
    }

    /**
     * Discards all attributes that match the given type. Removes columns from
     * the data set and entries from the names vector. Does nothing when either
     * <tt>cases</tt> or <tt>names</tt> is <tt>null</tt>, or when no attribute
     * has the given type.
     *
     * @param type Type of attributes to be discarded.
     */
    public void discardAllOfType(AttributeType type) {
        if (cases == null || names == null) {
            return;
        }

        // Check if there is anything to remove
        boolean gotAny = false;
        for (int i = 0; i < names.length; ++i) {
            if (names[i].getType() == type) {
                gotAny = true;
                break;
            }
        }
        if (!gotAny) {
            return;
        }

        // Fix cases: each case is replaced by a new vector without the discarded columns.
        for (int c = 0; c < cases.size(); ++c) {
            Vector thisCase = (Vector) cases.get(c);
            Vector newCase = new Vector();
            for (int i = 0; i < names.length; ++i) {
                if (names[i].getType() != type) {
                    newCase.add(thisCase.get(i));
                }
            }
            cases.set(c, newCase);
        }

        // Fix names: must happen after the cases, which are filtered using the old names.
        Vector newNames = new Vector();
        for (int i = 0; i < names.length; ++i) {
            if (names[i].getType() != type) {
                newNames.add(names[i]);
            }
        }
        names = new AttributeSpecs[newNames.size()];
        for (int i = 0; i < names.length; ++i) {
            names[i] = (AttributeSpecs) newNames.get(i);
        }
    }

    /**
     * Performs a shallow copy of itself. Member variables in the clone dataset
     * are references to variables in the parent dataset. Only primitive types
     * members, like boolean, are actually copied.
     *
     * @return a clone of this dataset.
     * @exception CloneNotSupportedException declared for compatibility with
     *      <tt>Object.clone()</tt>; not thrown by this implementation.
     */
    public Object clone() throws CloneNotSupportedException {
        Dataset aClone = new Dataset();
        aClone.cases = this.cases;
        aClone.names = this.names;
        aClone.discardIncompleteCases = this.discardIncompleteCases;
        return aClone;
    }

    /**
     * Prints a comma separated list of states terminated by a period and a
     * line break, e.g. <tt>a, b, c.</tt>
     *
     * @param out destination writer.
     * @param states states to print.
     * @param owner description of the attribute, used in the error message.
     * @exception Exception if <tt>states</tt> is <tt>null</tt> or empty.
     */
    private static void printStateList(PrintWriter out, String[] states, String owner)
            throws Exception {
        if (states == null || states.length == 0) {
            throw new Exception("No states defined for " + owner + ".");
        }
        for (int s = 0; s < states.length - 1; ++s) {
            out.print(states[s] + ", ");
        }
        out.println(states[states.length - 1] + ".");
    }

    /**
     * Flushes the writer and reports a failed write. A <tt>PrintWriter</tt>
     * never throws on I/O errors, so its error flag has to be checked
     * explicitly.
     *
     * @param out writer to check.
     * @param fileName name of the file being written, used in the error message.
     * @exception IOException if the writer has encountered an error.
     */
    private static void failOnWriteError(PrintWriter out, String fileName)
            throws IOException {
        if (out.checkError()) {
            throw new IOException("Error writing to file '" + fileName + "'.");
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -