📄 c45saver.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* C45Saver.java
* Copyright (C) 2004 Stefan Mutter
*
*/
package weka.core.converters;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.io.OutputStream;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Enumeration;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Attribute;
import weka.core.Utils;
import weka.core.OptionHandler;
import weka.core.Option;
import weka.core.FastVector;
/**
* Writes to a destination in the format used by the C4.5 algorithm.
* The output consists of two files: *.names and *.data
*
* Valid options:
*
* -i input arff file <br>
* The input file in ARFF format. <p>
*
* -o the output file <br>
* The output file. The prefix of the output file is sufficient.<p>
*
* -c class index <br>
* The index of the class attribute. first and last are valid as well (default: last). <p>
*
* @author Stefan Mutter (mutter@cs.waikato.ac.nz)
* @version $Revision: 1.1 $
* @see Saver
*/
public class C45Saver extends AbstractFileSaver implements BatchConverter, IncrementalConverter, OptionHandler {
/**
 * Creates a new saver and initialises it by calling
 * {@link #resetOptions()}.
 */
public C45Saver() {
  this.resetOptions();
}
/**
 * Describes this Saver in a short, human-readable text.
 *
 * @return the description of the Saver, suitable for displaying in
 *         the explorer/experimenter gui
 */
public String globalInfo() {
  final String description =
      "Writes to a destination that is in the format used by the C4.5 algorithm.\nTherefore it outputs a names and a data file.";
  return description;
}
/**
 * Names the kind of file this Saver produces.
 *
 * @return a short description of the file type
 */
public String getFileDescription() {
  final String fileDescription = "C4.5 file format";
  return fileDescription;
}
/**
 * Resets the Saver: first lets the superclass reset its own options,
 * then sets the file extension to ".names".
 */
public void resetOptions() {
  final String namesExtension = ".names";

  super.resetOptions();
  this.setFileExtension(namesExtension);
}
/**
 * Saves an instance incrementally. The structure has to be set in advance
 * by using the setStructure() method or the setInstances() method.
 * <p>
 * On the call that finds the write mode STRUCTURE_READY the names file is
 * written and closed, and the destination is switched to a file with the
 * same path prefix and the extension ".data". In write mode WRITE each
 * non-null instance is appended as one line to the current writer. Passing
 * <code>null</code> in write mode WRITE flushes and closes the writer,
 * restores the ".names" extension, zeroes the incremental counter and calls
 * resetStructure().
 * <p>
 * If the structure has no class index, the last attribute is made the class
 * attribute and a note is printed to standard error.
 *
 * @param inst the instance to save, or <code>null</code> to finish writing
 * @throws IOException if the class attribute is numeric, if the retrieval
 *         mode is BATCH or NONE, if no output file/writer is available, if
 *         the data file cannot be set up, or if no structure is available
 *         in write mode WRITE
 */
public void writeIncremental(Instance inst) throws IOException {
  int writeMode = getWriteMode();
  Instances structure = getInstances();

  if (structure != null) {
    if (structure.classIndex() == -1) {
      structure.setClassIndex(structure.numAttributes() - 1);
      System.err.println("No class specified. Last attribute is used as class attribute.");
    }
    if (structure.attribute(structure.classIndex()).isNumeric()) {
      throw new IOException("To save in C4.5 format the class attribute cannot be numeric.");
    }
  }
  if (getRetrieval() == BATCH || getRetrieval() == NONE) {
    throw new IOException("Batch and incremental saving cannot be mixed.");
  }
  if (retrieveFile() == null || getWriter() == null) {
    throw new IOException("C4.5 format requires two files. Therefore no output to standard out can be generated.\nPlease specifiy output files using the -o option.");
  }
  // getWriter() was just checked for null, so outW is never null below.
  PrintWriter outW = new PrintWriter(getWriter());

  if (writeMode == WAIT) {
    if (structure == null) {
      setWriteMode(CANCEL);
      if (inst != null) {
        System.err.println("Structure(Header Information) has to be set in advance");
      }
    } else {
      setWriteMode(STRUCTURE_READY);
    }
    writeMode = getWriteMode();
  }

  if (writeMode == CANCEL) {
    outW.close();
    cancel();
  }

  if (writeMode == STRUCTURE_READY) {
    setWriteMode(WRITE);
    // write header: here names file
    writeC45NamesHeader(outW, structure);
    outW.flush();
    outW.close();
    writeMode = getWriteMode();

    // Derive the data file from the names file: same prefix, ".data" extension.
    String namesPath = retrieveFile().getAbsolutePath();
    setFileExtension(".data");
    // Only strip an extension that belongs to the file name itself; a dot
    // inside a parent directory name (or no dot at all) leaves the path as is.
    int dotPos = namesPath.lastIndexOf('.');
    int sepPos = namesPath.lastIndexOf(File.separatorChar);
    String prefix = (dotPos > sepPos) ? namesPath.substring(0, dotPos) : namesPath;
    File dataFile = new File(prefix + getFileExtension());
    try {
      setFile(dataFile);
      setDestination(dataFile);
    } catch (Exception ex) {
      // Keep the original failure attached so the root cause is not lost.
      IOException failure = new IOException("Cannot create data file, only names file created.");
      failure.initCause(ex);
      throw failure;
    }
    if (retrieveFile() == null || getWriter() == null) {
      throw new IOException("Cannot create data file, only names file created.");
    }
    outW = new PrintWriter(getWriter());
  }

  if (writeMode == WRITE) {
    if (structure == null) {
      throw new IOException("No instances information available.");
    }
    if (inst != null) {
      // write instance: here data file
      writeC45DataRow(outW, structure, inst);
      // flush once more than 100 instances have been written since the last flush
      m_incrementalCounter++;
      if (m_incrementalCounter > 100) {
        m_incrementalCounter = 0;
        outW.flush();
      }
    } else {
      // close
      outW.flush();
      outW.close();
      setFileExtension(".names");
      m_incrementalCounter = 0;
      resetStructure();
    }
  }
}

/** Writes the names-file content: the class values first, then one line per non-class attribute. */
private static void writeC45NamesHeader(PrintWriter out, Instances structure) {
  int classIdx = structure.classIndex();
  writeC45ValueList(out, structure.attribute(classIdx));
  for (int i = 0; i < structure.numAttributes(); i++) {
    if (i == classIdx) {
      continue;
    }
    Attribute att = structure.attribute(i);
    out.write(att.name() + ": ");
    if (att.isNumeric() || att.isDate()) {
      out.write("continuous.\n");
    } else {
      writeC45ValueList(out, att);
    }
  }
}

/** Writes the values of an attribute separated by commas and terminated by ".\n"; writes nothing if there are no values. */
private static void writeC45ValueList(PrintWriter out, Attribute att) {
  int last = att.numValues() - 1;
  for (int i = 0; i <= last; i++) {
    out.write(att.value(i));
    out.write(i < last ? "," : ".\n");
  }
}

/** Writes one data-file line: all non-class values followed by the class value, "?" marking missing values. */
private static void writeC45DataRow(PrintWriter out, Instances structure, Instance inst) {
  int classIdx = structure.classIndex();
  for (int j = 0; j < inst.numAttributes(); j++) {
    if (j == classIdx) {
      continue;
    }
    if (inst.isMissing(j)) {
      out.write("?,");
    } else if (structure.attribute(j).isNominal() || structure.attribute(j).isString()) {
      out.write(structure.attribute(j).value((int) inst.value(j)) + ",");
    } else {
      out.write("" + inst.value(j) + ",");
    }
  }
  // write the class value
  if (inst.isMissing(classIdx)) {
    out.write("?");
  } else {
    out.write(structure.attribute(classIdx).value((int) inst.value(classIdx)));
  }
  out.write("\n");
}
/** Writes a Batch of instances
* @throws IOException throws IOException if saving in batch mode is not possible
*/
public void writeBatch() throws IOException {
Instances instances = getInstances();
if(instances == null)
throw new IOException("No instances to save");
if(instances.classIndex() == -1){
instances.setClassIndex(instances.numAttributes()-1);
System.err.println("No class specified. Last attribute is used as class attribute.");
}
if(instances.attribute(instances.classIndex()).isNumeric())
throw new IOException("To save in C4.5 format the class attribute cannot be numeric.");
if(getRetrieval() == INCREMENTAL)
throw new IOException("Batch and incremental saving cannot be mixed.");
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -