⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 c45saver.java

📁 MacroWeka扩展了著名数据挖掘工具weka
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    C45Saver.java
 *    Copyright (C) 2004 Stefan Mutter
 *
 */

package weka.core.converters;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.io.OutputStream;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Enumeration;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.Attribute;
import weka.core.Utils;
import weka.core.OptionHandler;
import weka.core.Option;
import weka.core.FastVector;

/**
 * Writes to a destination in the format used by the C4.5 algorithm.
 * The output consists of two files: *.names, *.data
 *
 * Valid options:
 *
 * -i input arff file <br>
 * The input file in ARFF format. <p>
 *
 * -o the output file <br>
 * The output file. The prefix of the output file is sufficient.<p>
 *
 * -c class index <br>
 * The index of the class attribute. first and last are valid as well (default: last). <p>
 *
 * @author Stefan Mutter (mutter@cs.waikato.ac.nz)
 * @version $Revision: 1.1 $
 * @see Saver
 */
public class C45Saver extends AbstractFileSaver implements BatchConverter, IncrementalConverter, OptionHandler {

  /** Constructor */  
  public C45Saver() {
    // A freshly built saver starts from the state established by
    // resetOptions().
    this.resetOptions();
  }
   
  /**
   * Returns a string describing this Saver
   * @return a description of the Saver suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    // Human-readable summary of what this saver produces.
    final String description =
        "Writes to a destination that is in the format used by the C4.5 algorithm.\nTherefore it outputs a names and a data file.";
    return description;
  }

  
  /**
   * Returns a description of the file type.
   *
   * @return a short file description
   */
  public String getFileDescription() {
    // Short label naming the file format handled by this saver.
    final String fileDescription = "C4.5 file format";
    return fileDescription;
  }

  /**
   * Resets the Saver 
   */
  public void resetOptions() {
    // Restore the inherited defaults first; only afterwards select the
    // ".names" extension so that it is not overwritten by the superclass.
    super.resetOptions();
    this.setFileExtension(".names");
  }

/** Saves an instance incrementally. Structure has to be set by using the
   * setStructure() method or setInstances() method.
   * @param inst the instance to save, or null to finish writing and close the output
   * @throws IOException if an instance cannot be saved incrementally.
   */  
    public void writeIncremental(Instance inst) throws IOException{

      // Incremental saving is driven by the write mode:
      //   WAIT            -> decide whether header information is available
      //   STRUCTURE_READY -> write the names file, then switch to the data file
      //   WRITE           -> append one data row per call (null closes the file)
      //   CANCEL          -> close the writer and abort
      int writeMode = getWriteMode();
      Instances structure = getInstances();

      if(structure != null){
          if(structure.classIndex() == -1){
            structure.setClassIndex(structure.numAttributes()-1);
            System.err.println("No class specified. Last attribute is used as class attribute.");
          }
          if(structure.attribute(structure.classIndex()).isNumeric())
            throw new IOException("To save in C4.5 format the class attribute cannot be numeric.");
      }
      if(getRetrieval() == BATCH || getRetrieval() == NONE)
          throw new IOException("Batch and incremental saving cannot be mixed.");
      if(retrieveFile() == null || getWriter() == null){
          throw new IOException("C4.5 format requires two files. Therefore no output to standard out can be generated.\nPlease specifiy output files using the -o option.");
      }

      PrintWriter outW = new PrintWriter(getWriter());

      if(writeMode == WAIT){
        if(structure == null){
            setWriteMode(CANCEL);
            if(inst != null)
                System.err.println("Structure(Header Information) has to be set in advance");
        }
        else
            setWriteMode(STRUCTURE_READY);
        writeMode = getWriteMode();
      }
      if(writeMode == CANCEL){
          outW.close();
          cancel();
      }
      if(writeMode == STRUCTURE_READY){
          setWriteMode(WRITE);
          // The header goes to the names file, which is complete afterwards.
          writeC45NamesHeader(outW, structure);
          outW.flush();
          outW.close();

          writeMode = getWriteMode();

          // All following rows go to the data file.
          switchToC45DataFile();
          outW = new PrintWriter(getWriter());
      }
      if(writeMode == WRITE){
          if(structure == null)
              throw new IOException("No instances information available.");
          if(inst != null){
            writeC45DataRow(outW, structure, inst);
            // Flush periodically: once the counter exceeds 100 rows.
            m_incrementalCounter++;
            if(m_incrementalCounter > 100){
                m_incrementalCounter = 0;
                outW.flush();
            }
          }
          else{
              // A null instance marks the end of the data: close the data
              // file and restore the initial state for the next run.
              outW.flush();
              outW.close();
              setFileExtension(".names");
              m_incrementalCounter = 0;
              resetStructure();
          }
      }
    }

    /**
     * Writes the content of the names file: first the class values, then one
     * line per non-class attribute.
     *
     * @param out the writer of the names file
     * @param structure the header information; its class index must be set
     */
    private void writeC45NamesHeader(PrintWriter out, Instances structure) {

      int classIndex = structure.classIndex();
      writeC45ValueList(out, structure.attribute(classIndex));
      for (int i = 0; i < structure.numAttributes(); i++) {
        if (i == classIndex) {
          continue;
        }
        Attribute att = structure.attribute(i);
        out.write(att.name() + ": ");
        if (att.isNumeric() || att.isDate()) {
          out.write("continuous.\n");
        } else {
          writeC45ValueList(out, att);
        }
      }
    }

    /**
     * Writes the values of an attribute separated by commas and terminated
     * by a full stop and a newline. Nothing is written for an attribute
     * without values.
     *
     * @param out the writer to use
     * @param att the attribute whose values are listed
     */
    private void writeC45ValueList(PrintWriter out, Attribute att) {

      int last = att.numValues() - 1;
      for (int v = 0; v <= last; v++) {
        out.write(att.value(v));
        out.write(v < last ? "," : ".\n");
      }
    }

    /**
     * Redirects the output from the names file to the data file, which has
     * the same path but the extension ".data".
     *
     * @throws IOException if the data file cannot be set up as destination
     */
    private void switchToC45DataFile() throws IOException {

      String path = retrieveFile().getAbsolutePath();
      setFileExtension(".data");

      // Only a dot inside the file name marks an extension. A missing dot, or
      // a dot that belongs to a parent directory, leaves the path unchanged
      // instead of failing or cutting off part of the directory.
      int dot = path.lastIndexOf('.');
      String stem = (dot > path.lastIndexOf(File.separatorChar)) ? path.substring(0, dot) : path;
      File dataFile = new File(stem + getFileExtension());
      try{
        setFile(dataFile);
        setDestination(dataFile);
      } catch(Exception ex){
        // Keep the original failure attached as the cause.
        IOException failure = new IOException("Cannot create data file, only names file created.");
        failure.initCause(ex);
        throw failure;
      }
      if(retrieveFile() == null || getWriter() == null){
        throw new IOException("Cannot create data file, only names file created.");
      }
    }

    /**
     * Writes one instance as a line of the data file: all non-class values
     * in attribute order followed by the class value. Missing values are
     * written as "?".
     *
     * @param out the writer of the data file
     * @param structure the header information; its class index must be set
     * @param inst the instance to write
     */
    private void writeC45DataRow(PrintWriter out, Instances structure, Instance inst) {

      int classIndex = structure.classIndex();
      for (int j = 0; j < inst.numAttributes(); j++) {
        if (j == classIndex) {
          continue;
        }
        if (inst.isMissing(j)) {
          out.write("?,");
        } else if (structure.attribute(j).isNominal() || structure.attribute(j).isString()) {
          out.write(structure.attribute(j).value((int)inst.value(j))+",");
        } else {
          out.write(""+inst.value(j)+",");
        }
      }
      // The class value always comes last on the line.
      if (inst.isMissing(classIndex)) {
        out.write("?");
      } else {
        out.write(structure.attribute(classIndex).value((int)inst.value(classIndex)));
      }
      out.write("\n");
    }

  
  /** Writes a Batch of instances
   * @throws IOException throws IOException if saving in batch mode is not possible
   */
  public void writeBatch() throws IOException {
      
      Instances instances = getInstances();
      
      if(instances == null)
          throw new IOException("No instances to save");
      if(instances.classIndex() == -1){
          instances.setClassIndex(instances.numAttributes()-1);
          System.err.println("No class specified. Last attribute is used as class attribute.");
      }
      if(instances.attribute(instances.classIndex()).isNumeric())
          throw new IOException("To save in C4.5 format the class attribute cannot be numeric.");
      if(getRetrieval() == INCREMENTAL)
          throw new IOException("Batch and incremental saving cannot be mixed.");
      

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -