📄 c45learner.java
字号:
/* * YALE - Yet Another Learning Environment * Copyright (C) 2002, 2003 * Simon Fischer, Ralf Klinkenberg, Ingo Mierswa, * Katharina Morik, Oliver Ritthoff * Artificial Intelligence Unit * Computer Science Department * University of Dortmund * 44221 Dortmund, Germany * email: yale@ls8.cs.uni-dortmund.de * web: http://yale.cs.uni-dortmund.de/ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA. */package edu.udo.cs.yale.operator.learner;import edu.udo.cs.yale.operator.parameter.*;import edu.udo.cs.yale.operator.OperatorException;import edu.udo.cs.yale.operator.FatalException;import edu.udo.cs.yale.operator.learner.decisiontree.RuleSet;import edu.udo.cs.yale.operator.learner.decisiontree.Rule;import edu.udo.cs.yale.example.ExampleReader;import edu.udo.cs.yale.example.ExampleSet;import edu.udo.cs.yale.example.Example;import edu.udo.cs.yale.example.Attribute;import edu.udo.cs.yale.tools.LogService;import edu.udo.cs.yale.tools.TempFileService;import edu.udo.cs.yale.tools.Ontology;import edu.udo.cs.yale.tools.ParameterService;import edu.udo.cs.yale.tools.Tools;import java.util.List;import java.util.Iterator;import java.io.*;/** <code>C45Learner</code> encapsulates the * <a TARGET="_top" href="http://www.cse.unsw.edu.au/~quinlan/">C4.5</a> program version 8 * by Ross Quinlan {@yale.cite Quinlan/93b}. * It generates all input files required by C4.5 containing attribute names, * ranges, and data and starts an external process to invoke C4.5. * The output of C4.5 is parsed into a {@link edu.udo.cs.yale.operator.learner.decisiontree.RuleSet} * which is a subclass of {@link Model} and can be applied without further calling of * external programs. * * @yale.xmlclass C45Learner * @yale.reference Quinlan/93b * @see edu.udo.cs.yale.operator.learner.decisiontree.RuleSet * @author Ingo Mierswa * @version $Id: C45Learner.java,v 2.6 2003/08/14 10:24:57 fischer Exp $ */public class C45Learner extends Learner { /** Parameters without value. */ static final String[] SINGLE_PARAMETER = { "u", "s", "p", "g" }; /** Parameters which need a value. */ static final String[] PARAMETER = { "m", "c" }; /** Starts the external process and feeds parameters and data to stdin. * @return A RuleSet */ public Model learn(ExampleSet exampleSet) throws OperatorException { LogService.logMessage("C4.5 learner '"+getName()+"': " +"starts learning.", LogService.TASK); // ---- Eingabedateien des C4.5 erzeugen ---- File[] files = new File[2]; String[] extensions = { ".names", ".data" }; String filestem = TempFileService.createTempFiles(getName()+"_", extensions, files); writeExamples(exampleSet, files); // ---- Prozess erzeugen ---- String learnCommand = ParameterService.getProperty("yale.c45.learncommand"); String ruleCommand = ParameterService.getProperty("yale.c45.rulecommand"); String parameterString = parseParameters(); // c4.5 process try { Process process = null; process = Runtime.getRuntime().exec(learnCommand + " " + parameterString + " " + "-f " + filestem); // ---- get Output ---- BufferedReader in = new BufferedReader(new InputStreamReader(process.getInputStream())); String output = Tools.readOutput(in); if ((output==null)||(output.equals(""))) LogService.logMessage("C4.5 learner '" + getName() + "': " + "No output of C4.5.", LogService.WARNING); in.close(); LogService.logMessage("Output of C4.5 (Learner: '"+getName()+"')\n"+output, LogService.MINIMUM); } catch (IOException e) { throw new FatalException("C4.5Learner '" + getName() + "': " + "Error in c4.5 process: " + learnCommand, e); } // c4.5 rules process try { Process process = Runtime.getRuntime().exec(ruleCommand + " " + "-f " + filestem); BufferedReader in = new BufferedReader(new InputStreamReader(process.getInputStream())); RuleSet rules = RuleSet.parseC45Rules(in, exampleSet); in.close(); Tools.waitForProcess(this, process, "c45rules"); LogService.logMessage("C4.5 learner '"+getName()+"': C4.5 has succesfully " +"learned an example set.", LogService.TASK); return rules; } catch (IOException e) { throw new FatalException("C4.5 learner '" + getName() + "': " + "Error in c4.5rules process: " + ruleCommand, e); } } /** Constructs a parameter string from the parameters set in the configuration file. */ private String parseParameters() { String params=""; for (int i = 0; i < SINGLE_PARAMETER.length; i++) { String value = getParameterAsString(SINGLE_PARAMETER[i]); if (value != null) params += SINGLE_PARAMETER[i]; } for (int i = 0; i < PARAMETER.length; i++) { String value = getParameterAsString(PARAMETER[i]); if (value != null) params += PARAMETER[i] + value; } return params; } /** First writes names and ranges of the attributs in the format <br> * class1,class2,...,classN. <br> * att1: range1. <br> * att2: range2. <br> * ... <br> * to the file "filestem.names" and then writes the examples in the format<br> * att1,att2,..., */ private void writeExamples(ExampleSet exampleSet, File[] files) throws OperatorException { // ---- Schreiben der Namen und Bereiche in filestem.names ---- try { PrintWriter nameout = new PrintWriter(new FileWriter(files[0])); // classes Iterator classes = exampleSet.getLabel().getValuesAsString().iterator(); while (classes.hasNext()) { String classString = (String)classes.next(); nameout.print(classString); if (classes.hasNext()) nameout.print(","); } nameout.println("."); // namen und bereiche for (int n = 0; n < exampleSet.getNumberOfAttributes(); n++) { Attribute attribute = exampleSet.getAttribute(n); nameout.print(attribute.getName()+": "); if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.NUMERICAL)) { nameout.print("continuous"); } else { Iterator values = attribute.getValuesAsString().iterator(); while (values.hasNext()) { String valueString = (String)values.next(); nameout.print(valueString); if (values.hasNext()) nameout.print(","); } } nameout.println("."); } nameout.close(); } catch (java.io.IOException e) { throw new FatalException("C45Learner [parseExamples(ExampleSet, PrintStream)] '"+getName()+ "': There was an exception by writing the data in "+ files[0].getName() + "!", e); } // ---- Schreiben der Daten in filestem.data ---- try { PrintWriter dataout = new PrintWriter(new FileWriter(files[1])); ExampleReader r = exampleSet.getExampleReader(); while (r.hasNext()) { // ---- Beispiel auslesen ---- Example example = (Example)r.next(); if (example==null) { throw new FatalException("C45Learner [parseExamples(ExampleSet, String)] '" + getName() + "': " +"example is null!"); } // ---- Attribute bestimmen und rausschreiben ---- String attributes = example.getAttributesAsString(","); if ((attributes==null) || (attributes.equals(""))) { throw new FatalException("C45Learner [parseExamples(ExampleSet, PrintStream)] '" + getName() + "': " +"There are no attributes of example!"); } // ---- Label bestimmen und rausschreiben ---- String label = example.getLabelAsString(); dataout.print(attributes); dataout.println(","+label + "."); } dataout.close(); } catch (java.io.IOException e) { throw new FatalException("C45Learner [parseExamples(ExampleSet, PrintStream)] '" + getName() + "': There was an exception by writing the data in " + files[1].getName() + "!", e); } } public List getParameterTypes() { List types = super.getParameterTypes(); for (int i = 0; i < SINGLE_PARAMETER.length; i++) types.add(new ParameterTypeString(SINGLE_PARAMETER[i], "The C4.5 parameter "+SINGLE_PARAMETER[i]+" (no value needed).")); for (int i = 0; i < PARAMETER.length; i++) types.add(new ParameterTypeString(PARAMETER[i], "The C4.5 parameter "+PARAMETER[i]+".")); return types; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -