mathexpression.java

来自「Java 编写的多种数据挖掘算法 包括聚类、分类、预处理等」· Java 代码 · 共 946 行 · 第 1/3 页

JAVA
946
字号
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    MathExpression.java *    Copyright (C) 2004 Prados Julien *    Copyright (C) 2002 Eibe Frank */package weka.filters.unsupervised.attribute;import weka.core.AttributeStats;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Range;import weka.core.SparseInstance;import weka.core.Utils;import weka.filters.Filter;import weka.filters.UnsupervisedFilter;import java.io.IOException;import java.io.Reader;import java.io.Serializable;import java.io.StreamTokenizer;import java.io.StringReader;import java.util.Enumeration;import java.util.HashMap;import java.util.Map;import java.util.Vector;/**  <!-- globalinfo-start --> * Modify numeric attributes according to a given expression * <p/> <!-- globalinfo-end --> *  <!-- options-start --> * Valid options are: <p/> *  * <pre> -E &lt;expression&gt; *  Specify the expression to apply. Eg. pow(A,6)/(MEAN+MAX) *  Supported operators are +, -, *, /, pow, log, *  abs, cos, exp, sqrt, tan, sin, ceil, floor, rint, (, ),  *  MEAN, MAX, MIN, SD, COUNT, SUM, SUMSQUARED, ifelse</pre> *  * <pre> -R &lt;index1,index2-index4,...&gt; *  Specify list of columns to ignore. First and last are valid *  indexes. (default none)</pre> *  * <pre> -V *  Invert matching sense (i.e. only modify specified columns)</pre> *  <!-- options-end --> * * @author Eibe Frank (eibe@cs.waikato.ac.nz)  * @author Prados Julien (julien.prados@cui.unige.ch)  * @version $Revision: 1.4 $ */public class MathExpression   extends PotentialClassIgnorer   implements UnsupervisedFilter, OptionHandler {    /** for serialization */  static final long serialVersionUID = -3713222714671997901L;    /** Stores which columns to select as a funky range */  protected Range m_SelectCols = new Range();      /** The default modification expression */  public static final String m_defaultExpression = "(A-MIN)/(MAX-MIN)";  /** The modification expression */  private String m_expression = m_defaultExpression;    /** The expression tree*/  private Parser.TreeNode m_expTree = null;    /** Attributes statistics*/  private AttributeStats[] m_attStats;      /**Constructor*/  public MathExpression() {      setInvertSelection(false);  }      /**   * Returns a string describing this filter   *   * @return a description of the filter suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return "Modify numeric attributes according to a given expression ";  }    /**   * Sets the format of the input instances.   *   * @param instanceInfo an Instances object containing the input    * instance structure (any instances contained in the object are    * ignored - only the structure is required).   * @return true if the outputFormat may be collected immediately   * @throws Exception if the input format can't be set    * successfully   */  public boolean setInputFormat(Instances instanceInfo)        throws Exception {    m_SelectCols.setUpper(instanceInfo.numAttributes() - 1);    super.setInputFormat(instanceInfo);    setOutputFormat(instanceInfo);    m_attStats = null;    m_expTree = null;    return true;  }  /**   * Input an instance for filtering. Filter requires all   * training instances be read before producing output.   *   * @param instance the input instance   * @return true if the filtered instance may now be   * collected with output().   * @throws IllegalStateException if no input format has been set.   */  public boolean input(Instance instance) throws Exception {    if (getInputFormat() == null) {      throw new IllegalStateException("No input instance format defined");    }    if (m_NewBatch) {      resetQueue();      m_NewBatch = false;    }    if (m_attStats == null) {      bufferInput(instance);      return false;    } else {      convertInstance(instance);      return true;    }  }  /**   * Signify that this batch of input to the filter is finished.    * If the filter requires all instances prior to filtering,   * output() may now be called to retrieve the filtered instances.   *   * @return true if there are instances pending output   * @throws IllegalStateException if no input structure has been defined   * @throws IllegalStateException if no input structure has been defined   */  public boolean batchFinished() throws Exception {    if (getInputFormat() == null) {      throw new IllegalStateException("No input instance format defined");    }    if (m_attStats == null) {      Instances input = getInputFormat();      m_expTree = Parser.parse(getExpression());      m_attStats = new AttributeStats [input.numAttributes()];            for (int i = 0; i < input.numAttributes(); i++) {	if (input.attribute(i).isNumeric() &&	    (input.classIndex() != i)) {	  m_attStats[i] = input.attributeStats(i);	}      }      // Convert pending input instances      for(int i = 0; i < input.numInstances(); i++) {	convertInstance(input.instance(i));      }    }     // Free memory    flushInput();    m_NewBatch = true;    return (numPendingOutput() != 0);  }    /**   * Convert a single instance over. The converted instance is    * added to the end of the output queue.   *   * @param instance the instance to convert   * @throws Exception if instance cannot be converted   */  private void convertInstance(Instance instance) throws Exception {      Instance inst = null;    HashMap symbols = new HashMap(5);    if (instance instanceof SparseInstance) {      double[] newVals = new double[instance.numAttributes()];      int[] newIndices = new int[instance.numAttributes()];      double[] vals = instance.toDoubleArray();      int ind = 0;      for (int j = 0; j < instance.numAttributes(); j++) {        if (m_SelectCols.isInRange(j)) {          	  double value;	  if (instance.attribute(j).isNumeric() &&	    (!Instance.isMissingValue(vals[j])) &&	    (getInputFormat().classIndex() != j)) {              symbols.put("A", new Double(vals[j]));                symbols.put("MAX", new Double(m_attStats[j].numericStats.max));              symbols.put("MIN", new Double(m_attStats[j].numericStats.min));              symbols.put("MEAN", new Double(m_attStats[j].numericStats.mean));              symbols.put("SD", new Double(m_attStats[j].numericStats.stdDev));              symbols.put("COUNT", new Double(m_attStats[j].numericStats.count));              symbols.put("SUM", new Double(m_attStats[j].numericStats.sum));              symbols.put("SUMSQUARED", new Double(m_attStats[j].numericStats.sumSq));              value = m_expTree.eval(symbols);              if (Double.isNaN(value) || Double.isInfinite(value)) {                  System.err.println("WARNING:Error in evaluating the expression: missing value set");                  value = Instance.missingValue();              }	      if (value != 0.0) {	        newVals[ind] = value;	        newIndices[ind] = j;	        ind++;	      }	  } else {	      value = vals[j];	      if (value != 0.0) {	        newVals[ind] = value;	        newIndices[ind] = j;	        ind++;	      }	  }        }      }	      double[] tempVals = new double[ind];      int[] tempInd = new int[ind];      System.arraycopy(newVals, 0, tempVals, 0, ind);      System.arraycopy(newIndices, 0, tempInd, 0, ind);      inst = new SparseInstance(instance.weight(), tempVals, tempInd,                                instance.numAttributes());    } else {      double[] vals = instance.toDoubleArray();      for (int j = 0; j < getInputFormat().numAttributes(); j++) {        if (m_SelectCols.isInRange(j)) {	  if (instance.attribute(j).isNumeric() &&	      (!Instance.isMissingValue(vals[j])) &&	      (getInputFormat().classIndex() != j)) {              symbols.put("A", new Double(vals[j]));                symbols.put("MAX", new Double(m_attStats[j].numericStats.max));              symbols.put("MIN", new Double(m_attStats[j].numericStats.min));              symbols.put("MEAN", new Double(m_attStats[j].numericStats.mean));              symbols.put("SD", new Double(m_attStats[j].numericStats.stdDev));              symbols.put("COUNT", new Double(m_attStats[j].numericStats.count));              symbols.put("SUM", new Double(m_attStats[j].numericStats.sum));              symbols.put("SUMSQUARED", new Double(m_attStats[j].numericStats.sumSq));              vals[j] = m_expTree.eval(symbols);              if (Double.isNaN(vals[j]) || Double.isInfinite(vals[j])) {                  System.err.println("WARNING:Error in Evaluation the Expression: missing value set");                  vals[j] = Instance.missingValue();              }	  }        }      }      inst = new Instance(instance.weight(), vals);    }    inst.setDataset(instance.dataset());    push(inst);  }  /**   * Parses a given list of options. <p/>   *    <!-- options-start -->   * Valid options are: <p/>   *    * <pre> -E &lt;expression&gt;   *  Specify the expression to apply. Eg. pow(A,6)/(MEAN+MAX)   *  Supported operators are +, -, *, /, pow, log,   *  abs, cos, exp, sqrt, tan, sin, ceil, floor, rint, (, ),    *  MEAN, MAX, MIN, SD, COUNT, SUM, SUMSQUARED, ifelse</pre>   *    * <pre> -R &lt;index1,index2-index4,...&gt;   *  Specify list of columns to ignore. First and last are valid   *  indexes. (default none)</pre>   *    * <pre> -V   *  Invert matching sense (i.e. only modify specified columns)</pre>   *    <!-- options-end -->   *   * @param options the list of options as an array of strings   * @throws Exception if an option is not supported   */  public void setOptions(String[] options) throws Exception {    String expString = Utils.getOption('E', options);    if (expString.length() != 0) {      setExpression(expString);    } else {      setExpression(m_defaultExpression);    }        String ignoreList = Utils.getOption('R', options);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?