⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 numtargetstream.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * <p>Title: XELOPES</p>
 * <p>Description: Java Data Mining API. Supported standards: <a href="http://www.dmg.org">Predictive Model Markup Language (PMML 2.0) </a>;  <a href="http://www.omg.org/cwm">DataMining specification for Common Warehouse Metamodel (OMG)</a>.</p>
 * <p>Copyright: Copyright (c) 2002-2004 prudsys AG</p>
 * <p>Company: prudsys, ZSoft</p>
 * @author Michael Thess
 * @version 1.2
 */
package com.prudsys.pdm.Transform.Special;

import java.util.Hashtable;
import java.util.Vector;

import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningAttribute;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Transform.MiningTransformationFactory;
import com.prudsys.pdm.Transform.MiningTransformationStep;
import com.prudsys.pdm.Transform.OneToOne.CategNumMapping;
import com.prudsys.pdm.Transform.OneToOne.Identity;
import com.prudsys.pdm.Utils.IntVector;

/**
 * Realization of numerization for a given mining input
 * stream. All categorical attributes are numerized with respect
 * to the target attribute.
 */
public class NumTargetStream extends VectorTransformationStream {

  // -----------------------------------------------------------------------
  //  Variables declarations
  // -----------------------------------------------------------------------
  /**
   * Category mappings, one slot per attribute of the input meta data.
   * Slots of non-categorical attributes stay null; for a categorical
   * attribute the table maps each Category to a Double holding the mean
   * target value observed for that category.
   */
  private Hashtable[] categMaps = null;

  /** Name of target attribute. */
  private String targetAttributeName = null;

  // -----------------------------------------------------------------------
  //  Constructors
  // -----------------------------------------------------------------------
  /**
   * Empty constructor. No input stream is assigned.
   */
  public NumTargetStream() {
  }

  /**
   * Constructor for given stream.
   *
   * @param inputStream mining input stream for numerization
   */
  public NumTargetStream(MiningInputStream inputStream) {

    this.inputStream = inputStream;
  }

  // -----------------------------------------------------------------------
  //  Getter and setter methods
  // -----------------------------------------------------------------------
  /**
   * Returns name of target attribute.
   *
   * @return name of target attribute
   */
  public String getTargetAttributeName() {

    return targetAttributeName;
  }

  /**
   * Sets name of target attribute.
   *
   * @param targetAttributeName new name of target attribute
   */
  public void setTargetAttributeName(String targetAttributeName) {

    this.targetAttributeName = targetAttributeName;
  }

  // -----------------------------------------------------------------------
  //  Transformation methods
  // -----------------------------------------------------------------------
  /**
   * Calculates category mappings for all categorical attributes
   * as mean values of target attribute over categories.
   *
   * The input stream is reset and scanned once. Vectors whose target value
   * is missing are skipped entirely; missing values of a categorical
   * attribute are skipped for that attribute only. The result is stored
   * in {@link #categMaps}.
   *
   * @exception MiningException error while calculating categ mappings
   */
  private void calcCategMaps() throws MiningException {

    // Abbreviations:
    MiningDataSpecification metaData = inputStream.getMetaData();
    int nAtt = metaData.getAttributesNumber();
    MiningAttribute targetAttribute = metaData.getMiningAttribute(targetAttributeName);

    // Inits: per categorical attribute, the number of occurrences and the
    // sum of target values for every category key. Slots of non-categorical
    // attributes remain null.
    IntVector[] catNumbs = new IntVector[nAtt];
    Vector[] catVals     = new Vector[nAtt];
    categMaps            = new Hashtable[nAtt];
    for (int i = 0; i < nAtt; i++) {
      MiningAttribute ma = metaData.getMiningAttribute(i);
      if (!(ma instanceof CategoricalAttribute)) {
        continue;
      }
      int nCateg  = ((CategoricalAttribute) ma).getCategoriesNumber();
      catNumbs[i] = new IntVector(nCateg);
      catVals[i]  = new Vector(nCateg);
      for (int j = 0; j < nCateg; j++) {
        catNumbs[i].addElement(0);
        catVals[i].addElement( new Double(0) );
      }
      categMaps[i] = new Hashtable();
    }

    // Data scan:
    inputStream.reset();
    while ( inputStream.next() ) {
      MiningVector miningVector = inputStream.read();
      double targetValue = miningVector.getValue(targetAttribute);
      if ( Category.isMissingValue(targetValue) ) {
        continue;
      }
      for (int i = 0; i < nAtt; i++) {
        // Accumulators exist for categorical attributes only:
        if (catNumbs[i] == null) {
          continue;
        }
        double value = miningVector.getValue(i);
        if ( Category.isMissingValue(value) ) {
          continue;
        }

        // The value of a categorical attribute is used as category index:
        int j = (int) value;

        // A key may lie beyond the end of the accumulators, e.g. when a
        // category first showed up in a vector that was skipped above.
        // Pad with empty slots so that index j can be addressed.
        while (catNumbs[i].size() < j) {
          catNumbs[i].addElement(0);
          catVals[i].addElement( new Double(0) );
        }

        if (j == catNumbs[i].size()) {
          // New category: first occurrence
          catNumbs[i].addElement(1);
          catVals[i].addElement( new Double(targetValue) );
        }
        else {
          // Known category: increase counter and sum of target values
          catNumbs[i].setElementAt(catNumbs[i].IntegerAt(j) + 1, j);
          double tval = ((Double) catVals[i].elementAt(j)).doubleValue();
          catVals[i].setElementAt( new Double(tval + targetValue), j);
        }
      }
    }

    // Calculate replacements:
    for (int i = 0; i < nAtt; i++) {
      MiningAttribute ma = metaData.getMiningAttribute(i);
      if (!(ma instanceof CategoricalAttribute)) {
        continue;
      }
      CategoricalAttribute ca = (CategoricalAttribute) ma;
      for (int j = 0; j < catNumbs[i].size(); j++) {
        Category cat = ca.getCategory(j);
        if (cat == null) {
          continue;
        }
        int ncat   = catNumbs[i].IntegerAt(j);
        double val = ((Double) catVals[i].elementAt(j)).doubleValue();
        // Categories never counted keep the sum 0 as replacement value:
        if (ncat > 0) {
          val = val / ncat;
        }
        categMaps[i].put( cat, new Double(val) );
      }
    }
  }

  /**
   * Creates mining transformation step for numerization of categorical
   * attributes with respect to target attribute.
   *
   * Every categorical attribute that is not excluded is mapped to a new
   * attribute named "n_" + source name. If no mapping was added at all,
   * an identity mapping of the first attribute is used instead.
   *
   * @return mining transformation step
   * @exception MiningException no input stream defined, invalid target
   * attribute name, or error while calculating the category mappings
   */
  public MiningTransformationStep createMiningTransformationStep() throws MiningException  {

    // No mining input stream defined => exception:
    if (inputStream == null) {
      throw new MiningException("No mining input stream defined");
    }

    // Target attribute must be set and contained in the meta data:
    if (targetAttributeName == null) {
      throw new MiningException("Invalid target attribute name");
    }
    MiningDataSpecification metaData = inputStream.getMetaData();
    MiningAttribute targetAttribute  = metaData.getMiningAttribute(targetAttributeName);
    if (targetAttribute == null) {
      throw new MiningException("Invalid target attribute name");
    }

    // Calculate category mappings:
    calcCategMaps();

    // Mining transformation factory:
    MiningTransformationFactory mtf = new MiningTransformationFactory();

    boolean notrans = true;
    for (int i = 0; i < metaData.getAttributesNumber(); i++) {
      // Get attribute and name:
      MiningAttribute mAtt = metaData.getMiningAttribute(i);
      String attName = mAtt.getName();

      // Don't use excluded attributes, if defined:
      if ( excludedAttributeNames != null && excludedAttributeNames.indexOf(attName) > -1) {
        continue;
      }

      // Add numerization if categorical attribute:
      if (mAtt instanceof CategoricalAttribute) {
        CategNumMapping cnm = new CategNumMapping();
        cnm.setSourceName( attName );
        cnm.setTargetName( "n_" + attName );
        cnm.setMap(categMaps[i]);
        mtf.addOneToOneMapping(cnm);
        notrans = false;
      }
    }

    // No transformations at all => just 1 required, use first attribute:
    if (notrans) {
      MiningAttribute mAtt = metaData.getMiningAttribute(0);
      Identity id          = new Identity();
      id.setSourceName( mAtt.getName() );
      mtf.addOneToOneMapping(id);
    }

    // Create transformation step for numerization:
    mts = mtf.createMiningTransformationStep();

    return mts;
  }

  // -----------------------------------------------------------------------
  //  Other methods
  // -----------------------------------------------------------------------
  /**
   * Returns numerization via target description.
   *
   * @return description of numerization via target
   */
  public String toString() {

    String mess = "Numerization via target attribute stream.";

    return mess;
  }

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -