📄 numtargetstream.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* <p>Title: XELOPES</p>
* <p>Description: Java Data Mining API. Supported standards: <a href="http://www.dmg.org">Predictive Model Markup Language (PMML 2.0) </a>; <a href="http://www.omg.org/cwm">DataMining specification for Common Warehouse Metamodel (OMG)</a>.</p>
* <p>Copyright: Copyright (c) 2002-2004 prudsys AG</p>
* <p>Company: prudsys, ZSoft</p>
* @author Michael Thess
* @version 1.2
*/
package com.prudsys.pdm.Transform.Special;
import java.util.Hashtable;
import java.util.Vector;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningAttribute;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Transform.MiningTransformationFactory;
import com.prudsys.pdm.Transform.MiningTransformationStep;
import com.prudsys.pdm.Transform.OneToOne.CategNumMapping;
import com.prudsys.pdm.Transform.OneToOne.Identity;
import com.prudsys.pdm.Utils.IntVector;
/**
 * Realization of numerization for a given mining input
 * stream. All categorical attributes are numerized with respect
 * to the target attribute: every category is mapped to the mean of the
 * target attribute over all vectors that carry this category and have a
 * non-missing target value.
 */
public class NumTargetStream extends VectorTransformationStream {

    // -----------------------------------------------------------------------
    //  Variables declarations
    // -----------------------------------------------------------------------
    /**
     * Array of hashtables containing assignment of source to target values.
     * Indexed by attribute position; entries of non-categorical attributes
     * stay null.
     */
    private Hashtable[] categMaps = null;

    /** Name of target attribute. */
    private String targetAttributeName = null;

    // -----------------------------------------------------------------------
    //  Constructors
    // -----------------------------------------------------------------------
    /**
     * Empty constructor.
     */
    public NumTargetStream() {
    }

    /**
     * Constructor for given stream.
     *
     * @param inputStream mining input stream for numerization
     */
    public NumTargetStream(MiningInputStream inputStream) {
        this.inputStream = inputStream;
    }

    // -----------------------------------------------------------------------
    //  Getter and setter methods
    // -----------------------------------------------------------------------
    /**
     * Returns name of target attribute.
     *
     * @return name of target attribute
     */
    public String getTargetAttributeName() {
        return targetAttributeName;
    }

    /**
     * Sets name of target attribute.
     *
     * @param targetAttributeName new name of target attribute
     */
    public void setTargetAttributeName(String targetAttributeName) {
        this.targetAttributeName = targetAttributeName;
    }

    // -----------------------------------------------------------------------
    //  Transformation methods
    // -----------------------------------------------------------------------
    /**
     * Calculates category mappings for all categorical attributes
     * as mean values of target attribute over categories.
     * <p>
     * The input stream is reset and scanned once. Vectors whose target value
     * is missing are skipped entirely; missing values of a categorical
     * attribute are skipped for that attribute only. Categories that never
     * occur together with a valid target value are mapped to 0. The result is
     * stored in {@link #categMaps}.
     *
     * @exception MiningException error while calculating categ mappings
     */
    private void calcCategMaps() throws MiningException {
        // Abbreviations:
        MiningDataSpecification metaData = inputStream.getMetaData();
        int nAtt = metaData.getAttributesNumber();
        MiningAttribute targetAttribute = metaData.getMiningAttribute(targetAttributeName);

        // Inits: per categorical attribute one occurrence counter and one
        // running target sum for every category key.
        boolean[] isCategorical = new boolean[nAtt];
        IntVector[] catNumbs = new IntVector[nAtt];
        Vector[] catVals = new Vector[nAtt];
        categMaps = new Hashtable[nAtt];
        for (int i = 0; i < nAtt; i++) {
            MiningAttribute ma = metaData.getMiningAttribute(i);
            if (!(ma instanceof CategoricalAttribute)) {
                continue;
            }
            isCategorical[i] = true;
            int nCateg = ((CategoricalAttribute) ma).getCategoriesNumber();
            catNumbs[i] = new IntVector(nCateg);
            catVals[i] = new Vector(nCateg);
            for (int j = 0; j < nCateg; j++) {
                catNumbs[i].addElement(0);
                catVals[i].addElement(new Double(0));
            }
            categMaps[i] = new Hashtable();
        }

        // Data scan:
        inputStream.reset();
        while (inputStream.next()) {
            MiningVector miningVector = inputStream.read();
            double targetValue = miningVector.getValue(targetAttribute);
            if (Category.isMissingValue(targetValue)) {
                continue;
            }
            for (int i = 0; i < nAtt; i++) {
                if (!isCategorical[i]) {
                    continue;
                }
                double value = miningVector.getValue(i);
                if (Category.isMissingValue(value)) {
                    continue;
                }
                int j = (int) value;

                // The category key may lie more than one position past the
                // end of the statistics: vectors with a missing target are
                // skipped above, so categories first seen in such vectors
                // were never registered here. Pad with empty entries until
                // index j exists instead of assuming j == size.
                while (catNumbs[i].size() <= j) {
                    catNumbs[i].addElement(0);
                    catVals[i].addElement(new Double(0));
                }

                catNumbs[i].setElementAt(catNumbs[i].IntegerAt(j) + 1, j);
                double targetSum = ((Double) catVals[i].elementAt(j)).doubleValue();
                catVals[i].setElementAt(new Double(targetSum + targetValue), j);
            }
        }

        // Calculate replacements: mean target value per category.
        for (int i = 0; i < nAtt; i++) {
            if (!isCategorical[i]) {
                continue;
            }
            CategoricalAttribute ca = (CategoricalAttribute) metaData.getMiningAttribute(i);
            for (int j = 0; j < catNumbs[i].size(); j++) {
                Category cat = ca.getCategory(j);
                if (cat == null) {
                    continue;
                }
                int ncat = catNumbs[i].IntegerAt(j);
                double val = ((Double) catVals[i].elementAt(j)).doubleValue();
                // Unseen categories keep the initial sum of 0; avoid 0/0.
                if (ncat > 0) {
                    val = val / ncat;
                }
                categMaps[i].put(cat, new Double(val));
            }
        }
    }

    /**
     * Creates mining transformation step for numerization of categorical
     * attributes with respect to target attribute.
     * <p>
     * Every categorical attribute that is not excluded gets a
     * {@link CategNumMapping} from the attribute to a new attribute named
     * <code>"n_" + name</code>. If no such mapping was created at all, an
     * {@link Identity} mapping of the first attribute is added because at
     * least one mapping is required.
     *
     * @return mining transformation step
     * @exception MiningException no input stream defined, or the target
     * attribute name is unset or does not denote an attribute of the stream
     */
    public MiningTransformationStep createMiningTransformationStep() throws MiningException {
        // No mining input stream defined => exception:
        if (inputStream == null) {
            throw new MiningException("No mining input stream defined");
        }

        // Guard the unset name explicitly rather than relying on how the
        // meta data lookup treats a null name.
        if (targetAttributeName == null) {
            throw new MiningException("Invalid target attribute name");
        }
        MiningDataSpecification metaData = inputStream.getMetaData();
        MiningAttribute targetAttribute = metaData.getMiningAttribute(targetAttributeName);
        if (targetAttribute == null) {
            throw new MiningException("Invalid target attribute name");
        }

        // Calculate category mappings:
        calcCategMaps();

        // Mining transformation factory:
        MiningTransformationFactory mtf = new MiningTransformationFactory();
        boolean notrans = true;
        for (int i = 0; i < metaData.getAttributesNumber(); i++) {
            // Get attribute and name:
            MiningAttribute mAtt = metaData.getMiningAttribute(i);
            String attName = mAtt.getName();

            // Don't use excluded attributes, if defined:
            if (excludedAttributeNames != null && excludedAttributeNames.indexOf(attName) > -1) {
                continue;
            }

            // Add numerization if categorical attribute:
            if (mAtt instanceof CategoricalAttribute) {
                CategNumMapping cnm = new CategNumMapping();
                cnm.setSourceName(attName);
                cnm.setTargetName("n_" + attName);
                cnm.setMap(categMaps[i]);
                mtf.addOneToOneMapping(cnm);
                notrans = false;
            }
        }

        // No transformations at all => just 1 required, use first attribute:
        if (notrans) {
            MiningAttribute mAtt = metaData.getMiningAttribute(0);
            Identity id = new Identity();
            id.setSourceName(mAtt.getName());
            mtf.addOneToOneMapping(id);
        }

        // Create transformation step for numerization:
        mts = mtf.createMiningTransformationStep();
        return mts;
    }

    // -----------------------------------------------------------------------
    //  Other methods
    // -----------------------------------------------------------------------
    /**
     * Returns numerization via target description.
     *
     * @return description of numerization via target
     */
    public String toString() {
        return "Numerization via target attribute stream.";
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -