📄 binning.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Michael Thess
* @version 1.0
*/
package com.prudsys.pdm.Transform.OneToMultiple;
import java.util.Vector;
import com.prudsys.pdm.Adapters.PmmlVersion20.DerivedField;
import com.prudsys.pdm.Adapters.PmmlVersion20.NormDiscrete;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningAttribute;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Transform.OneToMultipleMapping;
import com.prudsys.pdm.Utils.IntVector;
/**
* Transforms a categorical attribute into a set of binary attributes
* which are of numeric attribute type. The number of binary attributes
* corresponds to the number of categories of the source attribute.
* In general, not invertible.
*/
public class Binning extends OneToMultipleMapping
{
    // -----------------------------------------------------------------------
    //  Variables declarations
    // -----------------------------------------------------------------------
    /** Vector containing all categories of source attribute in given order. */
    private Vector categories = null;

    /**
     * Lookup table indexed by the integer key of a source category. Each
     * entry holds the position of the matching binary attribute, or -1 if
     * no binary attribute corresponds to that key.
     */
    private IntVector indexAssign = new IntVector();

    // -----------------------------------------------------------------------
    //  Constructor
    // -----------------------------------------------------------------------
    /**
     * Empty constructor. Marks this transformation as a one-to-multiple mapping.
     */
    public Binning()
    {
        oneToMultipleMapping = true;
    }

    // -----------------------------------------------------------------------
    //  Getter and setter methods
    // -----------------------------------------------------------------------
    /**
     * Returns category list.
     *
     * @return category list, may be null if none was set or created yet
     */
    public Vector getCategories()
    {
        return categories;
    }

    /**
     * Sets category list. If null (or an empty list) is set, the list is
     * created during the transformAttribute operation.
     *
     * @param categories new category list
     */
    public void setCategories(Vector categories)
    {
        this.categories = categories;
    }

    // -----------------------------------------------------------------------
    //  Transformation methods
    // -----------------------------------------------------------------------
    /**
     * Transforms the source attribute. The result is the set of binary attributes
     * representing each category of the source attribute.
     *
     * If the category list is null or empty, the list is created from the
     * source attribute in the same order as the categories appear in the
     * meta data. Otherwise, the category list is used to define the order
     * (and number) of the binary attributes.
     *
     * If no usable target attribute names are defined (classifierName is null
     * or has fewer entries than there are binary attributes), they are
     * automatically created using the following rule:
     * <name of binary attribute i> = "tb_" + <source att. name> + "_" + <i-th category display value>
     *
     * @return transformed attributes
     * @exception MiningException could not transform attributes
     */
    public MiningAttribute[] transformAttribute() throws MiningException
    {
        CategoricalAttribute source = requireCategoricalSource();

        // Reset the key -> position table: one slot per source category,
        // every slot initially unassigned (-1).
        int sourceCount = source.getCategoriesNumber();
        indexAssign.setSize(sourceCount);
        for (int i = 0; i < sourceCount; i++)
            indexAssign.setElementAt(-1, i);

        // Check for given category list:
        boolean useCatList = (categories != null && categories.size() > 0);
        if (!useCatList)
            categories = new Vector();
        int n = useCatList ? categories.size() : sourceCount;

        // Check for given target names:
        boolean useTarNames = (classifierName != null && classifierName.length >= n);
        if (!useTarNames)
            classifierName = new String[n];

        // Create set of binary attributes:
        NumericAttribute[] transformedAttribute = new NumericAttribute[n];
        for (int i = 0; i < n; i++)
        {
            // No category list => fill list from the source attribute:
            if (!useCatList)
                categories.addElement(source.getCategory(i));

            // Get current category and key. A category that the source
            // attribute does not know yields a missing key and simply gets
            // no entry in the lookup table.
            Category categ = (Category) categories.elementAt(i);
            double key = source.getKey(categ);
            if (!Category.isMissingValue(key))
                indexAssign.setElementAt(i, (int) key);

            // Get target name:
            String bname;
            if (useTarNames) {
                bname = classifierName[i];
            }
            else {
                bname = "tb_" + featureName + "_" + categ.getDisplayValue();
                classifierName[i] = bname;
            }

            // Create binary attribute:
            NumericAttribute binary = new NumericAttribute();
            binary.setName( bname );
            binary.setCyclic( false );
            binary.setDiscrete( true );
            binary.setTime( false );
            binary.setLowerBound( 0 );
            binary.setUpperBound( 1 );
            transformedAttribute[i] = binary;
        }
        return transformedAttribute;
    }

    /**
     * Transforms one attribute value representing the key of a category.
     * The result is the array of values of all binary attributes: 1 at the
     * position of the binary attribute matching the key, 0 elsewhere. Only
     * the first element of attributeValues is read.
     *
     * A missing value yields an array filled with Category.MISSING_VALUE.
     * A key that matches none of the binary attributes (including a negative
     * key) yields an array of zeros. A key beyond the current lookup table
     * is looked up in the source attribute and added to the table.
     *
     * @param attributeValues value of attribute to be transformed
     * @return transformed values, one per entry of the category list
     * @exception MiningException no value given, no category list defined,
     * or the source attribute is not available as categorical attribute
     */
    public double[] transformAttributeValue( double[] attributeValues ) throws MiningException
    {
        if (attributeValues == null || attributeValues.length == 0)
            throw new MiningException("No value to transform");
        if (categories == null)
            throw new MiningException("No category list defined for source attribute: " + featureName);

        int n = categories.size();
        double[] transformedValue = new double[n];  // zero-filled by the JVM
        if (n == 0)
            return transformedValue;

        double value = attributeValues[0];

        // Missing value:
        if ( Category.isMissingValue(value) )
        {
            for (int i = 0; i < n; i++)
                transformedValue[i] = Category.MISSING_VALUE;
            return transformedValue;
        }

        // Look up the binary attribute position for this key. Negative keys
        // cannot index the table and are handled like unknown categories.
        int ivalue = (int) value;
        int tval = -1;
        if (ivalue >= 0)
        {
            // New category of source attribute:
            if (ivalue >= indexAssign.size())
                registerNewKey(ivalue);
            tval = indexAssign.IntegerAt( ivalue );
        }

        // Binning: the range check protects against table entries that
        // point past the current category list.
        if (tval >= 0 && tval < n)
            transformedValue[tval] = 1;
        return transformedValue;
    }

    /**
     * Returns the source attribute as categorical attribute.
     *
     * @return source attribute
     * @exception MiningException source attribute not found or not categorical
     */
    private CategoricalAttribute requireCategoricalSource() throws MiningException
    {
        MiningAttribute miningAtt = getSourceAttribute(-1);
        if (miningAtt == null)
            throw new MiningException("Could not find source attribute: " + featureName);
        if (! (miningAtt instanceof CategoricalAttribute))
            throw new MiningException("Source attribute '" + featureName + "' must be categorical");
        return (CategoricalAttribute) miningAtt;
    }

    /**
     * Extends the lookup table so that it contains an entry for the given
     * key, which must be at least the current table size. Slots between the
     * old end of the table and the key are filled with -1.
     *
     * @param key non-negative category key not yet covered by the table
     * @exception MiningException source attribute not found or not categorical
     */
    private void registerNewKey(int key) throws MiningException
    {
        // Resolve the category before touching the table, so a failure
        // leaves the table unchanged.
        CategoricalAttribute source = requireCategoricalSource();
        Category cat = source.getCategory(key);
        int position = (cat != null) ? categories.indexOf(cat) : -1;

        for (int j = indexAssign.size(); j < key; j++)
            indexAssign.addElement(-1);
        indexAssign.addElement(position);
    }

    // -----------------------------------------------------------------------
    //  Methods of PMML handling
    // -----------------------------------------------------------------------
    /**
     * Creates array of derived fields corresponding to the binning attributes.
     * One DerivedField with a NormDiscrete element (method "indicator") is
     * created per binary attribute. If there are more target names than
     * categories, the surplus names are ignored.
     *
     * @return array of DerivedField
     * @throws MiningException target names or category list are not defined
     */
    public Object[] createPmmlObjects() throws MiningException
    {
        if (classifierName == null || categories == null)
            throw new MiningException("Target names or category list not defined for source attribute: " + featureName);

        // classifierName may be longer than the category list (see
        // transformAttribute), so bound the loop by the shorter of the two.
        int count = Math.min(classifierName.length, categories.size());
        DerivedField[] fields = new DerivedField[count];
        for (int i = 0; i < count; i++) {
            fields[i] = new DerivedField();
            if ( isRemoveSourceAttributes() )
                fields[i].setRemoveSourceAtt("1");
            fields[i].setName( classifierName[i] );
            String value = ( (Category) categories.elementAt(i) ).getDisplayValue();
            fields[i].setDisplayName( "Transformed " + featureName);

            NormDiscrete normDiscrete = new NormDiscrete();
            normDiscrete.setField( featureName );
            normDiscrete.setMethod( "indicator" );
            normDiscrete.setValue( value );
            fields[i].setNormDiscrete(normDiscrete);
        }
        return fields;
    }

    /**
     * Creates binning object from array of derived fields corresponding
     * to the binning attributes. Target names, category list, source
     * attribute name and the remove-source flag are taken from the fields;
     * the source attribute name is read from the first field. The state of
     * this object is only modified if all fields could be parsed.
     *
     * @param pmml array of DerivedField objects corresponding to the binary attributes
     * @throws MiningException cannot parse PMML object
     */
    public void parsePmmlObjects(Object[] pmml) throws MiningException
    {
        if (pmml == null || pmml.length == 0)
            throw new MiningException("No PMML objects to parse");

        // Parse into locals first; commit only after everything succeeded.
        String[] names = new String[pmml.length];
        Vector parsedCategories = new Vector();
        boolean removeSource = false;
        NormDiscrete first = null;
        for (int i = 0; i < pmml.length; i++) {
            if (! (pmml[i] instanceof DerivedField))
                throw new MiningException("PMML object " + i + " is not a DerivedField");
            DerivedField field = (DerivedField) pmml[i];
            NormDiscrete disc = field.getNormDiscrete();
            if (disc == null)
                throw new MiningException("DerivedField '" + field.getName() + "' contains no NormDiscrete");
            if (i == 0)
                first = disc;

            names[i] = field.getName();
            String catVal = disc.getValue();
            parsedCategories.addElement( new Category(catVal) );
            // One flagged field is enough to switch source removal on.
            if ( "1".equals(field.getRemoveSourceAtt()) )
                removeSource = true;
        }

        classifierName = names;
        categories = parsedCategories;
        setRemoveSourceAttributes(removeSource);
        featureName = first.getField();
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -