📄 discretization.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Michael Thess
* @version 1.0
*/
package com.prudsys.pdm.Transform.OneToOne;
import java.util.ArrayList;
import com.prudsys.pdm.Adapters.PmmlVersion20.DerivedField;
import com.prudsys.pdm.Adapters.PmmlVersion20.Discretize;
import com.prudsys.pdm.Adapters.PmmlVersion20.DiscretizeBin;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningAttribute;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Core.OrdinalAttribute;
import com.prudsys.pdm.Transform.OneToOneMapping;
import eti.bi.util.NumberFormatter;
/**
* Realization of discretization. Not invertible.
*
* Transforms numeric into categorical attribute.
* Assignment to categories is defined through
* discretization intervals. The intervals are given
* by an array of bounds.
*/
public class Discretization extends OneToOneMapping
{
  // -----------------------------------------------------------------------
  //  Variables declarations
  // -----------------------------------------------------------------------
  /**
   * Array of bounds forming the discretization intervals.
   * <code>null</code> is handled like an empty array: there is a single
   * interval covering the whole real line.
   */
  private double[] bounds = null;

  // -----------------------------------------------------------------------
  //  Constructor
  // -----------------------------------------------------------------------
  /**
   * Empty constructor. The bounds stay unset until {@link #setBounds} or
   * {@link #parsePmmlObject} is called.
   */
  public Discretization()
  {
  }

  // -----------------------------------------------------------------------
  //  Getter and setter methods
  // -----------------------------------------------------------------------
  /**
   * Returns array of interval bounds.
   *
   * The internal array itself is returned (no copy is made), so changes
   * made by the caller are visible to this object.
   *
   * @return array of interval bounds, may be <code>null</code>
   */
  public double[] getBounds()
  {
    return bounds;
  }

  /**
   * Sets array of interval bounds.
   *
   * The array is stored by reference (no copy is made). The lookup in
   * {@link #transformAttributeValue} compares a value against neighbouring
   * bounds, so the bounds are presumably expected in ascending order;
   * this is not checked here.
   *
   * @param bounds new array of interval bounds
   */
  public void setBounds(double[] bounds)
  {
    this.bounds = bounds;
  }

  // -----------------------------------------------------------------------
  //  Transformation methods
  // -----------------------------------------------------------------------
  /**
   * Transforms the numeric source attribute. The result is the
   * ordinal target attribute.
   *
   * For n bounds the target attribute gets n+1 categories named after
   * their intervals: <code>(-Infinity,b0]</code>, <code>(b0,b1]</code>,
   * ..., <code>(bn-1,Infinity)</code>. Without bounds (null or empty
   * array) there is the single category <code>(-Infinity,Infinity)</code>,
   * which matches the key 0 returned by
   * {@link #transformAttributeValue} in that case.
   *
   * @return transformed (discretized) attribute
   * @exception MiningException source attribute missing or not numeric,
   * or the categories could not be assigned to the target attribute
   */
  public MiningAttribute transformAttribute() throws MiningException
  {
    // NOTE(review): this instance is never used (format is called
    // statically below). It is kept because the constructor is not visible
    // here and may have side effects -- confirm and remove if it has none.
    NumberFormatter numberFormatter = new NumberFormatter();

    if (getSourceAttribute() == null)
      throw new MiningException("Could not find source attribute: " + sourceName);
    if (! (getSourceAttribute() instanceof NumericAttribute))
      throw new MiningException("Source attribute '" + sourceName + "' must be numeric");
    NumericAttribute sourceAttribute = (NumericAttribute) getSourceAttribute();

    OrdinalAttribute transformedAttribute = new OrdinalAttribute();
    transformedAttribute.setName( getTargetNameDynamic() );
    transformedAttribute.setCyclic( sourceAttribute.isCyclic() );

    // No bounds (null or empty) means one interval covering everything.
    int nInts = (bounds == null ? 0 : bounds.length) + 1;
    ArrayList categs = new ArrayList();
    for (int i = 0; i < nInts; i++) {
      boolean first = (i == 0);
      boolean last  = (i == nInts - 1);

      String catName = "(";
      if (first)
        catName = catName + String.valueOf( Double.NEGATIVE_INFINITY );
      else
        catName = catName + NumberFormatter.format(bounds[i-1]);
      catName = catName + ",";
      if (last)
        catName = catName + String.valueOf( Double.POSITIVE_INFINITY ) + ")";
      else
        catName = catName + NumberFormatter.format(bounds[i]) + "]";

      categs.add( new Category(catName) );
    }

    try {
      transformedAttribute.setValues(categs);
    }
    catch (MiningException ex) {
      throw ex;
    }
    catch (Exception ex) {
      // Do not hand out an attribute whose categories silently failed to be
      // set: its keys would no longer match transformAttributeValue().
      throw new MiningException("Could not set categories of discretized attribute '"
                                + sourceName + "': " + ex);
    }
    return transformedAttribute;
  }

  /**
   * Transforms attribute value. The result is also a value.
   *
   * The result is the index of the interval containing the value:
   * 0 for values &lt;= bounds[0], i for values in (bounds[i-1], bounds[i]],
   * and bounds.length for everything else. Missing values are returned
   * unchanged; without bounds the result is always 0.
   *
   * @param attributeValue value of attribute to be transformed
   * @return transformed (discretized) value
   * @exception MiningException could not transform attribute value
   */
  public double transformAttributeValue( double attributeValue ) throws MiningException
  {
    // Missing value:
    if (Category.isMissingValue(attributeValue))
      return attributeValue;

    // No bounds: single interval.
    if (bounds == null || bounds.length == 0)
      return 0;

    // Search index:
    if (attributeValue <= bounds[0])
      return 0;
    for (int i = 1; i < bounds.length; i++) {
      if (attributeValue > bounds[i-1] && attributeValue <= bounds[i])
        return i;
    }
    // Above every bound (or matched by no interval): last, open interval.
    return bounds.length;
  }

  // -----------------------------------------------------------------------
  //  Methods of PMML handling
  // -----------------------------------------------------------------------
  /**
   * Creates PMML object DerivedField of this object of Discretize type.
   *
   * One DiscretizeBin is written per interval; without bounds (null or
   * empty array) a single bin for (-Infinity, Infinity) is written.
   *
   * @return DerivedField element
   * @exception MiningException cannot create PMML object
   */
  public Object createPmmlObject() throws MiningException
  {
    DerivedField field = (DerivedField) super.createPmmlObject();

    Discretize discretize = new Discretize();
    discretize.setField( sourceName );

    // Same convention as transformAttributeValue(): null means no bounds.
    int nBounds = (bounds == null) ? 0 : bounds.length;
    int nInts   = nBounds + 1;
    DiscretizeBin[] discBin = new DiscretizeBin[nInts];
    for (int i = 0; i < nInts; i++) {
      boolean last = (i == nInts - 1);
      double lower = (i == 0) ? Double.NEGATIVE_INFINITY : bounds[i-1];
      double upper = last ? Double.POSITIVE_INFINITY : bounds[i];
      // Only the last interval is open on the right.
      discBin[i] = createBin(lower, upper, !last);
    }

    discretize.setDiscretizeBin(discBin);
    field.setDiscretize(discretize);

    return field;
  }

  /**
   * Builds one PMML bin for the left-open interval (lower, upper] or
   * (lower, upper), using the interval's string form as bin value.
   *
   * @param lower lower (open) bound of the interval
   * @param upper upper bound of the interval
   * @param upperClosed true for (lower, upper], false for (lower, upper)
   * @return the new bin
   * @exception MiningException cannot create PMML interval
   */
  private DiscretizeBin createBin(double lower, double upper, boolean upperClosed)
    throws MiningException
  {
    com.prudsys.pdm.Core.Interval interval = new com.prudsys.pdm.Core.Interval();
    if (upperClosed)
      interval.setClosure( com.prudsys.pdm.Core.Interval.OPEN_CLOSED );
    else
      interval.setClosure( com.prudsys.pdm.Core.Interval.OPEN_OPEN );
    interval.setLowerBound( lower );
    interval.setUpperBound( upper );

    DiscretizeBin bin = new DiscretizeBin();
    bin.setBinValue( interval.toString() );
    bin.setInterval( (com.prudsys.pdm.Adapters.PmmlVersion20.Interval) interval.createPmmlObject());
    return bin;
  }

  /**
   * Creates this object from PMML object DerivedField, subobject Discretize.
   *
   * The bounds are taken from the left margins of the second and all
   * following bins; the interval of the first bin is not read. The bounds
   * of this object are only replaced if all margins could be parsed.
   *
   * @param pmml pmml element
   * @exception MiningException Discretize element, bins, interval or left
   * margin missing, or a left margin is not a number
   */
  public void parsePmmlObject(Object pmml) throws MiningException
  {
    super.parsePmmlObject(pmml);

    DerivedField field = (DerivedField) pmml;
    Discretize disc = field.getDiscretize();
    if (disc == null)
      throw new MiningException("DerivedField contains no Discretize element");

    DiscretizeBin[] discBin = disc.getDiscretizeBin();
    if (discBin == null || discBin.length == 0)
      throw new MiningException("Discretize element contains no DiscretizeBin");

    int nInts = discBin.length;
    double[] parsedBounds = new double[nInts-1];
    for (int i = 1; i < nInts; i++) {
      com.prudsys.pdm.Adapters.PmmlVersion20.Interval interval =
        (discBin[i] == null) ? null : discBin[i].getInterval();
      if (interval == null)
        throw new MiningException("DiscretizeBin " + i + " contains no Interval");

      String lm = interval.getLeftMargin();
      if (lm == null)
        throw new MiningException("Interval of DiscretizeBin " + i + " has no left margin");
      try {
        parsedBounds[i-1] = Double.parseDouble(lm);
      }
      catch (NumberFormatException ex) {
        throw new MiningException("Left margin '" + lm + "' of DiscretizeBin " + i
                                  + " is not a number");
      }
    }

    // Everything parsed: take over the new state.
    sourceName = disc.getField();
    bounds = parsedBounds;
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -