📄 treatoutliervaluestream.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* Title: XELOPES Data Mining Library
* Description: The XELOPES library is an open platform-independent and data-source-independent library for Embedded Data Mining.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: ZSoft (www.zsoft.ru), Prudsys (www.prudsys.com)
* @author Michael Thess
* @version 1.0
*/
package com.prudsys.pdm.Transform.Special;
import com.prudsys.pdm.Core.CategoricalAttribute;
import com.prudsys.pdm.Core.MiningAttribute;
import com.prudsys.pdm.Core.MiningDataSpecification;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.NumericAttribute;
import com.prudsys.pdm.Input.MiningInputStream;
import com.prudsys.pdm.Models.Statistics.SimpleStats;
import com.prudsys.pdm.Transform.MiningTransformationFactory;
import com.prudsys.pdm.Transform.MiningTransformationStep;
import com.prudsys.pdm.Transform.OneToOne.Identity;
import com.prudsys.pdm.Transform.OneToOne.TreatOutlierAttributeValue;
/**
 * Outlier treatment for a given mining input stream.
 *
 * <p>One {@link TreatOutlierAttributeValue} mapping is created per attribute
 * of the input stream's meta data, using one treatment type for all
 * categorical attributes and another one for all other (numeric) attributes.
 * By default, 'asIs' is used for numeric attributes and 'asMissingValues'
 * for categorical attributes.</p>
 *
 * <p>For attributes whose treatment type is 'asIs' no transformation is
 * created at all.</p>
 */
public class TreatOutlierValueStream extends VectorTransformationStream
{
    // -----------------------------------------------------------------------
    //  Variables declarations
    // -----------------------------------------------------------------------
    /**
     * Low values used for the 'asExtremeValues' treatment, indexed by
     * attribute position. Stays null until extreme values were calculated.
     */
    private double[] lowValues = null;

    /**
     * High values used for the 'asExtremeValues' treatment, indexed by
     * attribute position. Stays null until extreme values were calculated.
     */
    private double[] highValues = null;

    /** Treatment type applied to numeric (non-categorical) attributes. */
    private String numOutliers = TreatOutlierAttributeValue.OUTLIER_TREATMENT_METHOD_asIs;

    /** Treatment type applied to categorical attributes. */
    private String catOutliers = TreatOutlierAttributeValue.OUTLIER_TREATMENT_METHOD_asMissingValues;

    // -----------------------------------------------------------------------
    //  Constructors
    // -----------------------------------------------------------------------
    /**
     * Empty constructor. No input stream is assigned.
     */
    public TreatOutlierValueStream()
    {
    }

    /**
     * Constructor for given stream.
     *
     * @param inputStream mining input stream for treatment parameters
     */
    public TreatOutlierValueStream(MiningInputStream inputStream)
    {
        this.inputStream = inputStream;
    }

    // -----------------------------------------------------------------------
    //  Getter and setter methods
    // -----------------------------------------------------------------------
    /**
     * Returns treatment of numeric attributes.
     *
     * @return treatment of numeric attributes
     */
    public String getNumOutliers()
    {
        return numOutliers;
    }

    /**
     * Sets treatment of numeric attributes.
     *
     * @param numOutliers treatment of numeric attributes
     */
    public void setNumOutliers(String numOutliers)
    {
        this.numOutliers = numOutliers;
    }

    /**
     * Returns treatment of categorical attributes.
     *
     * @return treatment of categorical attributes
     */
    public String getCatOutliers()
    {
        return catOutliers;
    }

    /**
     * Sets treatment of categorical attributes.
     *
     * @param catOutliers treatment of categorical attributes
     */
    public void setCatOutliers(String catOutliers)
    {
        this.catOutliers = catOutliers;
    }

    /**
     * Returns array of high values. The internal array itself is returned
     * (no copy); it is null as long as no extreme values were calculated.
     *
     * @return array of high values, may be null
     */
    public double[] getHighValues()
    {
        return highValues;
    }

    /**
     * Returns array of low values. The internal array itself is returned
     * (no copy); it is null as long as no extreme values were calculated.
     *
     * @return array of low values, may be null
     */
    public double[] getLowValues()
    {
        return lowValues;
    }

    // -----------------------------------------------------------------------
    //  Transformation methods
    // -----------------------------------------------------------------------
    /**
     * Calculates the extreme values of all attributes of the input stream
     * and stores them in the low and high value arrays.
     *
     * <p>Numeric attributes get their minimum as low and their maximum as
     * high value. All other attributes get the mode as both low and high
     * value. NOTE(review): the original author marked the use of the mode
     * with "(?!)", so this choice should be confirmed.</p>
     *
     * @exception MiningException error while calculating extreme values
     */
    private void calcTreatExtremeValues() throws MiningException
    {
        // Calculate simple statistics over the input stream:
        SimpleStats stats = new SimpleStats();
        stats.setInputStream(inputStream);
        stats.runCalculation();

        // Fill arrays of treatment values, one slot per attribute:
        MiningDataSpecification metaData = inputStream.getMetaData();
        int attributeCount = metaData.getAttributesNumber();
        lowValues = new double[attributeCount];
        highValues = new double[attributeCount];
        for (int i = 0; i < attributeCount; i++) {
            MiningAttribute attribute = metaData.getMiningAttribute(i);
            if (attribute instanceof NumericAttribute) {
                lowValues[i] = stats.getCalculatedValue(attribute, SimpleStats.STAT_MIN);
                highValues[i] = stats.getCalculatedValue(attribute, SimpleStats.STAT_MAX);
            }
            else {
                lowValues[i] = stats.getCalculatedValue(attribute, SimpleStats.STAT_MODE);
                highValues[i] = lowValues[i];
            }
        }
    }

    /**
     * Creates mining transformation step for treatment of outliers.
     *
     * <p>Excluded attributes (if any are defined) and attributes whose
     * treatment type is 'asIs' get no mapping. If no mapping was created
     * at all, a single identity mapping of the first attribute is used
     * instead, because at least one mapping is required. The created step
     * is also stored in this object.</p>
     *
     * @return mining transformation step
     * @exception MiningException no input stream defined, no attribute
     * available for the identity fallback, or error while calculating
     * extreme values or creating the step
     */
    public MiningTransformationStep createMiningTransformationStep() throws MiningException
    {
        // No mining input stream defined => exception:
        if (inputStream == null) {
            throw new MiningException("No mining input stream defined");
        }

        final String asIs = TreatOutlierAttributeValue.OUTLIER_TREATMENT_METHOD_asIs;
        final String asExtremeValues =
            TreatOutlierAttributeValue.OUTLIER_TREATMENT_METHOD_asExtremeValues;

        // Extreme values are only needed if one of the treatments uses them:
        if (numOutliers.equals(asExtremeValues) || catOutliers.equals(asExtremeValues)) {
            calcTreatExtremeValues();
        }

        // Mining transformation factory:
        MiningTransformationFactory factory = new MiningTransformationFactory();
        MiningDataSpecification metaData = inputStream.getMetaData();
        boolean mappingAdded = false;
        for (int i = 0; i < metaData.getAttributesNumber(); i++) {
            // Get attribute and name:
            MiningAttribute attribute = metaData.getMiningAttribute(i);
            String attributeName = attribute.getName();

            // Don't use excluded attributes, if defined:
            if (excludedAttributeNames != null
                    && excludedAttributeNames.indexOf(attributeName) > -1) {
                continue;
            }

            // Prepare outlier treatment transformation:
            TreatOutlierAttributeValue treatOutlier = new TreatOutlierAttributeValue();
            treatOutlier.setSourceName(attributeName);
            treatOutlier.setAssessmentAttribute(attribute);

            // Categorical attributes use their own treatment type, all
            // other attributes use the numeric one:
            String treatment =
                (attribute instanceof CategoricalAttribute) ? catOutliers : numOutliers;

            // Don't create transformation for 'asIs':
            if (treatment.equals(asIs)) {
                continue;
            }

            treatOutlier.setOutliers(treatment);
            if (treatment.equals(asExtremeValues)) {
                treatOutlier.setLowValue(lowValues[i]);
                treatOutlier.setHighValue(highValues[i]);
            }
            factory.addOneToOneMapping(treatOutlier);
            mappingAdded = true;
        }

        // No transformations at all => just 1 required, use first attribute:
        if (!mappingAdded) {
            // Without any attribute there is nothing the identity mapping
            // could refer to; fail with a clear message instead of relying
            // on whatever getMiningAttribute(0) does for empty meta data.
            if (metaData.getAttributesNumber() == 0) {
                throw new MiningException(
                    "No mining attributes defined in meta data of input stream");
            }
            Identity identity = new Identity();
            identity.setSourceName(metaData.getMiningAttribute(0).getName());
            factory.addOneToOneMapping(identity);
        }

        // Create transformation step for treatment:
        mts = factory.createMiningTransformationStep();
        return mts;
    }

    // -----------------------------------------------------------------------
    //  Other methods
    // -----------------------------------------------------------------------
    /**
     * Returns treatment description: the treatment types of categorical and
     * numeric attributes and, if they were calculated, one line with the
     * low and high value per attribute.
     *
     * @return description of outlier treatment
     */
    @Override
    public String toString()
    {
        // StringBuilder avoids re-copying the whole text for every attribute.
        StringBuilder mess = new StringBuilder();
        mess.append("Treatment outliers value stream. Treatment: ").append("\n");
        mess.append("->Categorical attributes: ").append(getCatOutliers()).append("\n");
        mess.append("->Numeric attributes: ").append(getNumOutliers()).append("\n");
        if (lowValues != null && highValues != null) {
            mess.append("-->low and high values:").append("\n");
            for (int i = 0; i < lowValues.length; i++) {
                mess.append(String.valueOf(lowValues[i]))
                    .append(" ")
                    .append(String.valueOf(highValues[i]))
                    .append("\n");
            }
        }
        return mess.toString();
    }

    // -----------------------------------------------------------------------
    //  Old methods. Should no longer be used
    // -----------------------------------------------------------------------
    /**
     * Creates mining transformation step for treatment of outliers.
     * Simply delegates to {@link #createMiningTransformationStep()}.
     *
     * @return mining transformation step
     * @exception MiningException no input stream defined
     * @deprecated use {@link #createMiningTransformationStep()} instead
     */
    @Deprecated
    public MiningTransformationStep createTreatOutlierValueTransformationStep() throws MiningException
    {
        return createMiningTransformationStep();
    }

    /**
     * Returns mining transformation step. Simply delegates to
     * getMiningTransformationStep.
     *
     * @return mining transformation step
     * @deprecated use getMiningTransformationStep instead
     */
    @Deprecated
    public MiningTransformationStep getMts()
    {
        return getMiningTransformationStep();
    }

    /**
     * Creates mining input stream with outliers treatment. Simply delegates
     * to createTransformedStream.
     *
     * @return mining input stream with outlier treatment
     * @exception MiningException cannot create transformed stream
     * @deprecated use createTransformedStream instead, which is what this
     * method calls
     */
    @Deprecated
    public MiningInputStream createTreatOutlierValueStream() throws MiningException
    {
        return createTransformedStream();
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -