📄 discretize.cpp
字号:
/*
This file is part of Orange.
Orange is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
Orange is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Orange; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Authors: Janez Demsar, Blaz Zupan, 1996--2002
Contact: janez.demsar@fri.uni-lj.si
*/
#include <math.h>
#include <stdio.h>
#include <string>
#include <vector>
#include "vars.hpp"
#include "domain.hpp"
#include "examples.hpp"
#include "examplegen.hpp"
#include "getarg.hpp"
#include "classify.hpp"
#include "random.hpp"
#include "distvars.hpp"
#include "basstat.hpp"
#include "contingency.hpp"
#include "transval.hpp"
#include "classfromvar.hpp"
#include "discretize.ppp"
/* Constructs an equal-width discretizer.
   noi -- number of intervals (stored in numberOfIntervals)
   fv  -- position of the first cut-off point (stored in firstCut)
   st  -- distance between consecutive cut-off points (stored in step) */
TEquiDistDiscretizer::TEquiDistDiscretizer(const int noi, const float fv, const float st)
: numberOfIntervals(noi),
firstCut(fv),
step(st)
{}
/* Replaces a continuous value with the index of the interval it falls into:
   0 for values below firstCut, otherwise 1+floor((value-firstCut)/step),
   clamped to numberOfIntervals-1. If step is 0 or there is a single interval,
   the index is always 0. Special values keep their state; in every case the
   value's type is changed to TValue::INTVAR.
   Raises an error if the value is not continuous, if step is negative or if
   numberOfIntervals is below 1. */
void TEquiDistDiscretizer::transform(TValue &val)
{
  if (val.varType != TValue::FLOATVAR)
    raiseError("continuous value expected");

  if (!val.isSpecial()) {
    if (step < 0)
      raiseError("'step' not set");
    if (numberOfIntervals < 1)
      raiseError("invalid number of intervals (%i)", numberOfIntervals);

    if ((step == 0) || (numberOfIntervals == 1) || (val.floatV < firstCut))
      val.intV = 0;
    else {
      // Clamp while still in floating point: converting a quotient that does
      // not fit into an int (tiny step, huge value, NaN) is undefined.
      const double index = 1.0 + floor((val.floatV-firstCut)/step);
      val.intV = (index < numberOfIntervals) ? int(index) : numberOfIntervals-1;
    }
  }

  val.varType = TValue::INTVAR;
}
/* Chooses the number of decimals needed to tell apart values that lie 'diff'
   apart and stores the matching rounding factor (10^decimals) in 'factor'.
   At least two decimals are always used. Returns the number of decimals.
   Non-positive or NaN differences also get the two-decimal default: their
   logarithm is infinite or NaN and converting that to int is undefined. */
inline int numDecs(const float &diff, float &factor)
{
  if (!(diff > 0.0f) || (diff >= 1.0f)) {
    factor = 100.0f;
    return 2;
  }

  int decs = static_cast<int>(ceil(-log10(diff)));
  if (decs < 2)
    decs = 2;
  factor = static_cast<float>(pow(10.0, decs));
  return decs;
}
/* Returns the rounding factor 10^decs for a given number of decimals;
   for a non-positive number of decimals the factor is 100. */
inline float roundFromDecs(const int &decs)
{
  if (decs <= 0)
    return 100.0;

  return exp(decs*log(10.0));
}
/* Rounds f in place to the nearest multiple of 1/factor (halves round up). */
inline void roundToFactor(float &f, const float &factor)
{
  // The product is deliberately formed in float, exactly as before;
  // only the added 0.5 promotes the sum to double.
  const double shifted = f*factor + 0.5;
  f = floor(shifted)/factor;
}
/* Formats f in fixed notation with 'decs' digits after the decimal point
   (printf's "%.*f") and returns the text.
   The length is measured first, so large magnitudes or many decimals can no
   longer overrun a fixed-size buffer. Returns an empty string if formatting
   fails. */
std::string mcvt(double f, int decs)
{
  char small[64];
  const int needed = snprintf(small, sizeof(small), "%.*f", decs, f);
  if (needed < 0)
    return std::string();

  if (static_cast<size_t>(needed) < sizeof(small))
    return std::string(small, static_cast<size_t>(needed));

  // Did not fit: snprintf reported the full length, so format again into
  // a buffer of exactly that size (plus the terminating zero).
  std::vector<char> large(static_cast<size_t>(needed) + 1);
  snprintf(&large[0], large.size(), "%.*f", decs, f);
  return std::string(&large[0], static_cast<size_t>(needed));
}
/* Builds the discretized counterpart of the continuous variable 'var': an
   ordered TEnumVariable named "D_<name of var>" whose values describe the
   intervals ("<a", "[a, b)", ..., ">z"), or the single value "C" when there
   are fewer than two intervals. firstCut and step are rounded in place to
   the number of decimals used in the labels. The new variable's
   getValueFrom is a TClassifierFromVar that reads 'var' and uses this
   discretizer as its transformer.
   Raises an error if 'var' is not a TFloatVariable. */
PVariable TEquiDistDiscretizer::constructVar(PVariable var)
{
  TFloatVariable *continuous = var.AS(TFloatVariable);
  if (!continuous)
    raiseError("invalid attribute type (continuous attribute expected)");

  TEnumVariable *discrete = mlnew TEnumVariable("D_"+var->name);
  PVariable wrapped(discrete);
  discrete->ordered = true;

  if (numberOfIntervals >= 2) {
    float factor;
    int decimals = numDecs(step, factor);
    // Unless adjustDecimals is 2, never print fewer decimals than the
    // original variable declares.
    if ((continuous->adjustDecimals != 2) && (decimals < continuous->numberOfDecimals)) {
      decimals = continuous->numberOfDecimals;
      factor = roundFromDecs(continuous->numberOfDecimals);
    }

    roundToFactor(firstCut, factor);
    roundToFactor(step, factor);

    float cut = firstCut;
    string bound = mcvt(cut, decimals);
    discrete->addValue(string("<") + bound);

    // One "[lower, upper)" value for each of the numberOfIntervals-2 inner intervals.
    for (int remaining = numberOfIntervals-2; remaining > 0; --remaining) {
      string interval("[");
      interval += bound;
      interval += ", ";
      cut += step;
      bound = mcvt(cut, decimals);
      interval += bound;
      interval += ")";
      discrete->addValue(interval);
    }

    discrete->addValue(string(">") + bound);
  }
  else
    discrete->addValue("C");

  TClassifierFromVar *classifier = mlnew TClassifierFromVar(wrapped, var);
  classifier->transformer = this; // rewrapping
  wrapped->getValueFrom = classifier;
  return wrapped;
}
/* Replaces the contents of 'cutoffs' with the numberOfIntervals-1 cut-off
   points firstCut, firstCut+step, firstCut+2*step, ... */
void TEquiDistDiscretizer::getCutoffs(vector<float> &cutoffs) const
{
  cutoffs.clear();

  int index = 0;
  while (index < numberOfIntervals-1) {
    cutoffs.push_back(firstCut + step*index);
    ++index;
  }
}
/* Constructs a two-interval discretizer that splits values at 'athreshold'
   (stored in threshold). */
TThresholdDiscretizer::TThresholdDiscretizer(const float &athreshold)
: threshold(athreshold)
{}
/* Replaces a continuous value with 0 if it is at most 'threshold' and with 1
   otherwise. Special values keep their state; in every case the value's type
   is changed to TValue::INTVAR.
   Raises an error if the value is not continuous, as the other discretizers
   in this file do; previously floatV was read without any type check. */
void TThresholdDiscretizer::transform(TValue &val)
{
  if (val.varType != TValue::FLOATVAR)
    raiseError("continuous value expected");

  if (!val.isSpecial())
    val.intV = (val.floatV <= threshold) ? 0 : 1;

  val.varType = TValue::INTVAR;
}
/* Builds the discretized counterpart of 'var': an ordered TEnumVariable named
   "D_<name of var>" with the two values "<= t" and "> t", where t is the
   threshold printed with three decimals. The new variable's getValueFrom is
   a TClassifierFromVar that reads 'var' and uses this discretizer as its
   transformer.
   NOTE(review): unlike the other constructVar methods in this file, the type
   of 'var' is not checked here -- confirm whether that is intended. */
PVariable TThresholdDiscretizer::constructVar(PVariable var)
{
  TEnumVariable *evar = mlnew TEnumVariable("D_"+var->name);
  PVariable revar(evar);
  evar->ordered = true;

  // The former 10-byte buffer overflowed for thresholds of 100 or more and
  // of -10 or less. 64 bytes hold any float printed this way, and snprintf
  // truncates instead of overrunning should that ever not be the case.
  char label[64];
  snprintf(label, sizeof(label), "<= %5.3f", threshold);
  evar->values->push_back(label);
  snprintf(label, sizeof(label), "> %5.3f", threshold);
  evar->values->push_back(label);

  TClassifierFromVar *tcfv = mlnew TClassifierFromVar(revar, var);
  tcfv->transformer = this; // rewrapping
  revar->getValueFrom = tcfv;
  return revar;
}
/* Replaces the contents of 'cutoffs' with the single cut-off point, the threshold. */
void TThresholdDiscretizer::getCutoffs(vector<float> &cutoffs) const
{
  cutoffs.assign(1u, threshold);
}
/* Constructs a discretizer that distinguishes values inside an interval from
   those outside it.
   al -- lower bound of the interval (stored in low)
   ah -- upper bound of the interval (stored in high) */
TBiModalDiscretizer::TBiModalDiscretizer(const float &al, const float &ah)
: low(al),
high(ah)
{}
/* Replaces a continuous value with 1 if it lies in the interval (low, high]
   and with 0 otherwise, matching the value names given by constructVar
   (index 0: "<=low or >high", index 1: "between low and high").
   Special values keep their state; in every case the value's type is changed
   to TValue::INTVAR. Raises an error if the value is not continuous. */
void TBiModalDiscretizer::transform(TValue &val)
{
  if (val.varType != TValue::FLOATVAR)
    raiseError("continuous value expected");

  // The continuous value lives in floatV. The previous code compared intV
  // and required the value to exceed both bounds, so values inside the
  // interval were never reported as such.
  if (!val.isSpecial())
    val.intV = ((val.floatV > low) && (val.floatV <= high)) ? 1 : 0;

  val.varType = TValue::INTVAR;
}
/* Builds the discretized counterpart of the continuous variable 'var': an
   ordered TEnumVariable named "D_<name of var>" with the values
   "<=low or >high" and "between low and high". low and high are rounded in
   place to the number of decimals used in those names. The new variable's
   getValueFrom is a TClassifierFromVar that reads 'var' and uses this
   discretizer as its transformer.
   Raises an error if 'var' is not a TFloatVariable or if high <= low. */
PVariable TBiModalDiscretizer::constructVar(PVariable var)
{
  TFloatVariable *continuous = var.AS(TFloatVariable);
  if (!continuous)
    raiseError("invalid attribute type (continuous attribute expected)");

  TEnumVariable *discrete = mlnew TEnumVariable("D_"+var->name);
  PVariable wrapped(discrete);
  discrete->ordered = true;

  if (high <= low)
    raiseError("invalid interval: (%5.3f, %5.3f]", low, high);

  float factor;
  int decimals = numDecs(high-low, factor);
  // Unless adjustDecimals is 2, never print fewer decimals than the
  // original variable declares.
  if ((continuous->adjustDecimals != 2) && (decimals < continuous->numberOfDecimals)) {
    decimals = continuous->numberOfDecimals;
    factor = roundFromDecs(continuous->numberOfDecimals);
  }

  roundToFactor(low, factor);
  roundToFactor(high, factor);

  const string lowText = mcvt(low, decimals);
  const string highText = mcvt(high, decimals);

  string outside("<=");
  outside += lowText;
  outside += " or >";
  outside += highText;

  string inside("between ");
  inside += lowText;
  inside += " and ";
  inside += highText;

  discrete->values->push_back(outside);
  discrete->values->push_back(inside);

  TClassifierFromVar *classifier = mlnew TClassifierFromVar(wrapped, var);
  classifier->transformer = this; // rewrapping
  wrapped->getValueFrom = classifier;
  return wrapped;
}
/* Replaces the contents of 'cutoffs' with the two interval bounds, low then high. */
void TBiModalDiscretizer::getCutoffs(vector<float> &cutoffs) const
{
  const float bounds[2] = { low, high };
  cutoffs.assign(bounds, bounds + 2);
}
/* Constructs a discretizer whose list of cut-off points is a newly
   allocated TFloatList. */
TIntervalDiscretizer::TIntervalDiscretizer()
: points(mlnew TFloatList())
{}
/* Constructs a discretizer that uses 'apoints' as its list of cut-off
   points. The passed handle is stored as is; no copy of the list is made
   here, so the list is presumably shared with the caller -- confirm.
   (The stray ';' that followed the body was an empty declaration, which is
   ill-formed before C++11, and has been removed.) */
TIntervalDiscretizer::TIntervalDiscretizer(PFloatList apoints)
: points(apoints)
{}
/* Replaces a continuous value with the number of cut-off points that precede
   it: the points are scanned from the start and the scan stops at the first
   one that is not smaller than the value. Special values keep their state;
   in every case the value's type is changed to TValue::INTVAR.
   Raises an error if the value is not continuous. */
void TIntervalDiscretizer::transform(TValue &val)
{
  checkProperty(points);

  if (val.varType != TValue::FLOATVAR)
    raiseError("continuous value expected");

  if (!val.isSpecial()) {
    // NOTE(review): intV is reset before floatV is compared, which relies on
    // the two fields not sharing storage -- confirm against TValue.
    val.intV = 0;

    TFloatList::iterator cut(points->begin());
    TFloatList::iterator cutsEnd(points->end());
    while ((cut != cutsEnd) && (*cut < val.floatV)) {
      cut++;
      val.intV++;
    }
  }

  val.varType = TValue::INTVAR;
}
/* Builds the discretized counterpart of the continuous variable 'var': an
   ordered TEnumVariable named "D_<name of var>" whose values describe the
   intervals between the cut-off points ("<=a", "(a, b]", ..., ">z"), or the
   single value "C" when there are no points. The points are rounded in place
   to the number of decimals used in the labels; that number is derived from
   the smallest gap between neighbouring points (gaps above 1 count as 1).
   The new variable's getValueFrom is a TClassifierFromVar that reads 'var'
   and uses this discretizer as its transformer.
   Raises an error if 'var' is not a TFloatVariable. */
PVariable TIntervalDiscretizer::constructVar(PVariable var)
{
  TFloatVariable *continuous = var.AS(TFloatVariable);
  if (!continuous)
    raiseError("invalid attribute type (continuous attribute expected)");

  TEnumVariable *discrete = mlnew TEnumVariable("D_"+var->name);
  PVariable wrapped(discrete);

  // NOTE(review): the classifier below is built on this second variable,
  // which has the same name but never receives any values, whereas the
  // other constructVar methods in this file pass the returned variable
  // itself -- confirm that this is intentional.
  TEnumVariable *classifierVar = mlnew TEnumVariable("D_"+var->name);
  PVariable wrappedClassifierVar(classifierVar);

  discrete->ordered = true;

  if (points->size() > 0) {
    const TFloatList::iterator first(points->begin()), last(points->end());

    float smallestGap = 1.0;
    for (TFloatList::iterator prev = first, cur = first+1; cur != last; prev = cur, cur++) {
      const float gap = *cur - *prev;
      if (gap < smallestGap)
        smallestGap = gap;
    }

    float factor;
    int decimals = numDecs(smallestGap, factor);
    // Unless adjustDecimals is 2, never print fewer decimals than the
    // original variable declares.
    if ((continuous->adjustDecimals != 2) && (decimals < continuous->numberOfDecimals)) {
      decimals = continuous->numberOfDecimals;
      factor = roundFromDecs(continuous->numberOfDecimals);
    }

    TFloatList::iterator cut(points->begin());
    roundToFactor(*cut, factor);
    string upper = mcvt(*cut, decimals);
    discrete->addValue(string("<=") + upper);

    for (++cut; cut != last; ++cut) {
      const string lower = upper;
      roundToFactor(*cut, factor);
      upper = mcvt(*cut, decimals);
      discrete->addValue("(" + lower + ", " + upper + "]");
    }

    discrete->addValue(string(">") + upper);
  }
  else
    discrete->addValue("C");

  TClassifierFromVar *classifier = mlnew TClassifierFromVar(wrappedClassifierVar, var);
  classifier->transformer = this; // rewrapping
  wrapped->getValueFrom = classifier;
  return wrapped;
}
/* Replaces the contents of 'cutoffs' with a copy of the cut-off points.
   NOTE(review): unlike transform, 'points' is not verified with
   checkProperty first; the behaviour of getReference() on an unset list is
   defined outside this file -- confirm. */
void TIntervalDiscretizer::getCutoffs(vector<float> &cutoffs) const
{
cutoffs = points.getReference();
}
// Sets the number of intervals to 'anumber'. (The original note gives 4 as the
// default; the default argument itself is declared outside this file -- confirm.)
TEquiDistDiscretization::TEquiDistDiscretization(const int anumber)
: TDiscretization(),
numberOfIntervals(anumber)
{}
// Discretizes 'var' into numberOfIntervals intervals of equal width spanning
// the range [valStat->min, valStat->max]: the first cut-off lies one interval
// width above the minimum. Returns the variable built by the resulting
// TEquiDistDiscretizer.
PVariable TEquiDistDiscretization::operator()(PBasicAttrStat valStat, PVariable var) const
{
  const float width = (valStat->max-valStat->min)/numberOfIntervals;
  const float firstBoundary = valStat->min+width;

  PEquiDistDiscretizer discretizer = mlnew TEquiDistDiscretizer(numberOfIntervals, firstBoundary, width);
  return discretizer->constructVar(var);
}
// Discretizes 'var' into numberOfIntervals intervals of equal width spanning
// the range of its non-special values in 'gen': the first cut-off lies one
// interval width above the smallest value. Returns the variable built by the
// resulting TEquiDistDiscretizer.
// Raises an error if 'var' is not continuous, if numberOfIntervals is not
// positive or if 'var' has no known values in 'gen'.
PVariable TEquiDistDiscretization::operator()(PExampleGenerator gen, PVariable var, const long &)
{
  if (var->varType != TValue::FLOATVAR)
    raiseError("attribute '%s' is not continuous", var->name.c_str());

  if (numberOfIntervals <= 0)
    raiseError("invalid number of intervals (%i)", numberOfIntervals);

  const int column = gen->domain->getVarNum(var);

  // Skip leading examples in which the value is special.
  TExampleIterator example(gen->begin());
  for (; example && (*example)[column].isSpecial(); ++example);

  if (!example)
    raiseError("attribute '%s' has no known values", var->name.c_str());

  float lowest = (*example)[column].floatV;
  float highest = lowest;

  while (++example) {
    if ((*example)[column].isSpecial())
      continue;

    const float current = (*example)[column].floatV;
    if (current > highest)
      highest = current;
    if (current < lowest)
      lowest = current;
  }

  const float width = (highest-lowest)/numberOfIntervals;
  PEquiDistDiscretizer discretizer = mlnew TEquiDistDiscretizer(numberOfIntervals, lowest+width, width);
  return discretizer->constructVar(var);
}
/* Constructs a discretization with fixed cut-off points; 'points' becomes a
   newly allocated TFloatList constructed from 'pts'.
   NOTE(review): the order of the points is not checked here, unlike in the
   constructor that parses them from a string. */
TFixedDiscretization::TFixedDiscretization(TFloatList &pts)
: points(mlnew TFloatList(pts))
{}
/* Constructs a discretization whose cut-off points are parsed from
   'boundaries': the string is split with string2atoms and every atom is read
   as a float.
   Raises an error if an atom cannot be read as a number (previously such an
   atom was silently accepted and the point kept its initial value) or if
   the points are not strictly increasing. */
TFixedDiscretization::TFixedDiscretization(const string &boundaries)
: points()
{
  vector<string> atoms;
  string2atoms(boundaries, atoms);

  points = mlnew TFloatList(atoms.size());
  TFloatList::iterator pi(points->begin());

  for (vector<string>::const_iterator ai(atoms.begin()), ae(atoms.end()); ai != ae; ++ai, ++pi) {
    // sscanf returns the number of converted fields; anything but 1 means
    // that the atom does not start with a number.
    if (sscanf(ai->c_str(), "%f", &*pi) != 1)
      raiseError("invalid cut-off point '%s'", ai->c_str());

    if ((pi != points->begin()) && (*pi <= *(pi-1)))
      raiseError("mismatch in cut-off points");
  }
}
/* Discretizes 'var' with the fixed cut-off points; the example generator and
   the last argument are ignored. The interval discretizer is given a newly
   allocated TFloatList constructed from 'points' -- presumably so that the
   rounding done by its constructVar does not alter the stored points
   (confirm). Returns the variable built by that discretizer. */
PVariable TFixedDiscretization::operator ()(PExampleGenerator, PVariable var, const long &)
{
  TFloatList *ownPoints = mlnew TFloatList(points);
  PIntervalDiscretizer discretizer = mlnew TIntervalDiscretizer(ownPoints);
  return discretizer->constructVar(var);
}
/* Constructs the discretization with 'anumber' intervals (stored in
   numberOfIntervals) and with recursiveDivision switched on. */
TEquiNDiscretization::TEquiNDiscretization(int anumber)
: numberOfIntervals(anumber),
recursiveDivision(true)
{}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -