⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 discretize.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/*
    This file is part of Orange.

    Orange is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    Orange is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Orange; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Authors: Janez Demsar, Blaz Zupan, 1996--2002
    Contact: janez.demsar@fri.uni-lj.si
*/


#include <math.h>

#include "vars.hpp"
#include "domain.hpp"
#include "examples.hpp"
#include "examplegen.hpp"
#include "getarg.hpp"

#include "classify.hpp"
#include "random.hpp"
#include "distvars.hpp"
#include "basstat.hpp"
#include "contingency.hpp"
#include "transval.hpp"
#include "classfromvar.hpp"

#include "discretize.ppp"


// Stores the number of intervals, the first cut-off and the distance between
// consecutive cut-offs. Nothing is validated here; transform() checks the
// settings when it is called.
TEquiDistDiscretizer::TEquiDistDiscretizer(const int noi, const float fv, const float st)
  : numberOfIntervals(noi), firstCut(fv), step(st)
{
}


/*  Replaces a continuous value with the index of its equal-width interval.

    Values below firstCut get index 0; all others get
    1+floor((value-firstCut)/step), limited to numberOfIntervals-1. When step
    is 0 or there is a single interval, every value gets index 0. For special
    values only the type tag is changed.

    Raises an error if the value is not continuous, if step is negative or if
    numberOfIntervals is below 1. */
void TEquiDistDiscretizer::transform(TValue &val)
{
  if (val.varType != TValue::FLOATVAR)
    raiseError("continuous value expected");

  if (!val.isSpecial()) {
    if (step < 0)
      raiseError("'step' not set");
    if (numberOfIntervals < 1)
      raiseError("invalid number of intervals (%i)", numberOfIntervals);

    if ((step == 0) || (numberOfIntervals == 1))
      val.intV = 0;

    else if (val.floatV < firstCut)
      val.intV = 0;

    else {
      // Clamp before converting to int: a very large (or NaN) quotient cannot
      // be represented as int, and the result would be the last interval anyway.
      const double offset = floor((val.floatV - firstCut) / step);
      val.intV = (offset < numberOfIntervals - 1) ? 1 + int(offset) : numberOfIntervals - 1;
    }
  }

  val.varType = TValue::INTVAR;
}


/*  Chooses the number of decimals needed to distinguish values that lie
    'diff' apart and stores the matching power of ten in 'factor'.

    diff    distance between neighbouring values
    factor  (out) 10^<returned number of decimals>
    returns the number of decimals, never fewer than 2

    A difference that is zero, negative or NaN gets the two-decimal default,
    since its logarithm cannot be converted to a meaningful int. */
inline int numDecs(const float &diff, float &factor)
{
  // !(diff > 0) is true for zero, negative and NaN differences alike.
  if ((diff >= 1.0) || !(diff > 0.0)) {
    factor = 100.0;
    return 2;
  }

  int decs = (int)ceil(-log10(diff));
  if (decs < 2)
    decs = 2;
  factor = pow(10.0, decs);
  return decs;
}


// Returns ten raised to 'decs' for a positive number of decimals; for zero or
// a negative count the result is 100.
inline float roundFromDecs(const int &decs)
{
  if (decs > 0)
    return exp(decs*log(10.0));

  return 100.0;
}

// Rounds f in place to the nearest multiple of 1/factor; exact halves are
// rounded upwards.
inline void roundToFactor(float &f, const float &factor)
{
  const double shifted = f*factor + 0.5;
  f = floor(shifted)/factor;
}


/*  Formats f in fixed-point notation with 'decs' digits after the decimal
    point and returns the text.

    Short results are formatted in a stack buffer. Longer ones (large
    magnitudes or many decimals) are formatted into a string of exactly the
    required length, so the output can never overrun the buffer. An empty
    string is returned if formatting fails. */
std::string mcvt(double f, int decs)
{
  char buf[64];
  const int needed = snprintf(buf, sizeof(buf), "%.*f", decs, f);
  if (needed < 0)
    return std::string();

  if (needed < (int)sizeof(buf))
    return std::string(buf, needed);

  // The stack buffer was too small; snprintf reported the length it needs.
  std::string text(needed, '\0');
  snprintf(&text[0], needed + 1, "%.*f", decs, f);
  return text;
}

/*  Builds the discretized counterpart of the continuous variable 'var': an
    ordered TEnumVariable named "D_"+<name of var> with one value per interval
    (a single value "C" when there are fewer than two intervals).

    The new variable's getValueFrom is a TClassifierFromVar that reads 'var'
    and uses this object as its transformer.

    Side effect: firstCut and step are rounded in place to the number of
    decimals used in the value names.

    Raises an error if 'var' is not a TFloatVariable. */
PVariable TEquiDistDiscretizer::constructVar(PVariable var)
{
  TFloatVariable *continuous = var.AS(TFloatVariable);
  if (!continuous)
    raiseError("invalid attribute type (continuous attribute expected)");

  TEnumVariable *discrete = mlnew TEnumVariable("D_"+var->name);
  PVariable wrapped(discrete);

  discrete->ordered = true;

  if (numberOfIntervals >= 2) {
    // Decimals follow from the step, unless the variable asks for more.
    float factor;
    int digits = numDecs(step, factor);
    if ((continuous->adjustDecimals != 2) && (digits < continuous->numberOfDecimals)) {
      digits = continuous->numberOfDecimals;
      factor = roundFromDecs(continuous->numberOfDecimals);
    }

    roundToFactor(firstCut, factor);
    roundToFactor(step, factor);

    float bound = firstCut;
    string label = mcvt(bound, digits);
    discrete->addValue(string("<") + label);

    // numberOfIntervals-2 inner intervals, each named "[lower, upper)"
    for (int remaining = numberOfIntervals-2; remaining > 0; --remaining) {
      const string lower = label;
      bound += step;
      label = mcvt(bound, digits);
      discrete->addValue("[" + lower + ", " + label + ")");
    }

    // NOTE(review): the inner intervals are written as half-open "[a, b)", so
    // ">=" may describe the last interval more accurately than ">" - confirm
    // before changing, since value names may be relied upon elsewhere.
    discrete->addValue(string(">") + label);
  }
  else
    discrete->addValue("C");

  TClassifierFromVar *classifier = mlnew TClassifierFromVar(wrapped, var);
  classifier->transformer = this; // rewrapping
  wrapped->getValueFrom = classifier;
  return wrapped;
}


// Replaces the contents of 'cutoffs' with the numberOfIntervals-1 cut-off
// points firstCut, firstCut+step, firstCut+2*step, ...
void TEquiDistDiscretizer::getCutoffs(vector<float> &cutoffs) const
{
  cutoffs.clear();

  const int nCuts = numberOfIntervals-1;
  int idx = 0;
  while (idx < nCuts) {
    cutoffs.push_back(firstCut+step*idx);
    ++idx;
  }
}


// Stores the threshold that separates the two resulting values.
TThresholdDiscretizer::TThresholdDiscretizer(const float &athreshold)
  : threshold(athreshold)
{
}


/*  Replaces a continuous value with 0 if it is at most 'threshold' and with 1
    otherwise. For special values only the type tag is changed.

    Raises an error if the value is not continuous, as the other discretizers
    in this file do; without the check a non-continuous value would be
    silently read through floatV. */
void TThresholdDiscretizer::transform(TValue &val)
{
  if (val.varType != TValue::FLOATVAR)
    raiseError("continuous value expected");

  if (!val.isSpecial())
    val.intV = (val.floatV <= threshold) ? 0 : 1;

  val.varType = TValue::INTVAR;
}


/*  Builds the discretized counterpart of 'var': an ordered TEnumVariable named
    "D_"+<name of var> with the two values "<= t" and "> t", where t is the
    threshold printed with three decimals.

    The new variable's getValueFrom is a TClassifierFromVar that reads 'var'
    and uses this object as its transformer. */
PVariable TThresholdDiscretizer::constructVar(PVariable var)
{
  TEnumVariable *evar = mlnew TEnumVariable("D_"+var->name);
  PVariable revar(evar);

  evar->ordered = true;

  // The former 10-byte buffer overflowed for any threshold needing more than
  // six characters (e.g. 100.000 or -10.000). 64 bytes hold every float in
  // this format, and snprintf bounds the write regardless.
  char s[64];
  snprintf(s, sizeof(s), "<= %5.3f", threshold);
  evar->values->push_back(s);
  snprintf(s, sizeof(s), "> %5.3f", threshold);
  evar->values->push_back(s);

  TClassifierFromVar *tcfv = mlnew TClassifierFromVar(revar, var);
  tcfv->transformer = this; // rewrapping
  revar->getValueFrom = tcfv;
  return revar;
}


// Replaces the contents of 'cutoffs' with the single cut-off, the threshold.
void TThresholdDiscretizer::getCutoffs(vector<float> &cutoffs) const
{
  cutoffs.assign(1, threshold);
}


// Stores the lower and the upper bound of the interval; they are validated
// (high must exceed low) only in constructVar().
TBiModalDiscretizer::TBiModalDiscretizer(const float &al, const float &ah)
  : low(al), high(ah)
{
}


/*  Replaces a continuous value with 1 if it lies in the interval (low, high]
    and with 0 otherwise. This matches the value names built by constructVar():
    index 0 is "<=low or >high", index 1 is "between low and high". For special
    values only the type tag is changed.

    Raises an error if the value is not continuous. */
void TBiModalDiscretizer::transform(TValue &val)
{
  if (val.varType != TValue::FLOATVAR)
    raiseError("continuous value expected");

  // The test must read the continuous field (floatV) and must bound the value
  // from above; the previous code compared intV and tested "> high".
  if (!val.isSpecial())
    val.intV = ((val.floatV > low) && (val.floatV <= high)) ? 1 : 0;

  val.varType = TValue::INTVAR;
}


/*  Builds the discretized counterpart of the continuous variable 'var': an
    ordered TEnumVariable named "D_"+<name of var> whose first value names the
    region outside the interval and whose second value names the region
    between low and high.

    The new variable's getValueFrom is a TClassifierFromVar that reads 'var'
    and uses this object as its transformer.

    Side effect: low and high are rounded in place to the number of decimals
    used in the value names.

    Raises an error if 'var' is not a TFloatVariable or if high <= low. */
PVariable TBiModalDiscretizer::constructVar(PVariable var)
{
  TFloatVariable *continuous = var.AS(TFloatVariable);
  if (!continuous)
    raiseError("invalid attribute type (continuous attribute expected)");

  TEnumVariable *discrete = mlnew TEnumVariable("D_"+var->name);
  PVariable wrapped(discrete);

  discrete->ordered = true;

  if (high <= low)
    raiseError("invalid interval: (%5.3f, %5.3f]", low, high);

  // Decimals follow from the interval width, unless the variable asks for more.
  float factor;
  int digits = numDecs(high-low, factor);
  if ((continuous->adjustDecimals != 2) && (digits < continuous->numberOfDecimals)) {
    digits = continuous->numberOfDecimals;
    factor = roundFromDecs(continuous->numberOfDecimals);
  }

  roundToFactor(low, factor);
  roundToFactor(high, factor);
  const string lowText = mcvt(low, digits);
  const string highText = mcvt(high, digits);

  string outside("<=");
  outside += lowText;
  outside += " or >";
  outside += highText;
  discrete->values->push_back(outside);

  string inside("between ");
  inside += lowText;
  inside += " and ";
  inside += highText;
  discrete->values->push_back(inside);

  TClassifierFromVar *classifier = mlnew TClassifierFromVar(wrapped, var);
  classifier->transformer = this; // rewrapping
  wrapped->getValueFrom = classifier;
  return wrapped;
}


// Replaces the contents of 'cutoffs' with the two interval bounds, low first.
void TBiModalDiscretizer::getCutoffs(vector<float> &cutoffs) const
{
  const float bounds[2] = { low, high };
  cutoffs.assign(bounds, bounds+2);
}


// Creates a discretizer that owns a freshly allocated, default-constructed
// list of cut-off points.
TIntervalDiscretizer::TIntervalDiscretizer()
  : points(mlnew TFloatList())
{
}


// Creates a discretizer that uses the given list of cut-off points.
TIntervalDiscretizer::TIntervalDiscretizer(PFloatList apoints)
  : points(apoints)
{
}



/*  Replaces a continuous value with the number of leading cut-off points that
    are strictly smaller than it; counting stops at the first point that is
    not smaller. For special values only the type tag is changed.

    Raises an error if the value is not continuous; 'points' is verified with
    checkProperty first. */
void TIntervalDiscretizer::transform(TValue &val)
{
  checkProperty(points);

  if (val.varType != TValue::FLOATVAR)
    raiseError("continuous value expected");

  if (!val.isSpecial()) {
    val.intV = 0;

    TFloatList::iterator cut(points->begin());
    TFloatList::iterator stop(points->end());
    while ((cut != stop) && (*cut < val.floatV)) {
      cut++;
      val.intV++;
    }
  }

  val.varType = TValue::INTVAR;
}


/*  Builds the discretized counterpart of the continuous variable 'var': an
    ordered TEnumVariable named "D_"+<name of var> with the values "<=p0",
    "(p0, p1]", ..., ">pn" for cut-off points p0..pn (a single value "C" when
    there are no points).

    The new variable's getValueFrom is a TClassifierFromVar that reads 'var'
    and uses this object as its transformer. The classifier is given its own
    variable (cl_evar) of the same name instead of the returned one.
    NOTE(review): presumably this avoids a reference cycle between the
    returned variable and its classifier - confirm. Every value is added to
    both variables, so the classifier's variable is no longer left empty.

    Side effect: the cut-off points are rounded in place to the number of
    decimals used in the value names.

    Raises an error if 'var' is not a TFloatVariable; 'points' is verified
    with checkProperty, as in transform(). */
PVariable TIntervalDiscretizer::constructVar(PVariable var)
{
  checkProperty(points);

  TFloatVariable *fvar = var.AS(TFloatVariable);
  if (!fvar)
    raiseError("invalid attribute type (continuous attribute expected)");

  TEnumVariable *evar = mlnew TEnumVariable("D_"+var->name);
  PVariable revar(evar);

  TEnumVariable *cl_evar = mlnew TEnumVariable("D_"+var->name);
  PVariable cl_revar(cl_evar);

  evar->ordered = true;

  if (!points->size()) {
    evar->addValue("C");
    cl_evar->addValue("C");
  }

  else {
    TFloatList::iterator vb(points->begin()), ve(points->end()), vi;

    // Smallest positive gap between neighbouring points, capped at 1.
    // Non-positive gaps (duplicate or unsorted points) are skipped, because
    // the number of decimals is derived from the logarithm of this value.
    float mindiff = 1.0;
    for(vi = vb+1; vi != ve; ++vi) {
      const float ndiff = *vi - *(vi-1);
      if ((ndiff > 0) && (ndiff < mindiff))
        mindiff = ndiff;
    }

    float roundfactor;
    int decs = numDecs(mindiff, roundfactor);

    if ((fvar->adjustDecimals != 2) && (decs < fvar->numberOfDecimals)) {
      decs = fvar->numberOfDecimals;
      roundfactor = roundFromDecs(fvar->numberOfDecimals);
    }

    vi = points->begin();
    string ostr;

    roundToFactor(*vi, roundfactor);
    ostr = mcvt(*vi, decs);
    evar->addValue(string("<=") + ostr);
    cl_evar->addValue(string("<=") + ostr);

    while(++vi != ve) {
      string s = "(";
      s += ostr;
      s += ", ";
      roundToFactor(*vi, roundfactor);
      ostr = mcvt(*vi, decs);
      s += ostr;
      s += "]";
      evar->addValue(s);
      cl_evar->addValue(s);
    }

    evar->addValue(string(">")+ostr);
    cl_evar->addValue(string(">")+ostr);
  }

  TClassifierFromVar *tcfv = mlnew TClassifierFromVar(cl_revar, var);
  tcfv->transformer = this; // rewrapping
  revar->getValueFrom = tcfv;
  return revar;
}



// Replaces the contents of 'cutoffs' with a copy of the cut-off points held
// in 'points'.
void TIntervalDiscretizer::getCutoffs(vector<float> &cutoffs) const
{
  // NOTE(review): how getReference() behaves when 'points' is unset is not
  // visible in this file - confirm before relying on it.
  cutoffs = points.getReference();
}


// Stores the requested number of intervals. The original note gives 4 as the
// default; the default argument itself is not declared in this file.
TEquiDistDiscretization::TEquiDistDiscretization(const int anumber)
  : TDiscretization(), numberOfIntervals(anumber)
{
}


/*  Discretizes 'var' into numberOfIntervals equally wide intervals spanning
    the range [min, max] recorded in valStat, and returns the variable
    constructed by the resulting TEquiDistDiscretizer.
    NOTE(review): unlike the overload taking an example generator, this one
    does not reject numberOfIntervals <= 0 before dividing - confirm whether
    that is intended. */
PVariable TEquiDistDiscretization::operator()(PBasicAttrStat valStat, PVariable var) const
{
  const float width = (valStat->max-valStat->min)/numberOfIntervals;
  const float firstBoundary = valStat->min+width;

  PEquiDistDiscretizer discretizer = mlnew TEquiDistDiscretizer(numberOfIntervals, firstBoundary, width);
  return discretizer->constructVar(var);
}


/*  Discretizes 'var' into numberOfIntervals equally wide intervals spanning
    the range of its non-special values in the examples from 'gen', and returns
    the variable constructed by the resulting TEquiDistDiscretizer.

    Raises an error if 'var' is not continuous, if numberOfIntervals is not
    positive, or if the variable has no non-special value in 'gen'. */
PVariable TEquiDistDiscretization::operator()(PExampleGenerator gen, PVariable var, const long &)
{
  if (var->varType != TValue::FLOATVAR)
    raiseError("attribute '%s' is not continuous", var->name.c_str());

  if (numberOfIntervals <= 0)
    raiseError("invalid number of intervals (%i)", numberOfIntervals);

  const int column = gen->domain->getVarNum(var);

  // Skip leading examples whose value is special.
  TExampleIterator example(gen->begin());
  while (example && (*example)[column].isSpecial())
    ++example;

  if (!example)
    raiseError("attribute '%s' has no known values", var->name.c_str());

  // Scan the remaining examples for the smallest and the largest value.
  float lowest = (*example)[column].floatV;
  float highest = lowest;
  while (++example) {
    if ((*example)[column].isSpecial())
      continue;

    const float current = (*example)[column].floatV;
    if (current > highest)
      highest = current;
    if (current < lowest)
      lowest = current;
  }

  const float width = (highest-lowest)/numberOfIntervals;
  PEquiDistDiscretizer discretizer = mlnew TEquiDistDiscretizer(numberOfIntervals, lowest+width, width);
  return discretizer->constructVar(var);
}



// Creates a discretization whose cut-off points are a copy of 'pts'.
TFixedDiscretization::TFixedDiscretization(TFloatList &pts)
  : points(mlnew TFloatList(pts))
{
}


/*  Creates a discretization from a textual list of cut-off points.

    'boundaries' is split into atoms with string2atoms and each atom is parsed
    as a float; the points must be strictly increasing.

    Raises an error if an atom cannot be parsed as a number (formerly such an
    atom was silently accepted with whatever value the list element held) or
    if a point is not greater than its predecessor. */
TFixedDiscretization::TFixedDiscretization(const string &boundaries)
: points()
{
  vector<string> atoms;
  string2atoms(boundaries, atoms);

  points = mlnew TFloatList(atoms.size());
  TFloatList::iterator pi(points->begin());
  ITERATE(vector<string>, ai, atoms) {
    if (sscanf((*ai).c_str(), "%f", &*pi) != 1)
      raiseError("invalid cut-off point '%s'", (*ai).c_str());
    if ((pi != points->begin()) && (*pi <= pi[-1]))
      raiseError("mismatch in cut-off points");
    ++pi;
  }
}


// Discretizes 'var' with a TIntervalDiscretizer that works on its own copy of
// the stored cut-off points; the example generator and the last argument are
// not used.
PVariable TFixedDiscretization::operator ()(PExampleGenerator, PVariable var, const long &)
{
  PIntervalDiscretizer intervalDiscretizer = mlnew TIntervalDiscretizer(mlnew TFloatList(points));

  return intervalDiscretizer->constructVar(var);
}



// Stores the requested number of intervals and switches recursive division on.
TEquiNDiscretization::TEquiNDiscretization(int anumber)
  : numberOfIntervals(anumber), recursiveDivision(true)
{
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -