⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 distvars.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 3 页
字号:

  if (position >= gen->domain->variables->size())
    raiseError("index %i out of range", position);

  variable = gen->domain->variables->at(position);
  if (variable->varType != TValue::FLOATVAR)
    raiseError("attribute '%s' is not continuous", variable->name.c_str());

  PEITERATE(ei, gen)
    add((*ei)[position], WEIGHT(*ei));
}


// Builds the distribution of `var` over the examples produced by `gen`.
// The attribute must be of type TValue::FLOATVAR. If the generator's domain
// contains the attribute, values are read from the examples by position;
// otherwise they are computed through the attribute's getValueFrom, and an
// error is raised when that is not set either.
// `weightID` is not referenced directly here; presumably the WEIGHT macro
// reads it -- confirm against the macro's definition.
TContDistribution::TContDistribution(PExampleGenerator gen, PVariable var, const int &weightID)
: TDistribution(var),
  sum(0.0),
  sum2(0.0)
{
  supportsContinuous = true;

  if (variable->varType != TValue::FLOATVAR)
    raiseError("attribute '%s' is not continuous", variable->name.c_str());

  const int attrIndex = gen->domain->getVarNum(variable, false);

  if (attrIndex != ILLEGAL_INT) {
    // The attribute is part of the domain: read it straight from each example.
    PEITERATE(exi, gen)
      add((*exi)[attrIndex], WEIGHT(*exi));
  }
  else {
    if (variable->getValueFrom) {
      // Not in the domain, but the attribute can derive its value from an example.
      PEITERATE(exi, gen)
        add(variable->computeValue(*exi), WEIGHT(*exi));
    }
    else {
      raiseError("attribute '%s' not in domain and cannot be computed", variable->name.c_str());
    }
  }
}


// Returns the weight stored for value `v`. When `v` has no entry yet, a
// zero-weight entry is created first, so the call never fails.
// The previous condition was inverted (it tested find(v)!=end()), which reset
// the weight of every value that was already present.
const float &TContDistribution::atfloat(const float &v)
{
  if (find(v) == end())
    distribution[v] = 0;
  return distribution[v];
}


const float &TContDistribution::atfloat(const float &v) const
{ const_iterator vi=find(v);
  if (vi==end())
    raiseError("value %5.3f does not exist", v);
  return (*vi).second;
}


void TContDistribution::addfloat(const float &v, const float &w)
{ 
  iterator vi=find(v);
  if (vi==end())
    distribution[v]=w;
  else
    (*vi).second+=w;

  abs += w;
  cases += w;
  sum += w * v;
  sum2 += w * v*v;
  normalized = false;
}


// Sets the weight of value `v` to `w` (replacing any previous weight) and
// moves the running totals (abs, cases, sum, sum2) by the resulting change.
// The distribution is marked as not normalized afterwards.
//
// Fixes over the previous version:
//  - for a new value the squared term was added to `sum` instead of `sum2`;
//  - for an existing value the stored weight was incremented by `w` rather
//    than set to `w`, contradicting the difference applied to the totals;
//  - for an existing value `cases` grew by `w` instead of by the difference.
void TContDistribution::setfloat(const float &v, const float &w)
{
  iterator vi = find(v);

  // Change in weight at `v`; computed before the entry is overwritten so that
  // `w` may safely refer to the stored weight itself.
  float dif;
  if (vi == end()) {
    dif = w;
    distribution[v] = w;
  }
  else {
    dif = w - (*vi).second;
    (*vi).second = w;
  }

  abs += dif;
  cases += dif;
  sum += dif * v;
  sum2 += dif * v*v;

  normalized = false;
}


// Adds another continuous distribution to this one: every (value, weight)
// pair of `other` is added through addfloat and the unknowns counts are
// summed. Raises an error when `other` is not a TContDistribution.
TDistribution &TContDistribution::operator +=(const TDistribution &other)
{
  const TContDistribution *rhs = dynamic_cast<const TContDistribution *>(&other);
  if (!rhs)
    raiseError("wrong distribution type for +=");

  const_PITERATE(TContDistribution, entry, rhs)
    addfloat((*entry).first, (*entry).second);

  unknowns += rhs->unknowns;

  return *this;
}


// Subtracts another continuous distribution from this one: every weight of
// `other` is added with the opposite sign through addfloat and its unknowns
// count is subtracted. Raises an error when `other` is not a
// TContDistribution.
TDistribution &TContDistribution::operator -=(const TDistribution &other)
{
  const TContDistribution *rhs = dynamic_cast<const TContDistribution *>(&other);
  if (!rhs)
    raiseError("wrong distribution type for -=");

  const_PITERATE(TContDistribution, entry, rhs)
    addfloat((*entry).first, -(*entry).second);

  unknowns -= rhs->unknowns;

  return *this;
}


// Wrapped-pointer overload: forwards to operator += on the object that
// `other` refers to.
TDistribution &TContDistribution::operator +=(PDistribution other)
{
  return this->operator +=(other.getReference());
}


// Wrapped-pointer overload: forwards to operator -= on the object that
// `other` refers to.
TDistribution &TContDistribution::operator -=(PDistribution other)
{
  return this->operator -=(other.getReference());
}



// Multiplies every stored weight, and the totals abs, sum and sum2, by
// `weight`. `cases` and `unknowns` are left as they are. The distribution is
// marked as not normalized afterwards.
TDistribution &TContDistribution::operator *=(const float &weight)
{
  iterator entry(begin());
  const iterator stop(end());
  while (entry != stop) {
    (*entry).second *= weight;
    ++entry;
  }

  abs *= weight;
  sum *= weight;
  sum2 *= weight;
  normalized = false;

  return *this;
}


// Returns the value (map key) that carries the largest weight. Ties are
// broken by a TSimpleRandomGenerator seeded from the stored keys and weights,
// so the choice depends only on the distribution's contents.
// Raises an error when the distribution has no entries.
float TContDistribution::highestProbFloatIndex() const
{
  // Could use sumValues here, but it's too expensive; this should work for distributions that are distributed enough
  //
  // The seed is the wrapping sum of the raw bytes of every key and weight.
  // The bytes are copied through unsigned char rather than read through a
  // long*: that cast reads sizeof(long) bytes from a sizeof(float) object and
  // therefore runs past the float wherever long is wider than float. The sum
  // is kept unsigned so that wrap-around is well defined. Where long and
  // float have the same size the seed is the same as before on
  // two's-complement machines.
  unsigned long seed = 0;
  { const_this_ITERATE(i) {
      long keyBits = 0, weightBits = 0;
      const unsigned char *keySrc = reinterpret_cast<const unsigned char *>(&(*i).first);
      const unsigned char *weightSrc = reinterpret_cast<const unsigned char *>(&(*i).second);
      unsigned char *keyDst = reinterpret_cast<unsigned char *>(&keyBits);
      unsigned char *weightDst = reinterpret_cast<unsigned char *>(&weightBits);
      for (unsigned b = 0; (b < sizeof(float)) && (b < sizeof(long)); b++) {
        keyDst[b] = keySrc[b];
        weightDst[b] = weightSrc[b];
      }
      seed += (unsigned long)keyBits + (unsigned long)weightBits;
    }
  }

  TSimpleRandomGenerator rg((long)seed);

  int wins=0;
  const_iterator best;
  // The first entry always becomes best (wins==0); a strictly larger weight
  // replaces it and resets the tie count to 1; an equal weight replaces it
  // only when rg.randbool(++wins) says so (presumably true with probability
  // 1/wins -- confirm against TSimpleRandomGenerator). The short-circuit on
  // wins==0 keeps `best` from being read before it has been assigned.
  const_this_ITERATE(i)
    if (   (wins==0) && ((wins=1)==1)
        || ((*i).second >  (*best).second) && ((wins=1)==1)
        || ((*i).second == (*best).second) && rg.randbool(++wins))
      best = i;

  if (!wins)
    raiseError("cannot compute the modus of an empty distribution");

  return (*best).first;
}


// Returns the largest weight stored in the distribution. The entry is chosen
// exactly as in a "most probable value" search: ties are broken by a
// TSimpleRandomGenerator seeded from the stored keys and weights (tied
// entries have equal weights, so the returned number does not depend on the
// tie-break). When no entry is selected the result is 1/size(), or 0 for an
// empty distribution.
float TContDistribution::highestProb() const
{
  // The seed is the wrapping sum of the raw bytes of every key and weight.
  // The bytes are copied through unsigned char rather than read through a
  // long*: that cast reads sizeof(long) bytes from a sizeof(float) object and
  // therefore runs past the float wherever long is wider than float. The sum
  // is kept unsigned so that wrap-around is well defined. Where long and
  // float have the same size the seed is the same as before on
  // two's-complement machines.
  unsigned long seed = 0;
  { const_this_ITERATE(i) {
      long keyBits = 0, weightBits = 0;
      const unsigned char *keySrc = reinterpret_cast<const unsigned char *>(&(*i).first);
      const unsigned char *weightSrc = reinterpret_cast<const unsigned char *>(&(*i).second);
      unsigned char *keyDst = reinterpret_cast<unsigned char *>(&keyBits);
      unsigned char *weightDst = reinterpret_cast<unsigned char *>(&weightBits);
      for (unsigned b = 0; (b < sizeof(float)) && (b < sizeof(long)); b++) {
        keyDst[b] = keySrc[b];
        weightDst[b] = weightSrc[b];
      }
      seed += (unsigned long)keyBits + (unsigned long)weightBits;
    }
  }

  TSimpleRandomGenerator rg((long)seed);

  int wins=0;
  const_iterator best;
  // The first entry always becomes best (wins==0); a strictly larger weight
  // replaces it and resets the tie count to 1; an equal weight replaces it
  // only when rg.randbool(++wins) says so. The short-circuit on wins==0 keeps
  // `best` from being read before it has been assigned.
  const_this_ITERATE(i)
    if (   (wins==0) && ((wins=1)==1)
        || ((*i).second >  (*best).second) && ((wins=1)==1)
        || ((*i).second == (*best).second) && rg.randbool(++wins))
      best = i;

  if (wins)
    return (*best).second;
  else
    return size() ? 1.0/size() : 0.0;
}


// True when the distribution holds exactly one distinct value.
bool TContDistribution::noDeviation() const
{
  return 1 == size();
}


// Weighted mean of the stored values, sum/abs.
// Raises an error when abs is zero; the message names the attribute when one
// is attached to the distribution.
float TContDistribution::average() const
{
  if (!abs) {
    if (variable)
      raiseError("cannot compute average ('%s' has no defined values)", variable->name.c_str());
    else
      raiseError("cannot compute average (attribute has no defined values)");
  }

  return sum/abs;
}


// Standard deviation computed from the running totals:
// sqrt((sum2 - sum*sum/abs) / abs).
// Raises an error when abs is not above 1e-7; the message names the attribute
// when one is attached to the distribution.
float TContDistribution::dev() const
{
  if (abs<=1e-7) {
    if (variable)
      raiseError("cannot compute standard deviation ('%s' has no defined values)", variable->name.c_str());
    else
      raiseError("cannot compute standard deviation (attribute has no defined values)");
  }

  return sqrt((sum2-sum*sum/abs)/abs);
}
  
// Variance computed from the running totals: (sum2 - sum*sum/abs) / abs.
// Raises an error when abs is zero; the message names the attribute when one
// is attached to the distribution.
float TContDistribution::var() const
{
  if (!abs) {
    if (variable)
      raiseError("cannot compute variance ('%s' has no defined values)", variable->name.c_str());
    else
      raiseError("cannot compute variance (attribute has no defined values)");
  }

  return (sum2-sum*sum/abs)/abs;
}
  
// Returns sqrt((sum2 - sum*sum/abs) / (abs-1) / abs), or 0 when abs does not
// exceed 1.
float TContDistribution::error() const
{
  if (abs<=1.0)
    return 0.0;

  return sqrt((sum2-sum*sum/abs)/(abs-1) / abs);
}


float TContDistribution::percentile(const float &perc) const
{ if ((perc<0) || (perc>100))
    raiseError("invalid percentile");

  if (!size())
    raiseError("empty distribution");

  if (perc==0.0)
    return (*begin()).first;
  
  if (perc==100.0) {
    const_iterator li(end());
    return (*--li).first;
  }

  float togo = abs*perc/100.0;
  const_iterator ths(begin()), prev, ee(end());

  if (ths == ee)
    raiseError("empty distribution");

  while ((ths != ee) && (togo > 0)) {
    togo -= (*ths).second;
    prev = ths;
    ths++;
  }

  if ((togo < 0) || (ths == ee))
    return (*prev).first;

  // togo==0.0 && ths!=ee
  return ((*prev).first + (*ths).first) / 2.0;
}


// Rescales the distribution so that the weights add up to 1; does nothing
// when it is already flagged as normalized.
//  - abs != 0: every weight, sum and sum2 are divided by abs.
//  - abs == 0 but entries exist: every value gets the same weight 1/size()
//    and sum/sum2 are rebuilt for those weights.
//  - no entries: only the normalized flag is set.
void TContDistribution::normalize()
{ if (!normalized) {
    if (abs) {
      this_ITERATE(dvi)
        (*dvi).second /= abs;
      sum /= abs;
      sum2 /= abs;
      abs = 1.0;
    }
    else if (size()) {
      const float p = 1.0/float(size());
      sum = 0.0;
      sum2 = 0.0;
      // Each value now weighs p, so the totals are weighted by p as well.
      // The previous code divided the unweighted totals by abs, which is
      // zero in this branch.
      this_ITERATE(dvi) {
        (*dvi).second = p;
        sum += p * (*dvi).first;
        sum2 += p * sqr((*dvi).first);
      }
      abs = 1.0;
    }

    normalized = true;
  }
}


float TContDistribution::randomFloat()
{
  if (!randomGenerator)
    randomGenerator = mlnew TRandomGenerator;

  float ri = randomGenerator->randfloat(abs);
  const_iterator di(begin());
  while (ri > (*di).first)
    ri -= (*(di++)).first;
  return (*di).second;
}


float TContDistribution::randomFloat(const long &random)
{ 
  float ri = (random & 0x7fffffff) / float(0x7fffffff);
  const_iterator di(begin());
  while (ri > (*di).first)
    ri -= (*(di++)).first;
  return (*di).second;
}


// Returns the weight at `x`: the stored weight when `x` is one of the stored
// values, a linear interpolation between the two neighbouring entries when
// `x` lies between stored values, and 0 when `x` is below the smallest or
// above the largest stored value (or the distribution is empty).
//
// upper_bound yields the first entry with a key strictly greater than `x`,
// so an exact match can only be the entry just before it. The previous code
// compared the upper bound itself with `x`, which never matched, and
// returned 0 for `x` equal to the largest stored value.
float TContDistribution::p(const float &x) const
{ const_iterator rb = upper_bound(x);
  if (rb==begin())
    return 0.0;   // x is below every stored value, or there are none

  const_iterator lb = rb;
  lb--;

  if ((*lb).first==x)
    return (*lb).second;
  if (rb==end())
    return 0.0;   // x is above every stored value

  return (*lb).second + (x - (*lb).first) * ((*rb).second - (*lb).second) / ((*rb).first - (*lb).first);
}


int TContDistribution::sumValues() const
{ unsigned long crc;
  INIT_CRC(crc);

  const_this_ITERATE(dvi) {
    add_CRC((*dvi).first, crc);
    add_CRC((*dvi).second, crc);
  }

  FINISH_CRC(crc);
  return int(crc & 0x7fffffff);
}


// Creates a Gaussian distribution with the given mean and sigma. `anabs` is
// stored as abs. The object is flagged as normalized and as supporting
// continuous values.
TGaussianDistribution::TGaussianDistribution(const float &amean, const float &asigma, const float &anabs)
: mean(amean),
  sigma(asigma)
{
  supportsContinuous = true;
  normalized = true;
  abs = anabs;
}


// Creates a Gaussian distribution matching another distribution: the mean is
// dist->average(), sigma is dist->dev() and abs is copied from dist. The
// object is flagged as normalized and as supporting continuous values.
//
// sigma is used as a standard deviation throughout this class (dev() returns
// it, var() returns its square), and dist->dev() already is a standard
// deviation, so it is taken as is. The previous code applied sqrt to it a
// second time.
TGaussianDistribution::TGaussianDistribution(PDistribution dist)
: mean(dist->average()),
  sigma(dist->dev())
{
 abs = dist->abs;
 normalized = true; 
 supportsContinuous = true; 
}



// Returns the stored mean.
float TGaussianDistribution::average() const
{
  return mean;
}


// Returns the variance, i.e. the square of the stored sigma.
float TGaussianDistribution::var() const
{
  return sigma*sigma;
}
  

// Returns the standard deviation, which is the stored sigma.
float TGaussianDistribution::dev() const
{
  return sigma;
}
  

// Returns the stored sigma unchanged (the same number dev() reports).
float TGaussianDistribution::error() const
{
  return sigma;
}
  

// Normalizing a Gaussian only resets abs to 1; mean and sigma are untouched.
void TGaussianDistribution::normalize()
{
  abs = 1.0;
}


// The most probable value is reported as the stored mean.
float TGaussianDistribution::highestProbFloatIndex() const
{
  return mean;
}


// The constant pi as a double literal; used by the Gaussian density formulas
// (highestProb and p) defined after this point.
#define pi 3.1415926535897931

// Returns abs / (sigma * sqrt(2*pi)), which is what p() evaluates to at the
// mean.
float TGaussianDistribution::highestProb() const
{
  return abs / (sigma * sqrt(2*pi));
}


// Draws a random number through gasdev(mean, sigma, generator), using the
// distribution's own random generator (created on first use).
float TGaussianDistribution::randomFloat()
{
  if (!randomGenerator)
    randomGenerator = mlnew TRandomGenerator;

  return static_cast<float>(gasdev(static_cast<double>(mean),
                                   static_cast<double>(sigma),
                                   randomGenerator.getReference()));
}


float TGaussianDistribution::randomFloat(const long &random)
{  
  TRandomGenerator rg(random);
  return (float)gasdev((double)mean, (double)sigma, rg);
}


// Gaussian density at `x`, scaled by abs:
//   abs * exp(-(x-mean)^2 / (2*sigma^2)) / (sigma * sqrt(2*pi))
//
// The previous exponent was -((x-mean)/2/sigma)^2 = -(x-mean)^2/(4*sigma^2),
// i.e. the 2 of the denominator ended up inside the square. At x == mean the
// result is unchanged and equals highestProb().
float TGaussianDistribution::p(const float &x) const
{ return abs * exp(-sqr((x-mean)/sigma)/2) / (sigma*sqrt(2*pi)); }


// True when the stored sigma is exactly zero.
bool TGaussianDistribution::noDeviation() const
{
  return 0.0 == sigma;
}


// Checksum of the distribution's parameters: mean and sigma are fed into a
// CRC through the INIT_CRC / add_CRC / FINISH_CRC macros. The result is
// masked to 31 bits, so it is never negative.
int TGaussianDistribution::sumValues() const
{
  unsigned long checksum;
  INIT_CRC(checksum);

  add_CRC(mean, checksum);
  add_CRC(sigma, checksum);

  FINISH_CRC(checksum);

  return int(checksum & 0x7fffffff);
}


// Default constructor; the body is empty, so no distributions are added here.
TDomainDistributions::TDomainDistributions()
{}


// Builds one distribution per attribute of the generator's domain and fills
// them from the examples in `gen`.
// Attributes of type INTVAR are skipped when `skipDiscrete` is set and
// attributes of type FLOATVAR when `skipContinuous` is set; a skipped
// attribute gets an empty PDistribution so that positions in this container
// still match positions in the domain.
// `weightID` is not referenced directly here; presumably the WEIGHT macro
// reads it -- confirm against the macro's definition.
TDomainDistributions::TDomainDistributions(PExampleGenerator gen, const long weightID, bool skipDiscrete, bool skipContinuous)
{
  reserve(gen->domain->variables->size());

  PITERATE(TVarList, vi, gen->domain->variables) {
    const bool skipped =    (skipDiscrete && ((*vi)->varType == TValue::INTVAR))
                         || (skipContinuous && ((*vi)->varType == TValue::FLOATVAR));
    if (skipped)
      push_back(PDistribution());
    else
      push_back(TDistribution::create(*vi));
  }

  for(TExampleIterator exi(gen->begin()); exi; ++exi) {
    TExample::iterator valuei = (*exi).begin();
    const float weight = WEIGHT(*exi);

    // Distributions and example values are walked in step; skipped
    // attributes still advance the value iterator.
    for(iterator disti = begin(); disti != end(); disti++, valuei++) {
      if (*disti)
        (*disti)->add(*valuei, weight);
    }
  }
}


// Normalizes every distribution held in this container.
// Empty entries are skipped: the constructor that builds the container from
// examples stores an empty PDistribution for each skipped attribute, and the
// previous code dereferenced those as well.
void TDomainDistributions::normalize()
{ this_ITERATE(di)
    if (*di)
      (*di)->normalize(); 
}


// Computes the distribution of the class attribute over the examples in
// `gen`. Raises an error when `gen` is empty, or when its domain is missing
// or has no class attribute.
// `weightID` is not referenced directly here; presumably the WEIGHT macro
// reads it -- confirm against the macro's definition.
PDistribution getClassDistribution(PExampleGenerator gen, const long &weightID)
{
  if (!gen)
    raiseErrorWho("getClassDistribution", "no examples");

  if (!gen->domain || !gen->domain->classVar)
    raiseErrorWho("getClassDistribution", "invalid example generator or class-less domain");

  PDistribution result = TDistribution::create(gen->domain->classVar);

  // The loop works on the unwrapped object; constness is cast away from the
  // pointer that getUnwrappedPtr() hands out.
  TDistribution *rawResult = const_cast<TDistribution *>(result.getUnwrappedPtr());

  PEITERATE(exi, gen)
    rawResult->add((*exi).getClass(), WEIGHT(*exi));

  return result;
}

#undef NOT_IMPLEMENTED
#undef CHECKVALTYPE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -