⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 distvars.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 3 页
字号:
  return distribution[v]; 
}


/*  Returns a reference to the element at index 'v'.
    Raises an error if the distribution is empty or if 'v' is not within
    0..size()-1. */
const float &TDiscDistribution::atint(const int &v) const
{
  const int n = int(size());
  if (!n)
    raiseError("empty distribution");

  // The upper bound is passed as an int so that it matches the %i conversion
  // (raiseError is used here with printf-style arguments).
  if ((v < 0) || (v >= n))
    raiseError("value %i out of range 0-%i", v, n-1);

  return at(v);
}


/*  Adds weight 'w' to the element at index 'v'. The distribution is extended
    with zeros when 'v' lies beyond its current end. 'abs' and 'cases' grow
    by 'w' and the distribution is marked as not normalized.
    Raises an error if 'v' is negative or greater than 1e6. */
void TDiscDistribution::addint(const int &v, const float &w)
{
  if ((v<0) || (v>1e6))
    raiseError("invalid value");

  // number of elements that must be appended for index 'v' to exist
  int missing = v+1 - size();
  if (missing>0) {
    reserve(v+1);
    for(; missing; missing--)
      push_back(0.0);
  }

  distribution[v] += w;
  abs += w;
  cases += w;
  normalized = false;
}


/*  Sets the element at index 'v' to 'w'. The distribution is extended with
    zeros when 'v' lies beyond its current end. 'abs' and 'cases' are adjusted
    by the difference between the new and the old element value, and the
    distribution is marked as not normalized.
    Raises an error if 'v' is negative or greater than 1e6. */
void TDiscDistribution::setint(const int &v, const float &w)
{
  if ((v<0) || (v>1e6))
    raiseError("invalid value");

  // number of elements that must be appended for index 'v' to exist
  int missing = v+1 - size();
  if (missing>0) {
    reserve(v+1);
    for(; missing; missing--)
      push_back(0.0);
  }

  float &slot = distribution[v];
  const float delta = w-slot;
  abs += delta;
  cases += delta;
  slot = w;
  normalized = false;
}


/*  Adds 'other', multiplied element-wise by 'factor', to this distribution.
    This distribution is first extended with zeros to the size of 'other' if
    it is shorter. 'abs' grows by other's 'abs' times 'factor'; 'cases' and
    'unknowns' grow by other's values without scaling.
    Raises an error if 'other' is not a TDiscDistribution. */
TDistribution &TDiscDistribution::adddist(const TDistribution &other, const float &factor)
{
  const TDiscDistribution *src = dynamic_cast<const TDiscDistribution *>(&other);
  if (!src)
    raiseError("wrong type of distribution for +=");

  int missing = src->size() - size();
  if (missing>0) {
    reserve(src->size());
    for(; missing; missing--)
      push_back(0.0);
  }

  iterator ti = begin();
  for(const_iterator oi = src->begin(), oe = src->end(); oi!=oe; oi++, ti++)
    *ti += *oi * factor;

  abs += src->abs * factor;
  cases += src->cases;
  unknowns += src->unknowns;
  normalized = false;
  return *this;
}


/*  Subtracts 'other' element-wise from this distribution.
    This distribution is first extended with zeros to the size of 'other' if
    it is shorter. 'abs', 'cases' and 'unknowns' are reduced by the
    corresponding values of 'other'.
    Raises an error if 'other' is not a TDiscDistribution. */
TDistribution &TDiscDistribution::operator -=(const TDistribution &other)
{
  const TDiscDistribution *src = dynamic_cast<const TDiscDistribution *>(&other);
  if (!src)
    raiseError("wrong type of distribution for -=");

  int missing = src->size() - size();
  if (missing>0) {
    reserve(src->size());
    for(; missing; missing--)
      push_back(0.0);
  }

  iterator ti = begin();
  for(const_iterator oi = src->begin(), oe = src->end(); oi!=oe; oi++, ti++)
    *ti -= *oi;

  abs -= src->abs;
  cases -= src->cases;
  unknowns -= src->unknowns;
  normalized = false;
  return *this;
}


/*  Wrapped-pointer overload of adddist: adds the distribution obtained from
    other.getReference(), scaled by 'factor', to this distribution.
    The factor is forwarded as given; it used to be replaced by 1.0. */
TDistribution &TDiscDistribution::adddist(PDistribution other, const float &factor)
{ return adddist(other.getReference(), factor); }


/*  Adds 'other' to this distribution; equivalent to adddist with factor 1.0. */
TDistribution &TDiscDistribution::operator +=(const TDistribution &other)
{
  return adddist(other, 1.0);
}


/*  Wrapped-pointer overload of +=: adds the distribution obtained from
    other.getReference() with factor 1.0. */
TDistribution &TDiscDistribution::operator +=(PDistribution other)
{
  return adddist(other.getReference(), 1.0);
}


/*  Wrapped-pointer overload of -=: subtracts the distribution obtained from
    other.getReference(). */
TDistribution &TDiscDistribution::operator -=(PDistribution other)
{
  return this->operator -=(other.getReference());
}



/*  Multiplies every element and 'abs' by 'weight' and marks the distribution
    as not normalized. */
TDistribution &TDiscDistribution::operator *=(const float &weight)
{
  for(iterator di = begin(); di!=end(); di++)
    *di *= weight;

  abs *= weight;
  normalized = false;
  return *this;
}


/*  Multiplies this distribution element-wise by 'other'.
    Elements of this distribution that have no counterpart in 'other' are
    erased. 'abs' is recomputed as the sum of the resulting elements.
    Raises an error if 'other' is not a TDiscDistribution. */
TDistribution &TDiscDistribution::operator *=(const TDistribution &other)
{
  const TDiscDistribution *src = dynamic_cast<const TDiscDistribution *>(&other);
  if (!src)
    raiseError("wrong type of distribution for *=");

  abs = 0.0;
  iterator di = begin(), de = end();
  const_iterator si = src->begin(), se = src->end();
  for(; (di!=de) && (si!=se); di++, si++) {
    *di *= *si;
    abs += *di;
  }

  // drop the tail that 'other' does not cover
  if (di!=de)
    erase(di, de);

  normalized = false;
  return *this;
}


/*  Wrapped-pointer overload of *=: multiplies by the distribution obtained
    from other.getReference(). */
TDistribution &TDiscDistribution::operator *= (PDistribution other)
{
  return this->operator *=(other.getReference());
}


/*  Divides this distribution element-wise by 'other'.
    A divisor within (-1e-20, 1e-20) is treated as zero: the element is left
    unchanged if it is itself within [-1e-20, 1e-20], otherwise an error is
    raised. Elements of this distribution that have no counterpart in 'other'
    are erased. 'abs' is recomputed as the sum of the quotients.
    Raises an error if 'other' is not a TDiscDistribution. */
TDistribution &TDiscDistribution::operator /=(const TDistribution &other)
{
  const TDiscDistribution *src = dynamic_cast<const TDiscDistribution *>(&other);
  if (!src)
    raiseError("wrong type of distribution for /=");

  abs = 0.0;
  iterator di = begin(), de = end();
  const_iterator si = src->begin(), se = src->end();
  while ((di!=de) && (si!=se)) {
    const bool zeroDivisor = (-1e-20 < *si) && (*si < 1e-20);
    if (!zeroDivisor)
      abs += (*di /= *si);
    else if ((*di<-1e-20) || (*di>1e-20))
      raiseError("division by zero in /=");

    di++;
    si++;
  }

  // drop the tail that 'other' does not cover
  if (di!=de)
    erase(di, de);

  normalized = false;
  return *this;
}


/*  Wrapped-pointer overload of /=: divides by the distribution obtained from
    other.getReference(). */
TDistribution &TDiscDistribution::operator /= (PDistribution other)
{
  return this->operator /=(other.getReference());
}


/*  Multiplies each element of this distribution by 'weight' times the
    corresponding element of 'other'.
    Elements of this distribution that have no counterpart in 'other' are
    erased. 'abs' is recomputed as the sum of the resulting elements.
    Raises an error if 'other' is not a TDiscDistribution. */
TDistribution &TDiscDistribution::mul(const TDistribution &other, const float &weight)
{ const TDiscDistribution *mother=dynamic_cast<const TDiscDistribution *>(&other);
  if (!mother)
    // the message used to name "-=", copied from another operator
    raiseError("wrong type of distribution for mul");

  abs = 0.0;
  iterator di = begin(), de = end();
  const_iterator di2 = mother->begin(), de2 = mother->end();
  while ((di!=de) && (di2!=de2))
    abs += (*(di++) *= weight * *(di2++));

  // drop the tail that 'other' does not cover
  if (di!=de)
    erase(di, de);

  normalized = false;
  return *this;
}


/*  Wrapped-pointer overload of mul: forwards the distribution obtained from
    other.getReference() together with 'weight'. */
TDistribution &TDiscDistribution::mul(PDistribution other, const float &weight)
{
  return mul(other.getReference(), weight);
}


/*  Returns normalized scalar products of distributions of 'other' and 'this'.
    The result corresponds to a probability that two random values chosen according
    to the given distributions are same. */
float TDiscDistribution::compatibility(const TSomeValue &ot) const
{ const TDiscDistribution *dv=dynamic_cast<const TDiscDistribution *>(&ot);
  if (dv) {
    float sum=0;
    for(const_iterator i1=begin(), i2=dv->begin();
        (i1!=end());
        sum += *(i1++) * *(i2++))
    return sum/abs/dv->abs;
  }

  const TValue *vv=dynamic_cast<const TValue *>(&ot);
  if (   (vv) 
      || (vv->varType==TValue::INTVAR))
    return (vv->intV>int(size())) ? 0.0 : operator[](vv->intV)/abs;
      
  raiseError("can't compare values of different types");
  return 0.0; // to make compilers happy
}


/*  Declared only since it is abstract in TSomeValue.
    Definition is somewhat artificial: compare does a lexicographical comparison of probabilities. */
int  TDiscDistribution::compare(const TSomeValue &ot) const
{ const TDiscDistribution *dv=dynamic_cast<const TDiscDistribution *>(&ot);
  if (!dv)
    raiseError("can't compare values of different types");

  const_iterator i1=begin(), i2=dv->begin();
  for( ; (i1!=end()) && (*i1==*i2); i1++, i2++);
  if (i1==end())
    return 0;
  else 
    if (*i1<*i2)
      return -1;
  return 1;
}


/*  Declared only since it is abstract in TSomeValue.
    The definition is somewhat artificial: compatible returns true if
    compatibility>0 (i.e. if there is a non-zero probability that random values
    with the given distributions are the same). */
bool  TDiscDistribution::compatible (const TSomeValue &ot) const
{
  const float overlap = compatibility(ot);
  return overlap>0;
}


/*  Normalizes the distribution unless it is already marked as normalized.
    With a non-zero 'abs' every element is divided by 'abs'; with a zero 'abs'
    and at least one element, every element is set to 1/size(). In both cases
    'abs' becomes 1.0. The distribution is then marked as normalized. */
void TDiscDistribution::normalize()
{
  if (normalized)
    return;

  if (abs) {
    this_ITERATE(dvi)
      *dvi /= abs;
    abs=1.0;
  }
  else if (size()) {
    // no weight at all: fall back to a uniform distribution
    float uniform = 1.0/float(size());
    this_ITERATE(dvi)
      *dvi = uniform;
    abs = 1.0;
  }

  normalized = true;
}


/*  Returns the index of the largest element, or 0 for an empty distribution.
    When several elements share the largest value, one of them is picked
    using a checksum accumulated with add_CRC over the elements from index 1
    on, so the choice depends only on the distribution's contents. */
int TDiscDistribution::highestProbIntIndex() const
{
  if (!size())
    return 0;

  const int n = int(size());
  int ties = 1;
  int winner = 0;
  float topP = operator[](0);

  unsigned long crc;
  INIT_CRC(crc);

  int i;
  for(i = 1; i < n; i++) {
    const float &P = operator[](i);
    add_CRC(P, crc);

    if (P > topP) {
      winner = i;
      topP = P;
      ties = 1;
    }
    else if (P==topP)
      ties++;
  }

  if (ties==1)
    return winner;

  FINISH_CRC(crc);
  crc &= 0x7fffffff;

  // 'ties' now counts down to the chosen one among the tied elements
  ties = 1 + crc % ties;
  i = 0;
  while (ties) {
    if (operator[](i)==topP)
      ties--;
    i++;
  }

  return i-1;
}


/*  Returns the index of the largest element, or 0 for an empty distribution.
    When several elements share the largest value, 'random' selects which of
    them is returned (the (random mod ties)-th one, counting from the start).

    'random' may be negative: the remainder is brought into 0..ties-1 before
    use. Previously a negative remainder produced a non-positive counter, so
    the function returned -1 or walked past the end of the distribution. */
int TDiscDistribution::highestProbIntIndex(const long &random) const
{
  if (!size())
    return 0;

  int wins = 1;
  int best = 0;
  float bestP = operator[](0);
  int i, e;

  for(i = 1, e = int(size()); --e; i++)
    if (operator[](i) > bestP) {
      best = i;
      bestP = operator[](i);
      wins = 1;
    }
    else if (operator[](i)==bestP)
      wins++;

  if (wins==1)
    return best;

  // non-negative remainder, identical to random % wins for random >= 0
  long pick = random % wins;
  if (pick < 0)
    pick += wins;

  for(i = 0, wins = 1 + int(pick); wins; i++)
    if (operator[](i)==bestP)
      wins--;

  return i-1;
}


/*  Returns the index of the largest element, or 0 for an empty distribution.
    When several elements share the largest value, exam.sumValues() selects
    which of them is returned.
    NOTE(review): this relies on exam.sumValues() being non-negative - confirm
    against TExample. */
int TDiscDistribution::highestProbIntIndex(const TExample &exam) const
{
  if (!size())
    return 0;

  const int n = int(size());
  int ties = 1;
  int winner = 0;
  float topP = operator[](0);
  int i;

  for(i = 1; i < n; i++) {
    if (operator[](i) > topP) {
      winner = i;
      topP = operator[](i);
      ties = 1;
    }
    else if (operator[](i)==topP)
      ties++;
  }

  if (ties==1)
    return winner;

  // 'ties' now counts down to the chosen one among the tied elements
  ties = 1 + exam.sumValues() % ties;

  for(i = 0; ties; i++)
    if (operator[](i)==topP)
      ties--;

  return i-1;
}


/*  Returns the value of the largest element (as stored, not divided by
    'abs'). If no element is greater than -1 - in particular when the
    distribution is empty - returns 1/size(), or 0.0 for an empty distribution.

    The running maximum is kept as a value; the previous code stored the
    element's index in it and compared later elements against that index,
    which could return a non-maximal element. */
float TDiscDistribution::highestProb() const
{
  bool found = false;
  float bestP = -1;
  for(int i=0, isize = size(); i<isize; i++)
    if (operator[](i) > bestP) {
      bestP = operator[](i);
      found = true;
    }

  if (found)
    return bestP;
  else
    return size() ? 1.0/size() : 0.0;
}


/*  Looks at the first non-zero element and returns whether it equals 'abs'.
    If every element is zero (or there are none), returns whether the
    distribution has exactly one element. */
bool TDiscDistribution::noDeviation() const
{
  const_this_ITERATE(dvi) {
    if (*dvi != 0)
      return *dvi == abs;
  }

  return size()==1;
}
  

int TDiscDistribution::randomInt(const long &random)
{ 
  float ri = (random & 0x7fffffff) / float(0x7fffffff);
  const_iterator di(begin());
  while (ri > *di)
    ri -= *(di++);
  return int(di-begin());
}


int TDiscDistribution::randomInt()
{ 
  if (!randomGenerator)
    randomGenerator = mlnew TRandomGenerator;

  float ri = randomGenerator->randfloat(abs);
  const_iterator di(begin());
  while (ri > *di)
    ri -= *(di++);
  return int(di-begin());
}


/*  Returns the element at index 'x' divided by 'abs'.
    When 'abs' is zero the result is 1/size() (0.0 for an empty distribution)
    regardless of 'x'. Otherwise an 'x' at or beyond size() gives 0.0. */
float TDiscDistribution::p(const int &x) const
{
  if (abs) {
    if (x>=size())
      return 0.0;
    return atint(x)/abs;
  }

  // no weight at all: every value is considered equally probable
  return size() ? 1.0/size() : 0.0;
}

/*  Returns the number of elements, i.e. size(). */
int TDiscDistribution::noOfElements() const
{
  const int count = size();
  return count;
}


/*  Returns a checksum of the distribution: every element is fed to add_CRC
    and the low 31 bits of the finished CRC are returned. */
int TDiscDistribution::sumValues() const
{
  unsigned long crc;
  INIT_CRC(crc);

  const_this_ITERATE(dvi)
    add_CRC(*dvi, crc);

  FINISH_CRC(crc);
  crc &= 0x7fffffff;
  return int(crc);
}


/*  Constructs a continuous distribution with 'sum' and 'sum2' at zero and
    flags it as supporting continuous values. */
TContDistribution::TContDistribution()
: sum(0.0), sum2(0.0)
{
  supportsContinuous = true;
}


/*  Constructs a continuous distribution from a copy of 'dist'.
    For each (first, second) pair, 'abs' accumulates second, 'sum' accumulates
    second*first and 'sum2' accumulates second*first*first. 'cases' is set to
    the resulting 'abs'. */
TContDistribution::TContDistribution(const map<float, float> &dist)
: distribution(dist), sum(0.0), sum2(0.0)
{
  abs = 0.0;
  this_ITERATE(di) {
    const float value = (*di).first;
    const float weight = (*di).second;
    abs += weight;
    sum += weight*value;
    sum2 += weight*value*value;
  }

  cases = abs;
  supportsContinuous = true;
}


/*  Constructs an empty continuous distribution for the attribute 'var'.
    Raises an error if var's varType is not TValue::FLOATVAR. */
TContDistribution::TContDistribution(PVariable var)
: TDistribution(var), sum(0.0), sum2(0.0)
{
  const bool isContinuous = (var->varType==TValue::FLOATVAR);
  if (!isContinuous)
    raiseError("attribute '%s' is not continuous", var->name.c_str());

  supportsContinuous = true;
}


TContDistribution::TContDistribution(PExampleGenerator gen, const int &position, const int &weightID)
: sum(0.0),
  sum2(0.0)
{
  supportsContinuous = true;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -