⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 distance.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 2 页
字号:
        normalized.push_back((*ei).floatV);
  }
}


/*TExamplesDistanceConstructor_Maximal::TExamplesDistanceConstructor_Maximal()
{}
*/

// Default constructor: the body is empty, so no work beyond member/base
// default construction is performed here.
TExamplesDistanceConstructor_Euclidean::TExamplesDistanceConstructor_Euclidean()
{
}


// Default constructor: the body is empty, so no work beyond member/base
// default construction is performed here.
TExamplesDistanceConstructor_Manhattan::TExamplesDistanceConstructor_Manhattan()
{
}


// Factory call: allocates a TExamplesDistance_Maximal from the ignoreClass,
// normalize and ignoreUnknowns values in scope (presumably settings of this
// constructor object) together with the supplied generator, weight id,
// distributions and basic statistics, and returns it wrapped.
PExamplesDistance TExamplesDistanceConstructor_Maximal::operator()(PExampleGenerator egen, const int &weightID, PDomainDistributions ddist, PDomainBasicAttrStat bstat) const
{
  PExamplesDistance distance = mlnew TExamplesDistance_Maximal(ignoreClass, normalize, ignoreUnknowns, egen, weightID, ddist, bstat);
  return distance;
}


// Factory call: allocates a TExamplesDistance_Manhattan from the ignoreClass,
// normalize and ignoreUnknowns values in scope (presumably settings of this
// constructor object) together with the supplied generator, weight id,
// distributions and basic statistics, and returns it wrapped.
PExamplesDistance TExamplesDistanceConstructor_Manhattan::operator()(PExampleGenerator egen, const int &weightID, PDomainDistributions ddist, PDomainBasicAttrStat bstat) const
{
  PExamplesDistance distance = mlnew TExamplesDistance_Manhattan(ignoreClass, normalize, ignoreUnknowns, egen, weightID, ddist, bstat);
  return distance;
}


// Factory call: allocates a TExamplesDistance_Euclidean from the ignoreClass,
// normalize and ignoreUnknowns values in scope (presumably settings of this
// constructor object) together with the supplied generator, weight id,
// distributions and basic statistics, and returns it wrapped.
PExamplesDistance TExamplesDistanceConstructor_Euclidean::operator()(PExampleGenerator egen, const int &weightID, PDomainDistributions ddist, PDomainBasicAttrStat bstat) const
{
  PExamplesDistance distance = mlnew TExamplesDistance_Euclidean(ignoreClass, normalize, ignoreUnknowns, egen, weightID, ddist, bstat);
  return distance;
}



// Default constructor: empty body; nothing is set up here beyond what the
// base class and members do on their own.
TExamplesDistance_Maximal::TExamplesDistance_Maximal()
{
}


// Default constructor: empty body; nothing is set up here beyond what the
// base class and members do on their own.
TExamplesDistance_Manhattan::TExamplesDistance_Manhattan()
{
}


// Default constructor: empty body; nothing is set up here beyond what the
// base class and members do on their own.
TExamplesDistance_Euclidean::TExamplesDistance_Euclidean()
{
}



// Forwards every argument, unchanged and in the same order, to the
// TExamplesDistance_Normalized constructor; this class adds nothing of its own.
TExamplesDistance_Maximal::TExamplesDistance_Maximal(
    const bool &ignoreClass,
    const bool &normalize,
    const bool &ignoreUnknowns,
    PExampleGenerator egen,
    const int &weightID,
    PDomainDistributions ddist,
    PDomainBasicAttrStat dstat)
  : TExamplesDistance_Normalized(ignoreClass, normalize, ignoreUnknowns, egen, weightID, ddist, dstat)
{
}


// Forwards every argument, unchanged and in the same order, to the
// TExamplesDistance_Normalized constructor; this class adds nothing of its own.
TExamplesDistance_Manhattan::TExamplesDistance_Manhattan(
    const bool &ignoreClass,
    const bool &normalize,
    const bool &ignoreUnknowns,
    PExampleGenerator egen,
    const int &weightID,
    PDomainDistributions ddist,
    PDomainBasicAttrStat dstat)
  : TExamplesDistance_Normalized(ignoreClass, normalize, ignoreUnknowns, egen, weightID, ddist, dstat)
{
}


// Builds the Euclidean distance object.
//
// The arguments are first forwarded to the TExamplesDistance_Normalized base.
// Then a fresh TDomainDistributions is computed from egen/weightID and
// bothSpecialDist receives exactly one entry per element of 'distributions':
//   - 1 - sum(count_i^2) / abs^2  when a usable discrete distribution is present,
//   - 0.0                         otherwise.
// operator() adds the entry to the squared distance when both values of a
// discrete attribute are special.
TExamplesDistance_Euclidean::TExamplesDistance_Euclidean(const bool &ignoreClass, const bool &normalize, const bool &ignoreUnknowns, PExampleGenerator egen, const int &weightID, PDomainDistributions ddist, PDomainBasicAttrStat dstat)
: TExamplesDistance_Normalized(ignoreClass, normalize, ignoreUnknowns, egen, weightID, ddist, dstat),
  distributions(mlnew TDomainDistributions(egen, weightID, false, true)),
  bothSpecialDist(mlnew TAttributedFloatList())
{
  bothSpecialDist->attributes = averages->attributes;

  PITERATE(TDomainDistributions, di, distributions) {
    // Default used whenever no usable distribution exists for this position.
    float entry = 0.0;

    if (*di) {
      TDiscDistribution *distr = (*di).AS(TDiscDistribution);
      // Guard against (a) a stored distribution that is not a TDiscDistribution
      // (AS presumably yields a null pointer then -- TODO confirm) and
      // (b) a distribution with zero total weight, where the division below
      // would be 0/0 and poison every distance with NaN.
      if (distr && (distr->abs != 0)) {
        float sum2 = 0;
        ITERATE(vector<float>, pi, distr->distribution)
          sum2 += (*pi) * (*pi);
        sum2 /= distr->abs * distr->abs;
        entry = 1-sum2;
      }
    }

    bothSpecialDist->push_back(entry);
  }
}



// Maximal distance: the largest of the per-attribute differences produced
// by getDifs(e1, e2, ...); 0.0 when getDifs yields no differences at all.
float TExamplesDistance_Maximal::operator ()(const TExample &e1, const TExample &e2) const
{
  vector<float> perAttribute;
  getDifs(e1, e2, perAttribute);

  if (perAttribute.empty())
    return 0.0;

  return *max_element(perAttribute.begin(), perAttribute.end());
}


// Manhattan distance: the plain sum of the per-attribute differences
// produced by getDifs(e1, e2, ...); 0.0 when there are none.
float TExamplesDistance_Manhattan::operator ()(const TExample &e1, const TExample &e2) const
{
  vector<float> perAttribute;
  getDifs(e1, e2, perAttribute);

  float total = 0.0;
  for (vector<float>::const_iterator it(perAttribute.begin()), last(perAttribute.end()); it != last; it++)
    total += *it;

  return total;
}


// Euclidean distance between two examples with explicit handling of special
// (unknown) values.
//
// getDifs supplies one difference per attribute; the loop walks those
// differences in lock step with the values of e1 and e2 and with the
// per-attribute lists averages, variances, distributions, bothSpecialDist
// and normalizers. The squared contribution of each attribute is:
//   FLOATVAR  both special : 2 * variance
//             one special  : (known - average)^2 + variance
//                            (variance multiplied by normalizer^2 when
//                            'normalize' is set)
//             none special : dif^2
//   INTVAR    both special : the attribute's bothSpecialDist entry
//             one special  : 1 - p(known value) from the attribute's distribution
//             none special : 1 if the values differ, otherwise 0
//   other                  : dif^2
// Returns the square root of the accumulated sum.
float TExamplesDistance_Euclidean::operator ()(const TExample &e1, const TExample &e2) const 
{ 
  vector<float> difs;
  getDifs(e1, e2, difs);
  float dist = 0.0;
  TExample::const_iterator e1i(e1.begin()), e2i(e2.begin());
  TFloatList::const_iterator avgi(averages->begin()), vari(variances->begin());
  vector<float>::const_iterator di(difs.begin()), de(difs.end());
  TDomainDistributions::const_iterator disti(distributions->begin());
  TFloatList::const_iterator bsi(bothSpecialDist->begin());
  TFloatList::const_iterator si(normalizers->begin());

  // Every per-attribute iterator must advance together. bsi was previously
  // missing from the increment list, so all attributes read the first
  // attribute's bothSpecialDist entry. The constructor stores one entry per
  // element of 'distributions', so bsi is valid wherever disti is.
  for(; di!=de; di++, e1i++, e2i++, avgi++, vari++, disti++, bsi++, si++) {
    if ((*e1i).varType == TValue::FLOATVAR) {
      if ((*e1i).isSpecial()) {
        if ((*e2i).isSpecial())
          dist += 2 * *vari;
        else {
          // only e2 is known: deviation of e2 from the attribute average
          const float e2a = (*e2i).floatV - *avgi;
          if (normalize)
            dist += e2a*e2a + *vari * *si * *si;
          else
            dist += e2a*e2a + *vari;
        }
      }
      else { // e1i is not special
        if ((*e2i).isSpecial()) {
          // only e1 is known: deviation of e1 from the attribute average
          const float e1a = (*e1i).floatV - *avgi;
          if (normalize)
            dist += e1a*e1a + *vari * *si * *si;
          else
            dist += e1a*e1a + *vari;
        }
        else // none is special
          dist += (*di) * (*di);
      }
    }

    else if ((*e1i).varType == TValue::INTVAR) {
      if ((*e1i).isSpecial()) {
        if ((*e2i).isSpecial())
          dist += *bsi;
        else
          dist += 1 - (*disti)->p((*e2i).intV);
      }
      else { // e1i is not special
        if ((*e2i).isSpecial())
          dist += 1 - (*disti)->p((*e1i).intV);
        else
          if ((*e1i).intV != (*e2i).intV)
            dist += 1;
      }
    }

    else
      dist += (*di)*(*di);
  }

  return sqrt(dist);
}




// Default constructor: the body is empty, so no work beyond member/base
// default construction is performed here.
TExamplesDistanceConstructor_Relief::TExamplesDistanceConstructor_Relief()
{
}


// Builds a TExamplesDistance_Relief for the domain of 'gen'.
//
// gen      - example generator; its domain is always consulted, so it must be set
// weightID - passed on when distributions / basic statistics are computed here
// ddist    - domain distributions; recomputed from gen when statistics are
//            missing and the domain has a discrete attribute or class
// bstat    - basic attribute statistics; computed from gen when missing and
//            the domain has nothing discrete
//
// Raises an error when gen is not set, when domain.hasOtherAttributes()
// reports a variable, or when ignoreClass is false.
//
// For every variable of the domain one entry is appended to averages,
// normalizations and bothSpecial of the result:
//   FLOATVAR : average and value span (max - min), taken from bstat when it is
//              available and otherwise from the continuous distribution in
//              ddist (0.0 / 1.0 when that distribution is empty);
//              bothSpecial is 0.5
//   other    : average 0.0, normalization 0.0, bothSpecial is 1 minus the sum
//              of squared entries of the variable's discrete distribution
PExamplesDistance TExamplesDistanceConstructor_Relief::operator()(PExampleGenerator gen, const int &weightID, PDomainDistributions ddist, PDomainBasicAttrStat bstat) const
{ 
  // gen is dereferenced unconditionally below (for the domain and the
  // attribute lists), so a missing generator has to be rejected before the
  // first use; the check used to come only after gen->domain was accessed.
  if (!gen)
    raiseError("examples or domain distributions expected");

  const TDomain &domain = gen->domain.getReference();

  PVariable otherAttribute = domain.hasOtherAttributes();
  if (otherAttribute)
    raiseError("domain has attributes whose type is not supported by ReliefF (e.g. '%s')", otherAttribute->name.c_str());

  // for continuous attributes BasicAttrStat suffices; for discrete it does not
  const bool hasDiscrete = domain.hasDiscreteAttributes() || (domain.classVar && (domain.classVar->varType == TValue::INTVAR));
  if (!bstat || (hasDiscrete && !ddist)) {
    if (hasDiscrete)
      ddist = mlnew TDomainDistributions(gen, weightID);
    else
      bstat = mlnew TDomainBasicAttrStat(gen, weightID);
  }

  TExamplesDistance_Relief *edr = mlnew TExamplesDistance_Relief();
  PExamplesDistance res = edr;   // wrapped right away so edr is owned if an error is raised below

  // NOTE(review): this rejects ignoreClass == false although the message reads
  // as if the option itself were unsupported -- confirm the intended polarity.
  if (!ignoreClass)
    raiseError("'ignoreClass' not supported");

  edr->averages       = mlnew TAttributedFloatList(gen->domain->attributes);
  edr->normalizations = mlnew TAttributedFloatList(gen->domain->attributes);
  edr->bothSpecial    = mlnew TAttributedFloatList(gen->domain->attributes);

  edr->distributions = CLONE(TDomainDistributions, ddist);
  if (ddist)
    edr->distributions->normalize();
  
  for(int attrIndex = 0, nAttrs = gen->domain->variables->size(); attrIndex != nAttrs; attrIndex++) {
    if (domain.variables->at(attrIndex)->varType == TValue::FLOATVAR) {
      if (bstat) {
        const TBasicAttrStat &bas = bstat->at(attrIndex).getReference();
        edr->averages->push_back(bas.avg);
        edr->normalizations->push_back(bas.max - bas.min);
      }
      else {
        const TContDistribution *contd = ddist->at(attrIndex).AS(TContDistribution);
        if (contd->size()) {
          edr->averages->push_back(contd->average());
          // span between the last and the first key of the stored distribution
          edr->normalizations->push_back((*contd->distribution.rbegin()).first - (*contd->distribution.begin()).first);
        }
        else {
          edr->averages->push_back(0.0);
          edr->normalizations->push_back(1.0);
        }
      }
      edr->bothSpecial->push_back(0.5);
    }
    else {
      edr->averages->push_back(0.0);
      edr->normalizations->push_back(0.0);
      // NOTE(review): the sum is taken over ddist, not over edr->distributions;
      // whether ddist's entries are normalized at this point depends on
      // whether CLONE shares them with the clone -- confirm.
      float dist = 1.0;
      const_PITERATE(TDiscDistribution, di, ddist->at(attrIndex).AS(TDiscDistribution))
        dist -= *di * *di;
      edr->bothSpecial->push_back(dist);
    }
  }

  return res;
}


// Relief distance between two examples: the sum over all values of e1 (paired
// position by position with e2) of a per-attribute difference capped at 1.
//
// Discrete values (INTVAR):
//   both special -> the attribute's bothSpecial entry
//   one special  -> 1 - atint(known value) on the attribute's distribution;
//                   an error is raised when 'distributions' is not set
//   both known   -> 1 if the values differ, otherwise 0
// Any other value type, only when the attribute's normalization is positive:
//   both special -> 0.5
//   one special  -> |average - known| / normalization
//   both known   -> |v1 - v2| / normalization
// Attributes matching neither case contribute 0.
float TExamplesDistance_Relief::operator()(const TExample &e1, const TExample &e2) const
{
  checkProperty(averages);
  checkProperty(normalizations);
  checkProperty(bothSpecial);

  const bool hasDistributions = bool(distributions);

  TExample::const_iterator it1(e1.begin()), end1(e1.end());
  TExample::const_iterator it2(e2.begin());
  TFloatList::const_iterator avgIt(averages->begin());
  TFloatList::const_iterator normIt(normalizations->begin());
  TFloatList::const_iterator bothIt(bothSpecial->begin());

  // only meaningful (and only advanced) when distributions are available
  TDomainDistributions::const_iterator distIt;
  if (hasDistributions)
    distIt = distributions->begin();

  float total = 0.0;
  while (it1 != end1) {
    const TValue &v1 = *it1, &v2 = *it2;
    float dd = 0.0;

    if (v1.varType==TValue::INTVAR) {
      const bool unknown1 = v1.isSpecial();
      const bool unknown2 = v2.isSpecial();

      if (unknown1 && unknown2) {
        dd = *bothIt;
      }
      else if (unknown1 || unknown2) {
        if (!hasDistributions)
          raiseError("'distributions' not set; cannot deal with unknown values");
        // probability is looked up for whichever value is known
        dd = 1-(*distIt)->atint(unknown1 ? v2.intV : v1.intV);
      }
      else if (v1.intV != v2.intV) {
        dd = 1.0;
      }
    }
    else if (*normIt>0) {
      const bool unknown1 = v1.isSpecial();
      const bool unknown2 = v2.isSpecial();

      if (unknown1 && unknown2)
        dd = float(0.5);
      else if (unknown1)
        dd = fabs(*avgIt - v2.floatV) / *normIt;
      else if (unknown2)
        dd = fabs(*avgIt - v1.floatV) / *normIt;
      else
        dd = fabs(v1.floatV - v2.floatV) / *normIt;
    }

    // a single attribute never contributes more than 1
    total += dd>1.0 ? 1.0 : dd;

    ++it1;
    ++it2;
    ++avgIt;
    ++normIt;
    ++bothIt;
    if (hasDistributions)
      ++distIt;
  }

  return total;
}


// Relief difference between two values of the single attribute 'attrNo',
// capped at 1.
//
// Discrete values (INTVAR):
//   both special -> bothSpecial entry of the attribute
//   one special  -> 1 - atint(known value) on the attribute's distribution
//   both known   -> 1 if the values differ, otherwise 0
// Any other value type, only when the attribute's normalization is positive:
//   both special -> 0.5
//   one special  -> |average - known| / normalization
//   both known   -> |v1 - v2| / normalization
// When neither case applies the initial value -1.0 is returned.
float TExamplesDistance_Relief::operator()(const int &attrNo, const TValue &v1, const TValue &v2) const
{
  float dd = -1.0;

  if (v1.varType==TValue::INTVAR) {
    const bool unknown1 = v1.isSpecial();
    const bool unknown2 = v2.isSpecial();

    if (unknown1 && unknown2)
      dd = bothSpecial->at(attrNo);
    else if (unknown1)
      dd = 1 - distributions->at(attrNo)->atint(v2.intV);
    else if (unknown2)
      dd = 1 - distributions->at(attrNo)->atint(v1.intV);
    else
      dd = (v1.intV != v2.intV) ? 1.0 : 0.0;
  }
  else if (normalizations->at(attrNo)>0) {
    const bool unknown1 = v1.isSpecial();
    const bool unknown2 = v2.isSpecial();

    if (unknown1 && unknown2)
      dd = 0.5;
    else if (unknown1)
      dd = fabs(averages->at(attrNo) - v2.floatV) / normalizations->at(attrNo);
    else if (unknown2)
      dd = fabs(averages->at(attrNo) - v1.floatV) / normalizations->at(attrNo);
    else
      dd = fabs(v1.floatV - v2.floatV) / normalizations->at(attrNo);
  }

  return dd>1.0 ? 1.0 : dd;
}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -