⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 classifier.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 2 页
字号:
    else
      weight=weight*averageWeight;

    setMeta(dataDescription->missingWeight, TValue(weight));
  }
}


bool TExampleForMissing::nextExample()
{
  TVarList::const_iterator vi(domain->variables->begin());
  vector<int>::iterator ci, ei;

  // first DCs since they don't change weights. If one is increased, job is done and we return true
  for(ci=DCs.begin(), ei=DCs.end(); ci!=ei; ci++)
    if ((*(vi+*ci))->nextValue(operator[](*ci)))
      return true;
    else
      (*(vi+*ci))->firstValue(operator[](*ci));

  // if DCs or all exhausted, increase DKs
  for(ci=DKs.begin(), ei=DKs.end(); (ci!=ei) && !(*(vi+*ci))->nextValue(operator[](*ci)); ci++)
    (*(vi+*ci))->firstValue(operator[](*ci));

  if (ci==ei)
    return false;

  if (dataDescription->missingWeight && dataDescription->domainDistributions) {
    float weight=dataDescription->originalWeight ? getMeta(dataDescription->originalWeight).floatV : 1;
    if (dataDescription->domainDistributions) {
      TDomainDistributions::const_iterator di(dataDescription->domainDistributions->begin());
      ITERATE(vector<int>, ci, DKs) {
        // DKs contain only discrete variables, so it is safe to cast
        const TDiscDistribution &dist = CAST_TO_DISCDISTRIBUTION(*(di+*ci));
        weight*= dist[operator[](*ci).intV] / dist.abs;
      }
    }
    setMeta(dataDescription->missingWeight, TValue(weight));
  }

  return true;
}

/*  Tells whether any attribute index is recorded in DCs or in DKs. */
bool TExampleForMissing::hasMissing()
{
  return !DCs.empty() || !DKs.empty();
}

/*  Derived classes can call this method when the example has missing values
    and the model does not tolerate them.
    Given the data description for missing values, it builds a
    TExampleForMissing, calls operator()(const TExample &) for every example
    it enumerates and accumulates the (weighted) predictions in a discrete
    distribution. The result is that distribution's highestProbValue for the
    original example.
    Raises an error when the class variable is continuous (FLOATVAR) or when
    dataDes is not set. */
TValue TClassifier::operator ()(const TExample &example, PEFMDataDescription dataDes)
{
  if (classVar->varType == TValue::FLOATVAR)
    raiseError("classification with missing values imputation works only for discrete classes.");
  checkProperty(dataDes);

  TExampleForMissing exMissing(example, dataDes);
  exMissing.resetExample();

  TDiscDistribution votes;
  do {
    TValue predicted = operator()(exMissing);

    // special predictions are not counted; 'continue' still evaluates nextExample()
    if (predicted.isSpecial())
      continue;

    if (dataDes->missingWeight)
      votes.addint(predicted.intV, float(exMissing[dataDes->missingWeight]));
    else
      votes.addint(predicted.intV, 1.0);
  } while (exMissing.nextExample());

  return votes.highestProbValue(example);
}

/*  Derived classes can call this method when the example has missing values
    and the model does not tolerate them.
    Given the data description for missing values, it builds a
    TExampleForMissing, calls classDistribution(const TExample &) for every
    example it enumerates and returns the sum of those distributions. Each
    distribution is first multiplied by the example's missingWeight meta value
    when the data description defines one. */
PDistribution TClassifier::classDistribution(const TExample &example, PEFMDataDescription dataDes)
{
  TExampleForMissing exMissing(example, dataDes);
  exMissing.resetExample();

  // 'total' is the raw accumulator; 'wrapped' is the wrapper handed back to the caller
  TDistribution *total = TDistribution::create(classVar);
  PDistribution wrapped = total;

  do {
    PDistribution partial = classDistribution(exMissing);
    if (dataDes->missingWeight)
      *total += (partial->operator *= (exMissing[dataDes->missingWeight]));
    else
      *total += partial.getReference();
  } while (exMissing.nextExample());

  return wrapped;
}



/*  Constructs a classifier without a domain; cp is passed on to TClassifier unchanged. */
TClassifierFD::TClassifierFD(const bool &cp)
  : TClassifier(cp)
{
}


/*  Constructs a classifier for the given domain. The class variable handed to
    TClassifier is the domain's classVar, or an empty PVariable when dom is null. */
TClassifierFD::TClassifierFD(PDomain dom, const bool &cp)
  : TClassifier(dom ? dom->classVar : PVariable(), cp)
  , domain(dom)
{
}
  

/*  Copy constructor: copies the TClassifier part and the domain member of old. */
TClassifierFD::TClassifierFD(const TClassifierFD &old)
  : TClassifier(old)
  , domain(old.domain)
{
}


void TClassifierFD::afterSet(const char *name)
{
  if (!strcmp(name, "domain"))
    classVar = domain->classVar;

  TClassifier::afterSet(name);
}




/*  Default constructor: only initializes the base class, as TClassifier(true). */
TDefaultClassifier::TDefaultClassifier()
  : TClassifier(true)
{
}


/*  Constructs a classifier for class variable acv. The default value is
    acv->DK() (a default-constructed TValue when acv is null) and the default
    distribution is whatever TDistribution::create(acv) returns. */
TDefaultClassifier::TDefaultClassifier(PVariable acv)
  : TClassifier(acv, true)
  , defaultVal(acv ? acv->DK() : TValue())
  , defaultDistribution(TDistribution::create(acv))
{
}


/*  Constructs a classifier for class variable acv that stores defDis as its
    default distribution; the default value is a default-constructed TValue. */
TDefaultClassifier::TDefaultClassifier(PVariable acv, PDistribution defDis)
  : TClassifier(acv, true)
  , defaultVal()
  , defaultDistribution(defDis)
{
}


/*  Constructs a classifier for class variable acv with both the default value
    (defVal) and the default distribution (defDis) given explicitly. */
TDefaultClassifier::TDefaultClassifier(PVariable acv, const TValue &defVal, PDistribution defDis)
  : TClassifier(acv, true)
  , defaultVal(defVal)
  , defaultDistribution(defDis)
{
}


/*  Copy constructor: copies the TClassifier part and the default value of old;
    the default distribution is obtained through CLONE rather than by copying
    the wrapper. */
TDefaultClassifier::TDefaultClassifier(const TDefaultClassifier &old)
  : TClassifier(static_cast<const TClassifier &>(old))
  , defaultVal(old.defaultVal)
  , defaultDistribution(CLONE(TDistribution, old.defaultDistribution))
{
}


/*  Returns the prediction for exam: defaultVal when it is not special.
    Otherwise the value is derived from defaultDistribution: its average() when
    the distribution has supportsContinuous set, and its highestProbValue(exam)
    otherwise.
    As in predictionAndDistribution, defaultDistribution is verified with
    checkProperty before it is dereferenced, so a classifier with neither a
    usable default value nor a distribution reports the unset property. */
TValue TDefaultClassifier::operator ()(const TExample &exam)
{
  if (!defaultVal.isSpecial())
    return defaultVal;

  checkProperty(defaultDistribution);
  return defaultDistribution->supportsContinuous ? TValue(defaultDistribution->average()) : defaultDistribution->highestProbValue(exam);
}


/*  Returns the class distribution; the example itself is not used.
    When defaultDistribution is set, a clone of it is returned. Otherwise a new
    distribution is created for classVar and defaultVal is added to it; this
    fallback needs a class variable and a non-special defaultVal. */
PDistribution TDefaultClassifier::classDistribution(const TExample &)
{
  if (!defaultDistribution) {
    if (!classVar || defaultVal.isSpecial()) {
      // defaultDistribution is known to be unset here, so this raises the exception
      checkProperty(defaultDistribution);
    }

    PDistribution single = TDistribution::create(classVar);
    single->add(defaultVal);
    return single;
  }

  return CLONE(TDistribution, defaultDistribution);
}


/*  Computes the prediction (val) and the class distribution (dist) in one call.
    val is defaultVal when that is not special; otherwise it is taken from
    defaultDistribution (average() when supportsContinuous is set, else
    highestProbValue(exam)), which must then be set.
    dist is a clone of defaultDistribution when there is one; otherwise it is a
    new distribution for classVar with defaultVal added to it. */
void TDefaultClassifier::predictionAndDistribution(const TExample &exam, TValue &val, PDistribution &dist)
{
  if (!defaultVal.isSpecial())
    val = defaultVal;
  else {
    checkProperty(defaultDistribution);
    if (defaultDistribution->supportsContinuous)
      val = TValue(defaultDistribution->average());
    else
      val = defaultDistribution->highestProbValue(exam);
  }

  if (!defaultDistribution) {
    if (!classVar) {
      // defaultDistribution is known to be unset here, so this raises the exception
      checkProperty(defaultDistribution);
    }
    dist = TDistribution::create(classVar);
    dist->add(defaultVal);
    return;
  }

  dist = CLONE(TDistribution, defaultDistribution);
}




/*  Constructs a classifier for class variable acv. When acv is given, a
    distribution is created for it and normalized; when acv is null,
    probabilities is left as an empty PDistribution. */
TRandomClassifier::TRandomClassifier(PVariable acv)
  : TClassifier(acv)
  , probabilities(acv ? TDistribution::create(acv) : PDistribution())
{
  if (!probabilities)
    return;

  // if distribution is discrete, it sets probabilities to 1/acv->noOfValues
  probabilities->normalize();
}


/*  Constructs a classifier without a class variable; probabilities is a
    normalized clone of probs. */
TRandomClassifier::TRandomClassifier(const TDistribution &probs)
  : TClassifier()
  , probabilities(CLONE(TDistribution, &probs))
{
  probabilities->normalize();
}


/*  Constructs a classifier for class variable acv; probabilities is a
    normalized clone of probs. */
TRandomClassifier::TRandomClassifier(PVariable acv, const TDistribution &probs)
  : TClassifier(acv)
  , probabilities(CLONE(TDistribution, &probs))
{
  probabilities->normalize();
}


/*  Constructs a classifier without a class variable that stores probs as its
    probabilities and normalizes it.
    probs is stored as passed (not cloned), so the normalization is applied to
    that same wrapped object.
    A null probs is accepted and left as is, matching the (PVariable)
    constructor and operator(), which both treat unset probabilities as a valid
    state; the distribution is normalized only when present. */
TRandomClassifier::TRandomClassifier(PDistribution probs)
: TClassifier(),
  probabilities(probs)
{
  if (probabilities)
    probabilities->normalize();
}


/*  Constructs a classifier for class variable acv that stores probs as its
    probabilities and normalizes it.
    probs is stored as passed (not cloned), so the normalization is applied to
    that same wrapped object.
    A null probs is accepted and left as is, matching the (PVariable)
    constructor and operator(), which both treat unset probabilities as a valid
    state; the distribution is normalized only when present. */
TRandomClassifier::TRandomClassifier(PVariable acv, PDistribution probs)
: TClassifier(acv),
  probabilities(probs)
{
  if (probabilities)
    probabilities->normalize();
}


/*  Returns a random class value for ex. When probabilities is set, the value
    is drawn from it with ex.sumValues() passed to randomValue. Otherwise
    classVar must be set and its randomValue() is returned. */
TValue TRandomClassifier::operator()(const TExample &ex)
{
  if (probabilities)
    return probabilities->randomValue(ex.sumValues());

  checkProperty(classVar);
  return classVar->randomValue();
}


/*  Returns a clone of probabilities, which must be set; the example is not used. */
PDistribution TRandomClassifier::classDistribution(const TExample &)
{
  checkProperty(probabilities);

  return CLONE(TDistribution, probabilities);
}

     
void TRandomClassifier::predictionAndDistribution(const TExample &, TValue &val, PDistribution &dist)
{ checkProperty(probabilities);
  val = probabilities->randomValue();
  dist = CLONE(TDistribution, probabilities);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -