📄 classifier.cpp
字号:
else
weight=weight*averageWeight;
setMeta(dataDescription->missingWeight, TValue(weight));
}
}
bool TExampleForMissing::nextExample()
{
TVarList::const_iterator vi(domain->variables->begin());
vector<int>::iterator ci, ei;
// first DCs since they don't change weights. If one is increased, job is done and we return true
for(ci=DCs.begin(), ei=DCs.end(); ci!=ei; ci++)
if ((*(vi+*ci))->nextValue(operator[](*ci)))
return true;
else
(*(vi+*ci))->firstValue(operator[](*ci));
// if DCs or all exhausted, increase DKs
for(ci=DKs.begin(), ei=DKs.end(); (ci!=ei) && !(*(vi+*ci))->nextValue(operator[](*ci)); ci++)
(*(vi+*ci))->firstValue(operator[](*ci));
if (ci==ei)
return false;
if (dataDescription->missingWeight && dataDescription->domainDistributions) {
float weight=dataDescription->originalWeight ? getMeta(dataDescription->originalWeight).floatV : 1;
if (dataDescription->domainDistributions) {
TDomainDistributions::const_iterator di(dataDescription->domainDistributions->begin());
ITERATE(vector<int>, ci, DKs) {
// DKs contain only discrete variables, so it is safe to cast
const TDiscDistribution &dist = CAST_TO_DISCDISTRIBUTION(*(di+*ci));
weight*= dist[operator[](*ci).intV] / dist.abs;
}
}
setMeta(dataDescription->missingWeight, TValue(weight));
}
return true;
}
/* Tells whether at least one attribute index is recorded in DCs or DKs. */
bool TExampleForMissing::hasMissing()
{
  return !DCs.empty() || !DKs.empty();
}
/* Helper for derived classes whose model cannot handle examples with missing
   values.

   Given the data description for missing values, it builds a
   TExampleForMissing from the example, calls operator()(const TExample &) on
   every example the enumeration yields, accumulates the predicted classes
   into a (weighted) discrete distribution and returns that distribution's
   highest-probability value.

   Raises an error when the class variable is continuous, and when dataDes is
   not set. */
TValue TClassifier::operator ()(const TExample &example, PEFMDataDescription dataDes)
{
  if (classVar->varType == TValue::FLOATVAR)
    raiseError("classification with missing values imputation works only for discrete classes.");
  checkProperty(dataDes);

  TExampleForMissing expanded(example, dataDes);
  expanded.resetExample();

  TDiscDistribution votes;
  do {
    TValue predicted = operator()(expanded);
    if (predicted.isSpecial())
      continue;  // a special prediction contributes nothing

    // Weight of the current example: the value stored under the
    // missingWeight meta id when there is one, otherwise 1.
    const double voteWeight = dataDes->missingWeight ? float(expanded[dataDes->missingWeight]) : 1.0;
    votes.addint(predicted.intV, voteWeight);
  } while (expanded.nextExample());

  return votes.highestProbValue(example);
}
/* Helper for derived classes whose model cannot handle examples with missing
   values.

   Given the data description for missing values, it builds a
   TExampleForMissing from the example, calls
   classDistribution(const TExample &) on every example the enumeration
   yields and returns the sum of the resulting class distributions. When the
   description has a missingWeight meta id, each distribution is first
   multiplied by the value stored under that id in the current example.

   Raises an error when dataDes is not set. */
PDistribution TClassifier::classDistribution(const TExample &example, PEFMDataDescription dataDes)
{
  // Same guard as operator()(const TExample &, PEFMDataDescription): raise a
  // proper error for an unset description instead of dereferencing it below.
  checkProperty(dataDes);

  TExampleForMissing exMissing(example, dataDes);
  exMissing.resetExample();

  // classDist is the raw pointer used for the arithmetic; res wraps the same
  // object and is what the caller receives.
  TDistribution *classDist = TDistribution::create(classVar);
  PDistribution res = classDist;

  do {
    if (dataDes->missingWeight)
      classDist->operator += ((classDistribution(exMissing)->operator *= (exMissing[dataDes->missingWeight])));
    else
      classDist->operator += (classDistribution(exMissing).getReference());
  } while (exMissing.nextExample());

  return res;
}
/* Constructs a classifier with no domain; cp is passed on to TClassifier. */
TClassifierFD::TClassifierFD(const bool &cp)
  : TClassifier(cp)
{
}

/* Constructs a classifier for the given domain. The class variable handed to
   TClassifier is the domain's class variable, or an empty PVariable when no
   domain is given; cp is passed on to TClassifier. */
TClassifierFD::TClassifierFD(PDomain dom, const bool &cp)
  : TClassifier(dom ? dom->classVar : PVariable(), cp),
    domain(dom)
{
}

/* Copy constructor: copies the TClassifier part and the domain wrapper. */
TClassifierFD::TClassifierFD(const TClassifierFD &old)
  : TClassifier(old),
    domain(old.domain)
{
}
void TClassifierFD::afterSet(const char *name)
{
if (!strcmp(name, "domain"))
classVar = domain->classVar;
TClassifier::afterSet(name);
}
/* Constructs a classifier with no class variable; defaultVal and
   defaultDistribution are default-initialized. */
TDefaultClassifier::TDefaultClassifier()
  : TClassifier(true)
{
}

/* Constructs a classifier for class variable acv. The default value is
   acv->DK() (or a default TValue when acv is not given) and the default
   distribution is whatever TDistribution::create(acv) produces. */
TDefaultClassifier::TDefaultClassifier(PVariable acv)
  : TClassifier(acv, true),
    defaultVal(acv ? acv->DK() : TValue()),
    defaultDistribution(TDistribution::create(acv))
{
}

/* Constructs a classifier for class variable acv that stores the given
   distribution and a default-constructed default value. */
TDefaultClassifier::TDefaultClassifier(PVariable acv, PDistribution defDis)
  : TClassifier(acv, true),
    defaultVal(),
    defaultDistribution(defDis)
{
}

/* Constructs a classifier for class variable acv that stores both the given
   default value and the given distribution. */
TDefaultClassifier::TDefaultClassifier(PVariable acv, const TValue &defVal, PDistribution defDis)
  : TClassifier(acv, true),
    defaultVal(defVal),
    defaultDistribution(defDis)
{
}

/* Copy constructor: copies the TClassifier part and the default value, and
   stores a CLONE of the other classifier's default distribution. */
TDefaultClassifier::TDefaultClassifier(const TDefaultClassifier &old)
  : TClassifier(dynamic_cast<const TClassifier &>(old)),
    defaultVal(old.defaultVal),
    defaultDistribution(CLONE(TDistribution, old.defaultDistribution))
{
}
/* Returns the prediction for an example.

   If defaultVal is not special it is returned as is. Otherwise the answer is
   derived from defaultDistribution: its average() when the distribution
   supports continuous values, else its highest-probability value for the
   given example.

   Raises an error when defaultVal is special and defaultDistribution is not
   set. */
TValue TDefaultClassifier::operator ()(const TExample &exam)
{
  if (!defaultVal.isSpecial())
    return defaultVal;

  // The distribution is about to be dereferenced; make sure it is set, as
  // predictionAndDistribution does for the same case.
  checkProperty(defaultDistribution);

  return defaultDistribution->supportsContinuous ? TValue(defaultDistribution->average()) : defaultDistribution->highestProbValue(exam);
}
/* Returns the class distribution; the example is ignored.

   When defaultDistribution is set, a CLONE of it is returned. Otherwise a new
   distribution is created for classVar and defaultVal is added to it; this
   needs both a class variable and a non-special default value, and an error
   is raised if either is missing. */
PDistribution TDefaultClassifier::classDistribution(const TExample &)
{
  if (defaultDistribution)
    return CLONE(TDistribution, defaultDistribution);

  const bool cannotBuild = !classVar || defaultVal.isSpecial();
  if (cannotBuild) {
    // defaultDistribution is known to be unset here, so this call is made
    // only to raise the exception
    checkProperty(defaultDistribution);
  }

  PDistribution single = TDistribution::create(classVar);
  single->add(defaultVal);
  return single;
}
/* Computes both the predicted value and the class distribution.

   val:  defaultVal when it is not special; otherwise derived from
         defaultDistribution (its average() when it supports continuous
         values, else its highest-probability value for exam). An error is
         raised if defaultVal is special and defaultDistribution is not set.
   dist: a CLONE of defaultDistribution when it is set; otherwise a new
         distribution created for classVar with defaultVal added to it (an
         error is raised if classVar is not set either). */
void TDefaultClassifier::predictionAndDistribution(const TExample &exam, TValue &val, PDistribution &dist)
{
  // Predicted value first.
  if (!defaultVal.isSpecial())
    val = defaultVal;
  else {
    checkProperty(defaultDistribution);
    if (defaultDistribution->supportsContinuous)
      val = TValue(defaultDistribution->average());
    else
      val = defaultDistribution->highestProbValue(exam);
  }

  // Then the distribution.
  if (!defaultDistribution) {
    if (!classVar) {
      // defaultDistribution is unset here, so this call is made only to
      // raise the exception
      checkProperty(defaultDistribution);
    }
    dist = TDistribution::create(classVar);
    dist->add(defaultVal);
  }
  else
    dist = CLONE(TDistribution, defaultDistribution);
}
/* Constructs a random classifier for class variable acv. When acv is given, a
   distribution is created for it and normalized; otherwise probabilities is
   left empty. */
TRandomClassifier::TRandomClassifier(PVariable acv)
  : TClassifier(acv),
    probabilities(acv ? TDistribution::create(acv) : PDistribution())
{
  if (!probabilities)
    return;

  // if distribution is discrete, it sets probabilities to 1/acv->noOfValues
  probabilities->normalize();
}
/* Constructs a random classifier without a class variable; it stores a
   normalized CLONE of probs, leaving the argument itself untouched. */
TRandomClassifier::TRandomClassifier(const TDistribution &probs)
  : TClassifier(),
    probabilities(CLONE(TDistribution, &probs))
{
  probabilities->normalize();
}

/* Constructs a random classifier for class variable acv; it stores a
   normalized CLONE of probs, leaving the argument itself untouched. */
TRandomClassifier::TRandomClassifier(PVariable acv, const TDistribution &probs)
  : TClassifier(acv),
    probabilities(CLONE(TDistribution, &probs))
{
  probabilities->normalize();
}
/* Constructs a random classifier without a class variable from a wrapped
   distribution. The wrapper is stored as given (no CLONE is made) and the
   distribution is normalized through it.
   probs may be empty: probabilities then stays unset, which operator()
   handles by falling back to classVar. */
TRandomClassifier::TRandomClassifier(PDistribution probs)
  : TClassifier(),
    probabilities(probs)
{
  // guard against an empty wrapper, as the constructor taking only a
  // PVariable does
  if (probabilities)
    probabilities->normalize();
}

/* Constructs a random classifier for class variable acv from a wrapped
   distribution. The wrapper is stored as given (no CLONE is made) and the
   distribution is normalized through it.
   probs may be empty: probabilities then stays unset, which operator()
   handles by drawing a random value from classVar. */
TRandomClassifier::TRandomClassifier(PVariable acv, PDistribution probs)
  : TClassifier(acv),
    probabilities(probs)
{
  // guard against an empty wrapper, as the constructor taking only a
  // PVariable does
  if (probabilities)
    probabilities->normalize();
}
/* Returns a random class value for the example.

   When probabilities is set, the value is drawn from it, with
   ex.sumValues() passed to randomValue(). Otherwise the value comes from
   classVar->randomValue(); an error is raised if classVar is not set
   either. */
TValue TRandomClassifier::operator()(const TExample &ex)
{
  if (probabilities)
    return probabilities->randomValue(ex.sumValues());

  checkProperty(classVar);
  return classVar->randomValue();
}
/* Returns a CLONE of probabilities; the example is ignored.
   Raises an error when probabilities is not set. */
PDistribution TRandomClassifier::classDistribution(const TExample &)
{
  checkProperty(probabilities);
  return CLONE(TDistribution, probabilities);
}
/* Produces a random prediction together with the distribution it was drawn
   from; the example is ignored.

   val:  the result of probabilities->randomValue() (called with no argument)
   dist: a CLONE of probabilities, made after the value has been drawn

   Raises an error when probabilities is not set. */
void TRandomClassifier::predictionAndDistribution(const TExample &, TValue &val, PDistribution &dist)
{
  checkProperty(probabilities);

  val = probabilities->randomValue();
  dist = CLONE(TDistribution, probabilities);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -