📄 distance.cpp
字号:
normalized.push_back((*ei).floatV);
}
}
/*TExamplesDistanceConstructor_Maximal::TExamplesDistanceConstructor_Maximal()
{}
*/
// Default constructor; the body is empty and there is no initializer list.
TExamplesDistanceConstructor_Euclidean::TExamplesDistanceConstructor_Euclidean()
{
}
// Default constructor; the body is empty and there is no initializer list.
TExamplesDistanceConstructor_Manhattan::TExamplesDistanceConstructor_Manhattan()
{
}
// Creates a TExamplesDistance_Maximal, passing on the ignoreClass, normalize
// and ignoreUnknowns values visible in this scope (presumably settings of the
// constructor object) followed by the four call arguments, unchanged.
PExamplesDistance TExamplesDistanceConstructor_Maximal::operator()(PExampleGenerator egen, const int &weightID, PDomainDistributions ddist, PDomainBasicAttrStat bstat) const
{
  return mlnew TExamplesDistance_Maximal(
    ignoreClass, normalize, ignoreUnknowns,
    egen, weightID, ddist, bstat);
}
// Creates a TExamplesDistance_Manhattan, passing on the ignoreClass, normalize
// and ignoreUnknowns values visible in this scope (presumably settings of the
// constructor object) followed by the four call arguments, unchanged.
PExamplesDistance TExamplesDistanceConstructor_Manhattan::operator()(PExampleGenerator egen, const int &weightID, PDomainDistributions ddist, PDomainBasicAttrStat bstat) const
{
  return mlnew TExamplesDistance_Manhattan(
    ignoreClass, normalize, ignoreUnknowns,
    egen, weightID, ddist, bstat);
}
// Creates a TExamplesDistance_Euclidean, passing on the ignoreClass, normalize
// and ignoreUnknowns values visible in this scope (presumably settings of the
// constructor object) followed by the four call arguments, unchanged.
PExamplesDistance TExamplesDistanceConstructor_Euclidean::operator()(PExampleGenerator egen, const int &weightID, PDomainDistributions ddist, PDomainBasicAttrStat bstat) const
{
  return mlnew TExamplesDistance_Euclidean(
    ignoreClass, normalize, ignoreUnknowns,
    egen, weightID, ddist, bstat);
}
// Default constructor; the body is empty and there is no initializer list.
TExamplesDistance_Maximal::TExamplesDistance_Maximal()
{
}
// Default constructor; the body is empty and there is no initializer list.
TExamplesDistance_Manhattan::TExamplesDistance_Manhattan()
{
}
// Default constructor; the body is empty and there is no initializer list.
TExamplesDistance_Euclidean::TExamplesDistance_Euclidean()
{
}
// Forwards every argument, in order, to the TExamplesDistance_Normalized
// base-class constructor; this class adds no initialization of its own.
TExamplesDistance_Maximal::TExamplesDistance_Maximal(const bool &ignoreClass, const bool &normalize, const bool &ignoreUnknowns, PExampleGenerator egen, const int &weightID, PDomainDistributions ddist, PDomainBasicAttrStat dstat)
  : TExamplesDistance_Normalized(
      ignoreClass, normalize, ignoreUnknowns,
      egen, weightID, ddist, dstat)
{
}
// Forwards every argument, in order, to the TExamplesDistance_Normalized
// base-class constructor; this class adds no initialization of its own.
TExamplesDistance_Manhattan::TExamplesDistance_Manhattan(const bool &ignoreClass, const bool &normalize, const bool &ignoreUnknowns, PExampleGenerator egen, const int &weightID, PDomainDistributions ddist, PDomainBasicAttrStat dstat)
  : TExamplesDistance_Normalized(
      ignoreClass, normalize, ignoreUnknowns,
      egen, weightID, ddist, dstat)
{
}
// Constructs the Euclidean distance measure.
//
// All arguments are forwarded to TExamplesDistance_Normalized. In addition:
//  - `distributions` is built from egen and weightID (the meaning of the
//    trailing `false, true` flags is defined by TDomainDistributions and is
//    not visible here);
//  - `bothSpecialDist` gets the same attribute list as `averages` and one
//    entry per element of `distributions`: 1 - sum((count_i / abs)^2), the
//    value operator() adds when both compared discrete values are special.
//
// An entry of 0.0 is stored when the distribution is missing, when it cannot
// be viewed as a TDiscDistribution, or when its total (`abs`) is zero. The
// last case would otherwise compute 0/0 and put a NaN into every distance
// that uses this entry.
TExamplesDistance_Euclidean::TExamplesDistance_Euclidean(const bool &ignoreClass, const bool &normalize, const bool &ignoreUnknowns, PExampleGenerator egen, const int &weightID, PDomainDistributions ddist, PDomainBasicAttrStat dstat)
: TExamplesDistance_Normalized(ignoreClass, normalize, ignoreUnknowns, egen, weightID, ddist, dstat),
  distributions(mlnew TDomainDistributions(egen, weightID, false, true)),
  bothSpecialDist(mlnew TAttributedFloatList())
{
  bothSpecialDist->attributes = averages->attributes;

  PITERATE(TDomainDistributions, di, distributions) {
    // Resolve the discrete view only when a distribution is present; AS() is
    // presumably a checked downcast, so its result is tested before use.
    TDiscDistribution *distr = 0;
    if (*di)
      distr = (*di).AS(TDiscDistribution);

    if (distr && (distr->abs != 0)) {
      float sum2 = 0;
      ITERATE(vector<float>, pi, distr->distribution)
        sum2 += (*pi) * (*pi);
      // Squared counts divided by the squared total give squared proportions.
      sum2 /= distr->abs * distr->abs;
      bothSpecialDist->push_back(1-sum2);
    }
    else
      bothSpecialDist->push_back(0.0);
  }
}
// Returns the largest of the per-attribute differences that getDifs() writes
// for the two examples, or 0 when getDifs() produces none.
float TExamplesDistance_Maximal::operator ()(const TExample &e1, const TExample &e2) const
{
  vector<float> difs;
  getDifs(e1, e2, difs);

  if (difs.empty())
    return 0.0;

  return *max_element(difs.begin(), difs.end());
}
// Returns the sum of the per-attribute differences that getDifs() writes for
// the two examples (0 when there are none).
float TExamplesDistance_Manhattan::operator ()(const TExample &e1, const TExample &e2) const
{
  vector<float> difs;
  getDifs(e1, e2, difs);

  float total = 0.0;
  for (vector<float>::const_iterator cur = difs.begin(), last = difs.end(); cur != last; cur++)
    total += *cur;

  return total;
}
// Euclidean distance between two examples.
//
// getDifs() supplies one difference per compared attribute; the loop runs over
// those and accumulates a squared contribution per attribute, then returns the
// square root of the sum. Contributions:
//   continuous (FLOATVAR)
//     both special : 2 * variance
//     one special  : (known - average)^2 + variance, with the variance
//                    multiplied by normalizer^2 when `normalize` is set
//     none special : dif^2
//   discrete (INTVAR)
//     both special : the attribute's bothSpecialDist entry
//     one special  : 1 - p(known value) from the attribute's distribution
//     none special : 1 if the values differ, otherwise nothing is added
//   any other type : dif^2
float TExamplesDistance_Euclidean::operator ()(const TExample &e1, const TExample &e2) const
{
  vector<float> difs;
  getDifs(e1, e2, difs);

  float dist = 0.0;
  TExample::const_iterator e1i(e1.begin()), e2i(e2.begin());
  TFloatList::const_iterator avgi(averages->begin()), vari(variances->begin());
  vector<float>::const_iterator di(difs.begin()), de(difs.end());
  TDomainDistributions::const_iterator disti(distributions->begin());
  TFloatList::const_iterator bsi(bothSpecialDist->begin());
  TFloatList::const_iterator si(normalizers->begin());

  // Every per-attribute iterator advances together. bsi has to move as well:
  // left in place, each discrete attribute with two special values would be
  // charged the first attribute's both-special distance.
  for(; di!=de; di++, e1i++, e2i++, avgi++, vari++, disti++, bsi++, si++) {
    if ((*e1i).varType == TValue::FLOATVAR) {
      if ((*e1i).isSpecial()) {
        if ((*e2i).isSpecial())
          dist += 2 * *vari;
        else {
          // Only e2 is known: measure it against the attribute average.
          const float e2a = (*e2i).floatV - *avgi;
          if (normalize)
            dist += e2a*e2a + *vari * *si * *si;
          else
            dist += e2a*e2a + *vari;
        }
      }
      else {
        if ((*e2i).isSpecial()) {
          // Only e1 is known: measure it against the attribute average.
          const float e1a = (*e1i).floatV - *avgi;
          if (normalize)
            dist += e1a*e1a + *vari * *si * *si;
          else
            dist += e1a*e1a + *vari;
        }
        else
          dist += (*di) * (*di);
      }
    }
    else if ((*e1i).varType == TValue::INTVAR) {
      if ((*e1i).isSpecial()) {
        if ((*e2i).isSpecial())
          dist += *bsi;
        else
          dist += 1 - (*disti)->p((*e2i).intV);
      }
      else {
        if ((*e2i).isSpecial())
          dist += 1 - (*disti)->p((*e1i).intV);
        else if ((*e1i).intV != (*e2i).intV)
          dist += 1;
      }
    }
    else
      dist += (*di)*(*di);
  }

  return sqrt(dist);
}
// Default constructor; the body is empty and there is no initializer list.
TExamplesDistanceConstructor_Relief::TExamplesDistanceConstructor_Relief()
{
}
// Builds a TExamplesDistance_Relief for the domain of `gen`.
//
// Parameters:
//   gen      - example generator; required, because its domain is read
//              unconditionally.
//   weightID - passed to TDomainDistributions / TDomainBasicAttrStat when
//              statistics have to be computed here.
//   ddist    - domain distributions; computed from gen when statistics are
//              missing and the domain has discrete variables.
//   bstat    - basic attribute statistics; computed from gen when statistics
//              are missing and the domain has no discrete variables.
//
// For every domain variable the result stores an average, a normalization
// (max - min for continuous variables, 0 otherwise) and a both-special
// distance (0.5 for continuous, 1 - sum(p^2) for the others).
//
// Raises an error when gen is missing, when the domain has attributes of an
// unsupported type, or when `ignoreClass` is false.
PExamplesDistance TExamplesDistanceConstructor_Relief::operator()(PExampleGenerator gen, const int &weightID, PDomainDistributions ddist, PDomainBasicAttrStat bstat) const
{
  // gen is dereferenced right below and again when the attribute lists are
  // created, so a missing generator has to be rejected before any of that.
  if (!gen)
    raiseError("examples or domain distributions expected");

  const TDomain &domain = gen->domain.getReference();

  PVariable otherAttribute = domain.hasOtherAttributes();
  if (otherAttribute)
    raiseError("domain has attributes whose type is not supported by ReliefF (e.g. '%s')", otherAttribute->name.c_str());

  // for continuous attributes BasicAttrStat suffices; for discrete it does not
  const bool hasDiscrete = domain.hasDiscreteAttributes() || (domain.classVar && (domain.classVar->varType == TValue::INTVAR));

  if (!bstat || (hasDiscrete && !ddist)) {
    if (hasDiscrete)
      ddist = mlnew TDomainDistributions(gen, weightID);
    else
      bstat = mlnew TDomainBasicAttrStat(gen, weightID);
  }

  TExamplesDistance_Relief *edr = mlnew TExamplesDistance_Relief();
  PExamplesDistance res = edr;

  // NOTE(review): this raises when ignoreClass is false; confirm that this is
  // the intended polarity.
  if (!ignoreClass)
    raiseError("'ignoreClass' not supported");

  edr->averages = mlnew TAttributedFloatList(gen->domain->attributes);
  edr->normalizations = mlnew TAttributedFloatList(gen->domain->attributes);
  edr->bothSpecial = mlnew TAttributedFloatList(gen->domain->attributes);

  edr->distributions = CLONE(TDomainDistributions, ddist);
  if (ddist)
    edr->distributions->normalize();

  for(int attrIndex = 0, nAttrs = gen->domain->variables->size(); attrIndex != nAttrs; attrIndex++) {
    if (domain.variables->at(attrIndex)->varType == TValue::FLOATVAR) {
      if (bstat) {
        const TBasicAttrStat &bas = bstat->at(attrIndex).getReference();
        edr->averages->push_back(bas.avg);
        edr->normalizations->push_back(bas.max - bas.min);
      }
      else {
        const TContDistribution *contd = ddist->at(attrIndex).AS(TContDistribution);
        if (contd->size()) {
          edr->averages->push_back(contd->average());
          // Range = last key - first key of the distribution.
          edr->normalizations->push_back((*contd->distribution.rbegin()).first - (*contd->distribution.begin()).first);
        }
        else {
          edr->averages->push_back(0.0);
          edr->normalizations->push_back(1.0);
        }
      }

      edr->bothSpecial->push_back(0.5);
    }
    else {
      edr->averages->push_back(0.0);
      edr->normalizations->push_back(0.0);

      // 1 - sum(p^2) only makes sense on proportions, so read from
      // edr->distributions, the object normalize() was called on above,
      // not from the ddist it was cloned from.
      float dist = 1.0;
      const_PITERATE(TDiscDistribution, di, edr->distributions->at(attrIndex).AS(TDiscDistribution))
        dist -= *di * *di;
      edr->bothSpecial->push_back(dist);
    }
  }

  return res;
}
// Distance between two examples: the sum of per-value distances, each capped
// at 1.
//
// The values of e1 drive the loop; e2 and the per-attribute lists are walked
// in parallel. Per value:
//   discrete (INTVAR)
//     both special : the bothSpecial entry
//     one special  : 1 - atint(known value) of the attribute's distribution;
//                    raises an error if `distributions` is not set
//     none special : 1 if the values differ, else 0
//   otherwise, if the normalization is positive
//     both special : 0.5
//     one special  : |average - known| / normalization
//     none special : |v1 - v2| / normalization
//   otherwise      : 0
float TExamplesDistance_Relief::operator()(const TExample &e1, const TExample &e2) const
{
  checkProperty(averages);
  checkProperty(normalizations);
  checkProperty(bothSpecial);

  const bool hasDistributions = bool(distributions);

  TExample::const_iterator lhs(e1.begin()), lhsEnd(e1.end());
  TExample::const_iterator rhs(e2.begin());
  TFloatList::const_iterator avgIt(averages->begin());
  TFloatList::const_iterator normIt(normalizations->begin());
  TFloatList::const_iterator bothIt(bothSpecial->begin());

  // Only assigned (and only advanced or dereferenced) when distributions exist.
  TDomainDistributions::const_iterator distrIt;
  if (hasDistributions)
    distrIt = distributions->begin();

  float total = 0.0;
  while (lhs != lhsEnd) {
    const TValue &v1 = *lhs, &v2 = *rhs;
    float dd = 0.0;

    if (v1.varType==TValue::INTVAR) {
      const bool unknown1 = v1.isSpecial();
      const bool unknown2 = v2.isSpecial();

      if (unknown1 && unknown2)
        dd = *bothIt;
      else if (unknown1 || unknown2) {
        if (!hasDistributions)
          raiseError("'distributions' not set; cannot deal with unknown values");
        // Exactly one value is known; use its estimate from the distribution.
        const TValue &known = unknown1 ? v2 : v1;
        dd = 1-(*distrIt)->atint(known.intV);
      }
      else if (v1.intV != v2.intV)
        dd = 1.0;
    }
    else if (*normIt>0) {
      const bool unknown1 = v1.isSpecial();
      const bool unknown2 = v2.isSpecial();

      if (unknown1 && unknown2)
        dd = float(0.5);
      else if (unknown1)
        dd = fabs(*avgIt - v2.floatV) / *normIt;
      else if (unknown2)
        dd = fabs(*avgIt - v1.floatV) / *normIt;
      else
        dd = fabs(v1.floatV - v2.floatV) / *normIt;
    }

    total += dd>1.0 ? 1.0 : dd;

    lhs++;
    rhs++;
    avgIt++;
    normIt++;
    bothIt++;
    if (hasDistributions)
      distrIt++;
  }

  return total;
}
// Distance between two values of attribute number attrNo, capped at 1.
//
//   discrete (INTVAR)
//     both special : bothSpecial[attrNo]
//     one special  : 1 - atint(known value) of distributions[attrNo]
//     none special : 1 if the values differ, else 0
//   otherwise, if normalizations[attrNo] is positive
//     both special : 0.5
//     one special  : |averages[attrNo] - known| / normalizations[attrNo]
//     none special : |v1 - v2| / normalizations[attrNo]
//   otherwise      : -1 (the initial value is returned untouched)
float TExamplesDistance_Relief::operator()(const int &attrNo, const TValue &v1, const TValue &v2) const
{
  float result = -1.0;

  if (v1.varType==TValue::INTVAR) {
    const bool unknown1 = v1.isSpecial();
    const bool unknown2 = v2.isSpecial();

    if (unknown1 && unknown2)
      result = bothSpecial->at(attrNo);
    else if (unknown1)
      result = 1 - distributions->at(attrNo)->atint(v2.intV);
    else if (unknown2)
      result = 1 - distributions->at(attrNo)->atint(v1.intV);
    else if (v1.intV != v2.intV)
      result = 1.0;
    else
      result = 0.0;
  }
  else if (normalizations->at(attrNo)>0) {
    const bool unknown1 = v1.isSpecial();
    const bool unknown2 = v2.isSpecial();

    if (unknown1 && unknown2)
      result = 0.5;
    else if (unknown1)
      result = fabs(averages->at(attrNo) - v2.floatV) / normalizations->at(attrNo);
    else if (unknown2)
      result = fabs(averages->at(attrNo) - v1.floatV) / normalizations->at(attrNo);
    else
      result = fabs(v1.floatV - v2.floatV) / normalizations->at(attrNo);
  }

  return result>1.0 ? 1.0 : result;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -