📄 measures.cpp
字号:
// continuous attribute, continuous class
if (gen->domain->classVar->varType == TValue::FLOATVAR) {
float ndA = 0.0, ndCdA = 0.0;
ITERATE(vector<TReferenceExample>, rei, neighbourhood) {
const float refVal = precals[rei->index];
if (refVal == ILLEGAL_FLOAT)
ITERATE(vector<TNeighbourExample>, nei, rei->neighbours) {
const float neiVal = precals[nei->index];
const float attrDist = (neiVal == ILLEGAL_FLOAT) ? 0.5 : fabs(avg - neiVal) * nor;
ndA += nei->weightEE * attrDist;
ndCdA += nei->weight * attrDist;
}
else {
ITERATE(vector<TNeighbourExample>, nei, rei->neighbours) {
const float neiVal = precals[nei->index];
const float attrDist = fabs(refVal - (neiVal == ILLEGAL_FLOAT ? avg : neiVal)) * nor;
ndA += nei->weightEE * attrDist;
ndCdA += nei->weight * attrDist;
}
}
}
delete precals;
return ndCdA / ndC - (ndA - ndCdA) / m_ndC;
}
// continuous attribute, discrete class
else {
float relf = 0.0;
ITERATE(vector<TReferenceExample>, rei, neighbourhood) {
const float refVal = precals[rei->index];
if (refVal == ILLEGAL_FLOAT)
ITERATE(vector<TNeighbourExample>, nei, rei->neighbours) {
const float neiVal = precals[nei->index];
const float attrDist = (neiVal == ILLEGAL_FLOAT) ? 0.5 : fabs(avg - neiVal) * nor;
relf += nei->weight * attrDist;
}
else {
ITERATE(vector<TNeighbourExample>, nei, rei->neighbours) {
const float neiVal = precals[nei->index];
const float attrDist = fabs(refVal - (neiVal == ILLEGAL_FLOAT ? avg : neiVal)) * nor;
relf += nei->weight * attrDist;
}
}
}
delete precals;
return relf;
}
}
catch (...) {
delete precals;
throw;
}
}
// discrete attribute
else {
float *unk, bothUnk;
int *precals = tabulateDiscreteValues(gen, weightID, var.getReference(), unk, bothUnk);
try {
// discrete attribute, continuous class
if (gen->domain->classVar->varType == TValue::FLOATVAR) {
float ndA = 0.0, ndCdA = 0.0;
ITERATE(vector<TReferenceExample>, rei, neighbourhood) {
const int refVal = precals[rei->index];
if (refVal == ILLEGAL_INT)
ITERATE(vector<TNeighbourExample>, nei, rei->neighbours) {
const int neiVal = precals[nei->index];
const float attrDist = (neiVal == ILLEGAL_INT) ? bothUnk : unk[neiVal];
ndA += nei->weightEE * attrDist;
ndCdA += nei->weight * attrDist;
}
else {
ITERATE(vector<TNeighbourExample>, nei, rei->neighbours) {
const int neiVal = precals[nei->index];
const float attrDist = (neiVal == ILLEGAL_INT) ? unk[refVal] : (refVal != neiVal ? 1.0 : 0.0);
ndA += nei->weightEE * attrDist;
ndCdA += nei->weight * attrDist;
}
}
}
delete unk;
delete precals;
return ndCdA / ndC - (ndA - ndCdA) / m_ndC;
}
// discrete attribute, discrete class
else {
float relf = 0.0;
ITERATE(vector<TReferenceExample>, rei, neighbourhood) {
const int refVal = precals[rei->index];
if (refVal == ILLEGAL_FLOAT)
ITERATE(vector<TNeighbourExample>, nei, rei->neighbours) {
const int neiVal = precals[nei->index];
relf += nei->weight * ((neiVal == ILLEGAL_INT) ? bothUnk : unk[neiVal]);
}
else {
ITERATE(vector<TNeighbourExample>, nei, rei->neighbours) {
const int neiVal = precals[nei->index];
relf += nei->weight * ((neiVal == ILLEGAL_INT) ? unk[refVal] : (refVal != neiVal ? 1.0 : 0.0));
}
}
}
delete unk;
delete precals;
return relf;
}
}
catch (...) {
delete unk;
delete precals;
throw;
}
}
}
}
// Fills `res` with (position, cumulative score) pairs.
//
// The overload of thresholdFunction() that takes a TFunctionAdder supplies
// (position, increment) entries; they are visited in the container's
// iteration order and each output pair carries the sum of all increments up
// to and including that entry. Any previous content of `res` is discarded.
// The unnamed PDistribution argument is ignored.
void TMeasureAttribute_relief::thresholdFunction(TFloatFloatList &res, PVariable var, PExampleGenerator gen, PDistribution, int weightID)
{
  TFunctionAdder increments;
  thresholdFunction(var, gen, increments, weightID);

  res.clear();

  float runningTotal = 0;
  TFunctionAdder::const_iterator step(increments.begin());
  const TFunctionAdder::const_iterator stop(increments.end());
  while (step != stop) {
    runningTotal += step->second;
    res.push_back(make_pair(step->first, runningTotal));
    step++;
  }
}
// Searches for the cut point of attribute `var` with the highest cumulative
// score.
//
// thresholdFunction() supplies (position, score increment) entries in `divs`;
// the running sum of the increments is the score of a cut after that position.
// Ties between equally good cuts are broken with a random generator seeded
// with the number of examples.
//
//   subsetSizes  out: NULL when no subset-size constraint is used or no cut
//                was accepted; otherwise a two-bin TDiscDistribution holding
//                the weight left (bin 0) and right (bin 1) of the chosen cut.
//   bestScore    out: score of the chosen cut; written only when a cut is
//                accepted.
//   minSubset    when > 0, each side of the cut must weigh at least this much.
//
// Returns the chosen threshold, or ILLEGAL_FLOAT if no cut was accepted.
float TMeasureAttribute_relief::bestThreshold(PDistribution &subsetSizes, float &bestScore, PVariable var, PExampleGenerator gen, PDistribution, int weightID, const float &minSubset)
{
  TFunctionAdder divs;
  int wins = 0;
  float score = 0.0, bestThreshold = ILLEGAL_FLOAT;
  TRandomGenerator rgen(gen->numberOfExamples());

  if (minSubset > 0) {
    float *attrVals = NULL;
    thresholdFunction(var, gen, divs, weightID, &attrVals);

    // Build the distribution of attribute values; it is needed to know how
    // much weight falls on each side of a candidate cut.
    TContDistribution *valueDistribution;
    PDistribution wvd;
    if (attrVals) {
      // thresholdFunction handed over an array with one value per example
      // (it is walked below up to gen->numberOfExamples()); we own it now.
      try {
        float *vali = attrVals, *vale;
        wvd = valueDistribution = new TContDistribution(var);
        // The braces matter: without them the `else` attaches to the inner
        // `if (*vali != ILLEGAL_FLOAT)` instead of to `if (weightID)`.
        if (weightID) {
          for(TExampleIterator ei(gen->begin()); ei; ++ei, vali++)
            if (*vali != ILLEGAL_FLOAT)
              valueDistribution->addfloat(*vali, WEIGHT(*ei));
        }
        else {
          for(vali = attrVals, vale = attrVals + gen->numberOfExamples(); vali != vale; vali++)
            if (*vali != ILLEGAL_FLOAT)
              valueDistribution->addfloat(*vali);
        }
      }
      catch (...) {
        delete[] attrVals;
        throw;
      }
      delete[] attrVals;
      attrVals = NULL;
    }
    else {
      wvd = new TContDistribution(gen, var, weightID);
      valueDistribution = wvd.AS(TContDistribution);
    }

    float left = 0.0, right = valueDistribution->abs;
    float bestLeft = 0.0, bestRight = 0.0;
    map<float, float>::iterator distb(valueDistribution->begin()), diste(valueDistribution->end()), disti = distb, disti2;
    for(TFunctionAdder::const_iterator di(divs.begin()), de(divs.end()); di != de; di++) {
      score += di->second;
      if (!wins || (score > bestScore) || ((score == bestScore) && rgen.randbool(++wins))) {
        // Move all values not greater than this position to the left side.
        for(; (disti != diste) && (disti->first <= di->first); disti++) {
          left += disti->second;
          right -= disti->second;
        }
        if ((left < minSubset))
          continue;
        // The right side only shrinks from here on, so no later cut can fit.
        if ((right < minSubset) || (disti == diste))
          break;
        if (!wins || (score > bestScore))
          wins = 1;
        bestScore = score;
        bestLeft = left;
        bestRight = right;
        // disti cannot be distb (contemplate the above for)
        disti2 = disti;
        bestThreshold = (disti->first + (--disti2)->first) / 2.0;
      }
    }

    if (!wins) {
      subsetSizes = NULL;
      return ILLEGAL_FLOAT;
    }

    subsetSizes = new TDiscDistribution(2);
    subsetSizes->addint(0, bestLeft);
    subsetSizes->addint(1, bestRight);
    return bestThreshold;
  }
  else {
    thresholdFunction(var, gen, divs, weightID);
    for(TFunctionAdder::const_iterator db(divs.begin()), de(divs.end()), di = db, di2; di != de; di++) {
      score += di->second;

      // A strictly better score (or the very first candidate) always wins and
      // resets the tie counter; an equal score wins with a random draw.
      bool accept;
      if (!wins || (score > bestScore)) {
        wins = 1;
        accept = true;
      }
      else
        accept = (score == bestScore) && rgen.randbool(++wins);

      if (accept) {
        di2 = di;
        // Midpoint between this position and the next one; for the last
        // entry there is no next one and the position itself is used.
        bestThreshold = (++di2 == de) && (--di2 == db) ? di->first : (di->first + di2->first) / 2.0;
        bestScore = score;
      }
    }
    subsetSizes = NULL;
    return wins ? bestThreshold : ILLEGAL_FLOAT;
  }
}
// Computes a symmetric matrix of pairwise gains for the values of a discrete
// attribute: cell (a, b) accumulates the contribution of every
// (reference example, neighbour) pair whose attribute values are a and b.
// Pairs in which either value is unknown are skipped, since the matrix has no
// cell for an unknown value.
//
//   var        attribute to evaluate; must be a TEnumVariable.
//   gen        examples; the neighbourhood is (re)checked against them.
//   weightID   id of the weight meta-attribute, passed on to the helpers.
//   attrVals   optional out: set to NULL when the attribute is part of the
//              domain; otherwise receives the array of tabulated values, one
//              per example, and the caller becomes responsible for freeing it.
//   attrDistr  optional out: set to NULL when the attribute is part of the
//              domain; otherwise receives a new float[noOfValues] holding
//              1 - unk[v] for each value v (caller frees it).
//
// If an exception is raised while the matrix is being filled, both
// out-parameters are reset to NULL and nothing is left for the caller to free.
PSymMatrix TMeasureAttribute_relief::gainMatrix(PVariable var, PExampleGenerator gen, PDistribution, int weightID, int **attrVals, float **attrDistr)
{
  TEnumVariable *evar = var.AS(TEnumVariable);
  if (!evar)
    raiseError("gainMatrix can only be computed for discrete attributes");

  checkNeighbourhood(gen, weightID);

  TSymMatrix *gains = new TSymMatrix(evar->noOfValues());
  PSymMatrix wgains = gains;

  const int attrIdx = gen->domain->getVarNum(var, false);
  const bool regression = gen->domain->classVar->varType == TValue::FLOATVAR;

  if (attrIdx != ILLEGAL_INT) {
    // The attribute is in the domain: read values straight from the table.
    if (attrVals)
      *attrVals = NULL;
    if (attrDistr)
      *attrDistr = NULL;

    const TExamplesDistance_Relief &rdistance = dynamic_cast<const TExamplesDistance_Relief &>(distance.getReference());
    const TExampleTable &table = dynamic_cast<const TExampleTable &>(gen.getReference());

    ITERATE(vector<TReferenceExample>, rei, neighbourhood) {
      const TValue &refVal = table[rei->index][attrIdx];
      if (refVal.isSpecial())
        continue;
      const int &refValI = refVal.intV;

      ITERATE(vector<TNeighbourExample>, nei, rei->neighbours) {
        const TValue &neiVal = table[nei->index][attrIdx];
        if (neiVal.isSpecial())
          continue;

        const float attrDist = rdistance(attrIdx, refVal, neiVal);
        if (regression) {
          const float dCdA = nei->weight * attrDist;
          const float dA = nei->weightEE * attrDist;
          gains->getref(refValI, neiVal.intV) += dCdA / ndC - (dA - dCdA) / m_ndC;
        }
        else
          gains->getref(refValI, neiVal.intV) += nei->weight * attrDist;
      }
    }
  }
  else {
    // The attribute has to be computed from the others.
    if (!var->getValueFrom)
      raiseError("attribute is not among the domain attributes and cannot be computed from them");

    float *unk = NULL, bothUnk;
    int *precals = tabulateDiscreteValues(gen, weightID, var.getReference(), unk, bothUnk);
    float *distr = NULL;

    // NOTE(review): precals and unk are indexed as arrays below, so they are
    // released with delete[]; confirm tabulateDiscreteValues uses new[].
    try {
      if (attrDistr) {
        const int noVal = evar->noOfValues();
        distr = new float[noVal];
        for(int v = 0; v < noVal; v++)
          distr[v] = 1 - unk[v];
      }

      ITERATE(vector<TReferenceExample>, rei, neighbourhood) {
        const int refValI = precals[rei->index];
        // No matrix cell exists for an unknown value; skip, as the in-domain
        // branch does for special values.
        if (refValI == ILLEGAL_INT)
          continue;

        ITERATE(vector<TNeighbourExample>, nei, rei->neighbours) {
          const int neiVal = precals[nei->index];
          if (neiVal == ILLEGAL_INT)
            continue;
          // Equal values are at distance 0 and contribute nothing.
          if (refValI == neiVal)
            continue;

          const float attrDist = 1.0;
          if (regression) {
            const float dCdA = nei->weight * attrDist;
            const float dA = nei->weightEE * attrDist;
            gains->getref(refValI, neiVal) += dCdA / ndC - (dA - dCdA) / m_ndC;
          }
          else
            gains->getref(refValI, neiVal) += nei->weight * attrDist;
        }
      }
    }
    catch (...) {
      delete[] distr;
      delete[] unk;
      delete[] precals;
      // Do not hand the caller pointers to memory that was just released.
      if (attrVals)
        *attrVals = NULL;
      if (attrDistr)
        *attrDistr = NULL;
      throw;
    }

    delete[] unk;

    // Ownership of the remaining buffers passes to the caller on request.
    if (attrVals)
      *attrVals = precals;
    else
      delete[] precals;
    if (attrDistr)
      *attrDistr = distr;
  }

  return wgains;
}
// Finds the best split of the values of a discrete attribute into a "left"
// and a "right" group by exhaustively scoring all binarizations.
//
// The score of a binarization is built from the pairwise gains returned by
// gainMatrix(): moving one value to the other side adds the gains towards the
// values it is now separated from and subtracts the gains towards the values
// it has joined. Subsets are visited in Gray-code order so that consecutive
// candidates differ in exactly one value. Ties are broken with a random
// generator seeded with the number of examples.
//
//   subsetSizes  out: on success, a two-bin TDiscDistribution with the weight
//                of the left (bin 0) and right (bin 1) group; otherwise
//                untouched.
//   bestScore    out: reset to 0 before the search and then set to the score
//                of the best accepted split; untouched if the total weight is
//                already below minSubset.
//   minSubset    minimal weight required on each side of the split.
//
// Returns a list with one entry per attribute value: 1 if the value is in the
// right group, 0 if it is in the left group, -1 if its weight in the value
// distribution is not positive. Returns NULL when no acceptable split exists.
// Raises an error for non-discrete attributes and for attributes with more
// than 16 values.
PIntList TMeasureAttribute_relief::bestBinarization(PDistribution &subsetSizes, float &bestScore, PVariable var, PExampleGenerator gen, PDistribution apriorClass, int weightID, const float &minSubset)
{
  TEnumVariable *evar = var.AS(TEnumVariable);
  if (!evar)
    raiseError("cannot discretly binarize a continuous attribute");

  const int noVal = evar->noOfValues();
  if (noVal > 16)
    raiseError("cannot binarize an attribute with more than 16 values (it would take too long)");

  // Both buffers are arrays owned by this function; every exit path below
  // releases them with delete[].
  float *attrDistr = NULL;
  float *gains = NULL;
  int wins = 0, bestSubset = 0;
  float bestLeft = 0.0, bestRight = 0.0;

  try {
    PSymMatrix wgain = gainMatrix(var, gen, apriorClass, weightID, NULL, &attrDistr);
    TSymMatrix &gain = wgain.getReference();

    // Unpack the symmetric matrix into a dense row-major noVal x noVal table.
    gains = new float[noVal * noVal];
    float *gi = gains, *ge;
    float thisScore = 0.0;
    int i, j;
    for(i = 0; i < noVal; i++)
      for(j = 0; j < noVal; j++)
        *gi++ = gain.getitem(i, j);

    // Everything starts on the left; thisLeft is the total weight.
    float thisLeft = 0.0, thisRight = 0.0;
    float *ai, *ae;
    if (!attrDistr) {
      TDiscDistribution dd(gen, var, weightID);
      attrDistr = new float[noVal];
      ai = attrDistr;
      ae = attrDistr + noVal;
      // Values missing from the distribution count as weight 0 rather than
      // being read past the end of the vector.
      vector<float>::const_iterator di(dd.distribution.begin()), de(dd.distribution.end());
      for(; ai != ae; ai++) {
        *ai = (di != de) ? *di++ : 0.0f;
        thisLeft += *ai;
      }
    }
    else
      for(ai = attrDistr, ae = attrDistr + noVal; ai != ae; thisLeft += *ai++);

    if (thisLeft < minSubset) {
      delete[] gains;
      gains = NULL;
      delete[] attrDistr;
      attrDistr = NULL;
      return NULL;
    }

    bestSubset = 0;
    wins = 0;
    bestLeft = thisLeft;
    bestRight = 0.0;
    bestScore = 0;

    TRandomGenerator rgen(gen->numberOfExamples());

    // Number of non-trivial splits to visit; the highest value always stays
    // on the left, which avoids scoring each split twice.
    const int noSplits = noVal > 1 ? (1 << (noVal-1)) - 1 : 0;

    // if a bit in gray is 0, the corresponding value is on the left
    for(int cnt = noSplits, gray = 0; cnt; cnt--) {
      int prevgray = gray;
      gray = cnt ^ (cnt >> 1);

      // Index of the single value that changed sides.
      int graydiff = gray ^ prevgray;
      int diffed;
      for(diffed = 0; !(graydiff & 1); graydiff >>= 1, diffed++);

      const bool movedRight = gray > prevgray;

      // prevgray and gray differ only in the bit of the moved value, so the
      // old code can be used to tell on which side every other value is.
      // Gains towards values on the opposite side (after the move) are
      // added, gains towards values on the same side are subtracted.
      int sides = prevgray;
      for(gi = gains + diffed*noVal, ge = gi + noVal; gi != ge; gi++, sides >>= 1) {
        const bool otherOnRight = (sides & 1) != 0;
        thisScore += (otherOnRight != movedRight) ? *gi : -*gi;
      }

      if (movedRight) {
        thisLeft -= attrDistr[diffed];
        thisRight += attrDistr[diffed];
      }
      else {
        thisLeft += attrDistr[diffed];
        thisRight -= attrDistr[diffed];
      }

      if ((thisLeft >= minSubset) && (thisRight >= minSubset)) {
        // A strictly better score (or the first acceptable split) always
        // wins and resets the tie counter; an equal score wins at random.
        bool accept;
        if (!wins || (thisScore > bestScore)) {
          wins = 1;
          accept = true;
        }
        else
          accept = (thisScore == bestScore) && rgen.randbool(++wins);

        if (accept) {
          bestScore = thisScore;
          bestSubset = gray;
          bestLeft = thisLeft;
          bestRight = thisRight;
        }
      }
    }

    delete[] gains;
    gains = NULL;

    if (!wins || !bestSubset) {
      delete[] attrDistr;
      attrDistr = NULL;
      return NULL;
    }

    // Wrap the list at once so that it is released if anything below throws.
    TIntList *rightSide = new TIntList();
    PIntList wrightSide = rightSide;
    ai = attrDistr;
    for(i = noVal; i--; bestSubset = bestSubset >> 1, ai++)
      rightSide->push_back(*ai > 0 ? bestSubset & 1 : -1);

    delete[] attrDistr;
    attrDistr = NULL;

    subsetSizes = new TDiscDistribution(2);
    subsetSizes->addint(0, bestLeft);
    subsetSizes->addint(1, bestRight);
    return wrightSide;
  }
  catch (...) {
    delete[] gains;
    delete[] attrDistr;
    throw;
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -