📄 measures.cpp
字号:
const TDiscDistribution &dist = CAST_TO_DISCDISTRIBUTION(*ci);
if (ci == mostCommon) {
TDiscDistribution dist2 = dist;
dist2 += probabilities->innerDistributionUnknown;
N += dist2.cases;
continueCost += dist2.cases * majorityCost(dist2);
}
else {
N += dist.cases;
continueCost += dist.cases * majorityCost(dist.distribution);
}
}
if (unknownsTreatment == UnknownsAsValue) {
const float &cases = probabilities->innerDistributionUnknown->cases;
N += cases;
continueCost += cases * majorityCost(CAST_TO_DISCDISTRIBUTION(probabilities->innerDistributionUnknown));
}
continueCost /= N;
float cost = stopCost - continueCost;
if (unknownsTreatment == ReduceByUnknowns)
cost *= (outer.cases / (outer.unknowns + outer.cases));
return round0(cost);
}
// Builds the MSE measure. The base class is set up with
// (Contingency_Class, false, true), the m parameter starts at 0 and the
// treatment of unknown values is taken from unkTreat.
TMeasureAttribute_MSE::TMeasureAttribute_MSE(const int &unkTreat)
  : TMeasureAttribute(Contingency_Class, false, true), m(0), unknownsTreatment(unkTreat)
{
}
// Scores a discrete attribute against a continuous class as the relative
// reduction of the class spread: (I_orig - I) / I_orig, where I_orig is
// computed from classDistribution and I from the per-value distributions
// stored in the contingency.
//
//   cont               contingency of the attribute (outer) vs. the class (inner)
//   classDistribution  distribution of the class; must be continuous
//   apriorClass        optional prior class distribution; used only when m > 0
//
// Returns 0 when the class distribution carries no weight, when I_orig is
// not positive, or when no attribute value carries any weight. Raises an
// error when the class is not continuous or the attribute is not discrete.
//
// Unknown attribute values are handled according to unknownsTreatment:
//   UnknownsToCommon  - the class distribution of examples with unknown
//                       attribute value is added to the most probable value;
//   UnknownsAsValue   - that distribution is treated as one more value;
//   ReduceByUnknowns  - the score is scaled by cases / (unknowns + cases).
//
// The expression sum2 - sum*sum/abs is used throughout as the spread of a
// group (presumably sum2 is the weighted sum of squares and abs the total
// weight -- confirm against TContDistribution).
float TMeasureAttribute_MSE::operator()(PContingency cont, PDistribution classDistribution, PDistribution apriorClass)
{
  checkDiscreteContinuous(cont, "MeasureAttribute_MSE");

  const TDistribution &outer = CAST_TO_DISCDISTRIBUTION(cont->outerDistribution);

  if (cont->innerVariable->varType!=TValue::FLOATVAR)
    raiseError("cannot evaluate attribute in domain with discrete classes");
  if (cont->outerVariable->varType!=TValue::INTVAR)
    raiseError("cannot evaluate continuous attributes");

  const TContDistribution &classDist = CAST_TO_CONTDISTRIBUTION(classDistribution);
  float W=classDist.abs;
  if (W<=0)
    return 0.0;

  float I_orig=(classDist.sum2-classDist.sum*classDist.sum/W)/W;
  if (I_orig<=0.0)
    return 0.0;

  // end() serves as "no such value" when unknowns are not merged into the
  // most common value, so the comparison in the loop never succeeds.
  TDistributionVector::const_iterator mostCommon = (unknownsTreatment == UnknownsToCommon)
    ? cont->discrete->begin() + outer.highestProbIntIndex()
    : cont->discrete->end();

  float I=0;
  float downW=0;
  const_ITERATE(TDistributionVector, ci, *cont->discrete) {
    const TContDistribution &tdist = CAST_TO_CONTDISTRIBUTION(*ci);
    float ssum2 = tdist.sum2;
    float ssum = tdist.sum;
    float sabs = tdist.abs;

    if (ci==mostCommon) {
      // Merge in the examples whose attribute value is unknown. This uses
      // innerDistributionUnknown (as the UnknownsAsValue branch below does),
      // not the marginal innerDistribution, which would count every example
      // a second time.
      const TContDistribution &unkDist = CAST_TO_CONTDISTRIBUTION(cont->innerDistributionUnknown);
      ssum2 += unkDist.sum2;
      ssum += unkDist.sum;
      sabs += unkDist.abs;
    }

    // Skip empty groups; dividing by a zero weight would yield NaN.
    if (sabs>0) {
      I += ssum2 - ssum*ssum / sabs;
      downW += sabs;
    }
  }

  if (unknownsTreatment == UnknownsAsValue) {
    const TContDistribution &tdist = CAST_TO_CONTDISTRIBUTION(cont->innerDistributionUnknown);
    // With no unknown values the distribution is empty and contributes nothing.
    if (tdist.abs>0) {
      I += tdist.sum2 - tdist.sum*tdist.sum/tdist.abs;
      downW += tdist.abs;
    }
  }

  bool priorApplied = false;
  if (apriorClass && (m>0)) {
    const TContDistribution &tdist = CAST_TO_CONTDISTRIBUTION(apriorClass);
    // An empty prior has no defined spread; fall back to the plain estimate.
    if (tdist.abs>0) {
      I = (I + m * (tdist.sum2 - tdist.sum * tdist.sum/tdist.abs) / tdist.abs)
          / (downW + m);
      priorApplied = true;
    }
  }

  if (!priorApplied) {
    // No attribute value carries any weight: nothing can be said about it.
    if (downW<=0)
      return 0.0;
    I /= downW;
  }

  float mse = (I_orig - I)/I_orig;
  if (unknownsTreatment == ReduceByUnknowns)
    mse *= (outer.cases / (outer.unknowns + outer.cases));

  return round0(mse);
}
// Builds the Relief measure with ak stored as k and am stored as m.
// prevExamples starts at -1 and prevWeight at 0; checkNeighbourhood compares
// them with the generator's version and the weight id to decide whether the
// neighbourhood has to be recomputed.
TMeasureAttribute_relief::TMeasureAttribute_relief(int ak, int am)
  : TMeasureAttribute(Generator, true, false), k(ak), m(am), prevExamples(-1), prevWeight(0)
{
}
// Ordering predicate for (index, distance) pairs: true when the first pair's
// second component is strictly smaller than the second pair's. The first
// components are ignored.
inline bool compare2nd(const pair<int, float> &o1, const pair<int, float> &o2)
{
  return o2.second > o1.second;
}
// Rebuilds the cached neighbourhood used by the Relief measure.
//
// Examples with a known class are copied (or referenced) into storedExamples,
// grouped by class value (a single group for a continuous class), and for
// each reference example the nearest neighbours from every group are stored
// in `neighbourhood` together with their weights.
//
//   gen       source of examples; its domain must have a discrete or
//             continuous class
//   weightID  id of the weight meta attribute (presumably read by the WEIGHT
//             macro -- confirm); when it is 0 and m exceeds the number of
//             examples, every example is used as a reference
//
// Raises an error for a class-less domain, for a class that is neither
// discrete nor continuous, and when no example has a known class.
void TMeasureAttribute_relief::prepareNeighbours(PExampleGenerator gen, const int &weightID)
{
  neighbourhood.clear();

  if (!gen->domain->classVar)
    raiseError("classless domain");

  const bool regression = gen->domain->classVar->varType == TValue::FLOATVAR;
  if (!regression && (gen->domain->classVar->varType != TValue::INTVAR))
    raiseError("cannot compute ReliefF of a class that is neither discrete nor continuous");

  // Keep only the examples whose class is known.
  storedExamples = mlnew TExampleTable(gen->domain, !gen.is_derived_from(TExampleTable));
  TExampleTable &table = dynamic_cast<TExampleTable &>(storedExamples.getReference());
  PEITERATE(ei, gen)
    if (!(*ei).getClass().isSpecial())
      table.addExample(*ei);

  const int N = table.numberOfExamples();
  if (!N)
    raiseError("no examples with known class");

  // The class is addressed by the index just past the last attribute.
  const int classIdx = table.domain->attributes->size();

  // Indices of examples, one vector per class value (one vector in total for
  // a continuous class).
  vector<vector<int> > examplesByClasses(regression ? 1 : table.domain->classVar->noOfValues());

  if (table.domain->classVar->varType==TValue::INTVAR) {
    int index;
    TExampleIterator ei;
    for(ei = table.begin(), index = 0; ei; ++ei, index++)
      examplesByClasses.at(int((*ei).getClass())).push_back(index);

    // Drop classes without examples. erase() invalidates the iterator it is
    // given, so continue from the iterator it returns.
    for(vector<vector<int> >::iterator ebci = examplesByClasses.begin(); ebci != examplesByClasses.end(); ) {
      if (ebci->empty())
        ebci = examplesByClasses.erase(ebci);
      else
        ++ebci;
    }
  }
  else {
    // Continuous class: a single group holding every example index 0..N-1.
    vector<int> &allExamples = examplesByClasses.front();
    allExamples.resize(N);
    for(int i = 0; i < N; i++)
      allExamples[i] = i;
  }

  distance = TExamplesDistanceConstructor_Relief()(gen);
  const TExamplesDistance_Relief &rdistance = dynamic_cast<const TExamplesDistance_Relief &>(distance.getReference());

  // Seeded with N, so the choice of reference examples is repeatable for a
  // given number of examples.
  TRandomGenerator rgen(N);
  int referenceIndex = 0;
  const bool useAll = (m==-1) || (!weightID && (m>N));
  float referenceExamples, referenceWeight;

  // Either walk through all examples in order, or draw random references
  // until their accumulated weight reaches m.
  for(referenceExamples = 0; useAll ? (referenceIndex < N) : (referenceExamples < m); referenceExamples += referenceWeight, referenceIndex++) {
    if (!useAll)
      referenceIndex = rgen.randlong(N);
    TExample &referenceExample = table[referenceIndex];
    referenceWeight = WEIGHT(referenceExample);
    const TValue &referenceValue = referenceExample.getClass();
    const int referenceClass= regression ? 0 : referenceExample.getClass().intV;

    neighbourhood.push_back(referenceIndex);
    vector<TNeighbourExample> &refNeighbours = neighbourhood.back().neighbours;

    // NOTE(review): ndC is reset for every reference example, so after the
    // loop it holds only the last reference's contribution, and it is not
    // rescaled by `adj` below. Kept as in the original -- confirm intent.
    ndC = 0.0;

    ITERATE(vector<vector<int> >, cli, examplesByClasses) {
      const float inCliClass = (*cli).size();
      const float classReferenceWeight =
        regression ? referenceWeight
                   : referenceWeight * (referenceExample.getClass().intV == table[cli->front()].getClass().intV ? -1.0 : float(inCliClass) / float(N-inCliClass));

      // Sized from the container itself rather than from the float copy, so
      // the element count is exact for any group size.
      vector<pair<int, float> > distances(cli->size());
      vector<pair<int, float> >::iterator disti = distances.begin(), diste;
      ITERATE(vector<int> , clii, *cli)
        *disti++ = make_pair(*clii, rdistance(referenceExample, table[*clii]));

      diste = distances.end();
      disti = distances.begin();
      sort(disti, diste, compare2nd);

      int startNew = refNeighbours.size();

      // Skip examples at non-positive distance (this includes the reference
      // example itself).
      while(disti != diste && (disti->second <= 0))
        disti++;

      // Collect neighbours until their weight adds up to k; examples at the
      // same distance are always taken together.
      float inWeight, needwei;
      for(needwei = k; (disti != diste) && (needwei > 1e-6); ) {
        const float thisDist = disti->second;
        inWeight = 0.0;
        const int inAdded = refNeighbours.size();
        do {
          TExample &neighbourExample = table[disti->first];
          const float neighbourWeight = WEIGHT(neighbourExample);
          const float weightEE = neighbourWeight * referenceWeight;
          inWeight += neighbourWeight;

          if (regression) {
            const float classDist = rdistance(classIdx, neighbourExample.getClass(), referenceValue);
            refNeighbours.push_back(TNeighbourExample(disti->first,
                                                      weightEE * classDist,
                                                      weightEE));
            ndC += weightEE * classDist;
          }
          else
            refNeighbours.push_back(TNeighbourExample(disti->first, weightEE * (neighbourExample.getClass().intV == referenceClass ? -1 : 1)));
        } while ((++disti != diste) && (disti->second == thisDist));

        needwei -= inWeight;
      }

      // k - needwei is the weight actually collected; when it exceeds 1 the
      // weights of the neighbours added for this class are divided by it.
      if (k-needwei > 1) {
        const float adj = 1.0 / (k - needwei);
        if (regression)
          for(vector<TNeighbourExample>::iterator ai(refNeighbours.begin() + startNew), ae(refNeighbours.end()); ai != ae; ai++) {
            ai->weight *= adj;
            ai->weightEE *= adj;
          }
        else
          for(vector<TNeighbourExample>::iterator ai(refNeighbours.begin() + startNew), ae(refNeighbours.end()); ai != ae; ai++)
            ai->weight *= adj;
      }
    }
  }

  if (regression)
    m_ndC = referenceExamples - ndC;
  else
    // Discrete class: divide every neighbour weight by the total weight of
    // the reference examples.
    ITERATE(vector<TReferenceExample>, rei, neighbourhood) {
      const float adj = 1.0 / referenceExamples;
      ITERATE(vector<TNeighbourExample>, nei, rei->neighbours)
        nei->weight *= adj;
    }
}
// Makes sure the cached neighbourhood matches the given examples and weight.
// Raises an error for a domain without a class. When neither the generator's
// version nor the weight id changed since the last call, nothing is done;
// otherwise the cached measures are dropped, the neighbourhood is rebuilt and
// the new version and weight id are remembered.
void TMeasureAttribute_relief::checkNeighbourhood(PExampleGenerator gen, const int &weightID)
{
  if (!gen->domain->classVar)
    raiseError("class-less domain");

  const bool upToDate = (prevExamples == gen->version) && (weightID == prevWeight);
  if (upToDate)
    return;

  measures.clear();
  prepareNeighbours(gen, weightID);
  prevExamples = gen->version;
  prevWeight = weightID;
}
// Computes the value of `variable` for every example in `gen` and returns
// the values in a newly allocated array with one slot per example; slots of
// examples with a special (unknown) value hold ILLEGAL_FLOAT.
//
//   weightID  id of the example weight (presumably read by the WEIGHT macro
//             -- confirm)
//   min, max  receive the smallest and largest known value; both are 0 when
//             no value is known
//   avg       receives the weighted average of the known values (left as the
//             plain weighted sum when the total weight is not above 1e-6)
//   N         receives the total weight of examples with a known value
//
// The array is allocated with new[]; the caller owns it and must release it
// with delete[]. If computing a value throws, the array is freed before the
// exception is passed on.
float *tabulateContinuousValues(PExampleGenerator gen, const int &weightID, TVariable &variable,
                                float &min, float &max, float &avg, float &N)
{
  float *precals = new float[gen->numberOfExamples()];

  // Defined even when every value is unknown, so that callers can safely
  // compare min and max.
  min = max = 0.0;
  avg = N = 0.0;

  try {
    float *pc = precals;
    PEITERATE(ei, gen) {
      const TValue &val = variable.computeValue(*ei);
      if (val.isSpecial())
        *pc++ = ILLEGAL_FLOAT;
      else {
        *pc++ = val.floatV;

        // While no weight has been accumulated, the current value
        // (re)initializes the range.
        if (N == 0.0)
          max = min = val.floatV;
        else if (val.floatV > max)
          max = val.floatV;
        else if (val.floatV < min)
          min = val.floatV;

        const float w = WEIGHT(*ei);
        avg += w * val.floatV;
        N += w;
      }
    }
  }
  catch (...) {
    delete[] precals;
    throw;
  }

  if (N > 1e-6)
    avg /= N;

  return precals;
}
// Computes the value of the discrete `variable` for every example in `gen`
// and returns the values in a newly allocated array with one slot per
// example; slots of examples whose value is special or outside
// [0, number of values) hold ILLEGAL_INT.
//
//   weightID  id of the example weight (presumably read by the WEIGHT macro
//             -- confirm)
//   unk       receives a newly allocated array with one entry per value of
//             the variable, holding 1 - (accumulated weight of that value)
//   bothUnk   receives 1 - (sum of squared accumulated weights)
//
// NOTE(review): the accumulated weights are not divided by the total weight
// before being used in these formulas -- confirm this is intended.
//
// Both arrays are allocated with new[]; the caller owns them and must
// release them with delete[]. On an exception both are freed, `unk` is set
// to NULL and the exception is passed on. `variable` must be a
// TEnumVariable, otherwise the dynamic_cast throws.
int *tabulateDiscreteValues(PExampleGenerator gen, const int &weightID, TVariable &variable,
                            float *&unk, float &bothUnk)
{
  const int noVal = dynamic_cast<TEnumVariable &>(variable).noOfValues();

  // Cleared first so the handler below never deletes a stale caller pointer.
  unk = NULL;
  int *precals = new int[gen->numberOfExamples()];

  try {
    // Allocated inside the try block so precals is freed if this throws.
    unk = new float[noVal];

    float *ui, *ue = unk + noVal;
    for(ui = unk; ui != ue; *ui++ = 0.0);

    int *pc = precals;
    PEITERATE(ei, gen) {
      const TValue &val = variable.computeValue(*ei);
      if (val.isSpecial() || (val.intV >= noVal) || (val.intV < 0))
        *pc++ = ILLEGAL_INT;
      else {
        *pc++ = val.intV;
        unk[val.intV] += WEIGHT(*ei);
      }
    }

    bothUnk = 1.0;
    for(ui = unk; ui != ue; ui++) {
      bothUnk -= *ui * *ui;
      *ui = 1 - *ui;
    }
  }
  catch (...) {
    // Arrays obtained from new[] must be released with delete[].
    delete[] unk;
    unk = NULL;
    delete[] precals;
    precals = NULL;
    throw;
  }

  return precals;
}
float TMeasureAttribute_relief::operator()(PVariable var, PExampleGenerator gen, PDistribution aprior, int weightID)
{
checkNeighbourhood(gen, weightID);
// the attribute is in the domain
const int attrIdx = gen->domain->getVarNum(var, false);
if (attrIdx != ILLEGAL_INT) {
if (measures.empty()) {
const TExamplesDistance_Relief &rdistance = dynamic_cast<const TExamplesDistance_Relief &>(distance.getReference());
const TExampleTable &table = dynamic_cast<const TExampleTable &>(gen.getReference());
const int nAttrs = gen->domain->attributes->size();
measures = vector<float>(nAttrs, 0.0);
vector<float>::iterator mb(measures.begin()), mi;
const vector<float>::const_iterator me(measures.end());
TExample::const_iterator e1i, e1b, e2i;
int attrNo;
if (gen->domain->classVar->varType == TValue::FLOATVAR) {
vector<float> ndA(nAttrs, 0.0);
vector<float> ndCdA(nAttrs, 0.0);
vector<float>::iterator ndAb(ndA.begin()), ndAi;
const vector<float>::const_iterator ndAe(ndA.end());
vector<float>::iterator ndCdAb(ndCdA.begin()), ndCdAi;
ITERATE(vector<TReferenceExample>, rei, neighbourhood) {
const TExample &referenceExample = table[rei->index];
e1b = referenceExample.begin();
ITERATE(vector<TNeighbourExample>, nei, rei->neighbours) {
const float &weight = nei->weight;
const float &weightEE = nei->weightEE;
for(attrNo = 0, e1i = e1b, e2i = table[nei->index].begin(), ndAi = ndAb, ndCdAi = ndCdAb; ndAi != ndAe; ndAi++, ndCdAi++, e1i++, e2i++, attrNo++) {
const float attrDist = rdistance(attrNo, *e1i, *e2i);
*ndAi += weightEE * attrDist;
*ndCdAi += weight * attrDist;
}
}
}
for(ndAi = ndAb, ndCdAi = ndCdAb, mi = mb; mi != me; mi++, ndAi++, ndCdAi++)
*mi = *ndCdAi / ndC - (*ndAi - *ndCdAi) / m_ndC;
}
else {
ITERATE(vector<TReferenceExample>, rei, neighbourhood) {
const TExample &referenceExample = table[rei->index];
e1b = referenceExample.begin();
ITERATE(vector<TNeighbourExample>, nei, rei->neighbours) {
const float &weight = nei->weight;
for(attrNo = 0, e1i = e1b, e2i = table[nei->index].begin(), mi = mb; mi != me; e1i++, e2i++, mi++, attrNo++)
*mi += weight * rdistance(attrNo, *e1i, *e2i);
}
}
}
}
return measures[attrIdx];
}
// the attribute is not in the domain
else {
if (!var->getValueFrom)
raiseError("attribute is not among the domain attributes and cannot be computed from them");
const TExampleTable &table = dynamic_cast<const TExampleTable &>(gen.getReference());
TVariable &variable = var.getReference();
const int nExamples = gen->numberOfExamples();
PExamplesDistance distance;
// continuous attribute
if (variable.varType == TValue::FLOATVAR) {
float avg, min, max, N;
float *precals = tabulateContinuousValues(gen, weightID, variable, min, max, avg, N);
try {
if ((min == max) || (N < 1e-6)) {
delete precals;
return 0.0;
}
const float nor = 1.0 / (min-max);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -