📄 decomposition.cpp
字号:
/* A candidate neighbour: the distance to the reference example, a random
   number used to break ties between equally distant candidates, and an
   iterator to the candidate's TCI_w record. Instances are ordered by
   (dist, randoff), which lets them be kept in a std::set sorted by distance. */
class TDistRec {
public:
  float dist;
  long randoff;
  vector<TCI_w>::iterator exPointer;

  /* ep    - iterator to the candidate's record
     roff  - random tie-breaker
     adist - distance to the reference example */
  TDistRec(vector<TCI_w>::iterator ep, const int &roff, const float &adist)
  : dist(adist), randoff(roff), exPointer(ep)
  {}

  /* Orders by distance first; equal distances are ordered by the random offset. */
  bool operator <(const TDistRec &other) const
  {
    if (dist == other.dist)
      return randoff < other.randoff;
    return dist < other.dist;
  }

  /* Two records differ unless both the distance and the random offset match;
     exPointer does not take part in the comparison. */
  bool operator !=(const TDistRec &other) const
  { return !((dist == other.dist) && (randoff == other.randoff)); }
};
/* Default settings, as used by operator():
     k                 - neighbour weight collected per class (used when kFromColumns is not positive)
     m                 - total weight of reference examples to process
     kFromColumns      - if positive, k is replaced by kFromColumns * number of columns
     ignoreSameExample - skip neighbours at distance <= 0
     convertToBinary   - map classes to 0 (same as the reference example's class) and 1 (any other)
     correctClassFirst - put the reference example's class at index 0 and shift lower classes up by one
     allExamples       - use every example as a reference, consecutively, instead of random sampling
     allSameNeighbours - keep taking neighbours at distance <= 0 even after the weight quota is used up */
TIMByRowsByRelief::TIMByRowsByRelief()
  : k(10), m(50), kFromColumns(0.0),
    ignoreSameExample(false), convertToBinary(false), correctClassFirst(false),
    allExamples(false), allSameNeighbours(false)
{}
/* Builds a TIMByRows from the examples in `gen` by sampling nearest neighbours.

   The attributes in `aboundSet` define the columns: each combination of their
   values is one column. The attributes flagged in `free`, together with the
   class, form the domain in which distances between examples are measured.
   For every reference example a row is appended; for each class, the closest
   examples of that class (up to a total weight of k, or kFromColumns*columns
   when kFromColumns is positive) add refWeight*neighbourWeight*classWeight to
   the cell addressed by the neighbour's column and the (possibly adjusted)
   class index.

   gen       - source of examples; examples with an undefined class are skipped
   (unnamed) - ignored
   aboundSet - bound attributes (define the columns)
   free      - one flag per attribute of gen's domain; true for free attributes
   weightID  - meta id of example weights, passed to WEIGHT (defined elsewhere)

   Raises an error if there is no discrete class or if a bound attribute has
   an undefined value. */
PIMByRows TIMByRowsByRelief::operator()(PExampleGenerator gen, const vector<bool> &, const TVarList &aboundSet, const vector<bool> &free, const int &weightID)
{
  // number of class values, or -1 when the class is missing or not discrete
  int classes = (gen->domain->classVar && (gen->domain->classVar->varType==TValue::INTVAR))
                  ? gen->domain->classVar->noOfValues()
                  : -1;
  if (classes==-1)
    raiseError("there is no class or it is not discrete.");

  TRandomGenerator rgen(gen->numberOfExamples());

  // domain with bound attributes only (no class)
  PDomain boundDomain=mlnew TDomain(PVariable(), aboundSet);

  // domain with free attributes and the original class
  PDomain freeDomain;
  {
    TVarList freeSet;
    { TVarList::const_iterator vi(gen->domain->attributes->begin());
      const_ITERATE(vector<bool>, fi, free) {
        if (*fi)
          freeSet.push_back(*vi);
        vi++;
      }
    }
    freeDomain=mlnew TDomain(gen->domain->classVar, freeSet);
  }

  // number of values of each bound attribute; their product is the number of columns
  vector<int> values;
  int columns = 1;
  const_ITERATE(TVarList, bvi, aboundSet) {
    int tvalues=(*bvi)->noOfValues();
    values.push_back(tvalues);
    columns*=tvalues;
  }

  PIMByRows im = mlnew TIMByRows(gen->domain->classVar->varType);
  im->columnExamples = vector<PExample>(columns, PExample());

  float myK = (kFromColumns>0.0) ? kFromColumns*columns : k;

  // converts examples to freeDomain
  // and divides them into classTables
  TExampleTable freeExamples(freeDomain);
  vector<vector<TCI_w> > classTables(classes);  // per class: (column index, index into freeExamples)
  vector<float> gN(classes, 0.0);               // per class: total weight of examples

  {
    int freeIndex = 0;
    PEITERATE(ei, gen) {
      if ((*ei).getClass().isSpecial())
        continue;

      // column index = mixed-radix number composed of the bound attributes' values
      TExample boundExample(boundDomain, *ei);
      int ci = 0;
      for (int i = 0, vsize = values.size(); i<vsize; i++)
        if (boundExample[i].isSpecial())
          raiseError("attribute '%s' has undefined values", aboundSet[i]->name.c_str());
        else
          ci = ci*values[i]+boundExample[i].intV;

      // NOTE(review): freeExample is prepared but *ei is what gets added to the
      // table; presumably addExample converts it to freeDomain itself - confirm
      // whether freeExample was meant to be added instead.
      TExample freeExample(freeDomain, *ei);
      if (weightID)
        freeExample.setMeta(weightID, TValue(WEIGHT(*ei)));
      freeExamples.addExample(*ei);

      int classIndex = (*ei).getClass().intV;
      classTables[classIndex].push_back(TCI_w(ci, freeIndex++));
      gN[classIndex] += WEIGHT(*ei);

      // remember the first example seen for each column
      if (!im->columnExamples[ci])
        im->columnExamples[ci]=mlnew TExample(boundDomain, boundExample);
    }
  }

  PExamplesDistance_Relief useDistance = TExamplesDistanceConstructor_Relief()(PExampleGenerator(freeExamples), weightID);

  float gNtot = 0.0;
  ITERATE(vector<float>, gi, gN)
    gNtot += *gi;

  // the total number of examples
  long N = freeExamples.numberOfExamples();
  long eNum = -1;
  bool myAllExamples = allExamples || (m>N);

  // The loop ends when all examples have been used (myAllExamples) or when the
  // accumulated weight of reference examples reaches m.
  for(float referenceExamples = 0, refWeight; myAllExamples ? (eNum+1<N) : (referenceExamples<m); referenceExamples+=refWeight) {
    // choose a random or consecutive example
    // This is probably not correct - examples with lower weights should have less chance to
    // be chosen. Neither multiplying the line with a low weight does not amortize for this
    // since this same example can be chosen on and on...
    eNum = myAllExamples ? eNum+1 : rgen.randlong(N);
    TExample &example = freeExamples[eNum];
    int eClass = example.getClass();
    refWeight = WEIGHT(example);

    im->rows.push_back(TDIMRow(mlnew TExample(example), columns, classes));

    // for each class
    for(int oClass = 0; oClass<classes; oClass++)
      if (classTables[oClass].size()>0) {
        int adjustedClassIndex;
        if (convertToBinary)
          adjustedClassIndex = (oClass==eClass) ? 0 : 1;
        else if (correctClassFirst)
          adjustedClassIndex= (oClass==eClass) ? 0
                                               : ( (oClass>eClass) ? oClass : oClass+1);
        else
          adjustedClassIndex=oClass;

        // sort the examples by the distance
        set<TDistRec> neighset;
        ITERATE(vector<TCI_w>, epi, classTables[oClass])
          neighset.insert(TDistRec(epi, rgen.randlong(), useDistance->operator()(example, freeExamples[(*epi).freeIndex])));

        // NOTE(review): the weight is chosen by adjustedClassIndex, which equals
        // "class differs from the reference example's" only when convertToBinary
        // or correctClassFirst is set - confirm this is intended otherwise.
        float classWeight = adjustedClassIndex ? gN[oClass] / (gNtot-gN[eClass]) : 1.0;

        set<TDistRec>::iterator ni(neighset.begin()), ne(neighset.end());

        // Skip neighbours at distance <= 0. The end check must come first:
        // dereferencing the past-the-end iterator is undefined.
        if (ignoreSameExample)
          while((ni!=ne) && ((*ni).dist<=0))
            ni++;

        // Take neighbours until the weight quota is used up; with allSameNeighbours,
        // continue through any further neighbours at distance <= 0.
        for(float needwei = myK, compWeight; (ni!=ne) && ((needwei>0) || (allSameNeighbours && ((*ni).dist<=0))); needwei-=compWeight, ni++) {
          // determine the weight for the current example
          TCI_w &CI_w =* (*ni).exPointer;
          compWeight = WEIGHT(freeExamples[CI_w.freeIndex]);
          if (compWeight>needwei)
            compWeight = needwei;

          float koe = refWeight*compWeight*classWeight;
          im->rows.back().nodes[CI_w.columnIndex][adjustedClassIndex] += koe;
        }
      }
  }

  return im;
}
/* Blurer with a single weight shared by all attributes.
     w  - weight; operator() uses it for every attribute when it is positive
     ow - original weight
     aw - whether the original weight is derived from the attribute weight(s)
     oe - the onlyEmpty flag
   attrWeights is not set by this constructor. */
TIMBlurer::TIMBlurer(const float &w, const float &ow, const bool &aw, const bool &oe)
  : weight(w), origWeight(ow),
    adjustOrigWeight(aw), onlyEmpty(oe)
{}
/* Blurer with a separate weight for each attribute.
     aaw - list of per-attribute weights
     ow  - original weight
     aw  - whether the original weight is derived from the attribute weight(s)
     oe  - the onlyEmpty flag
   weight is set to -1, so operator() takes the weights from attrWeights. */
TIMBlurer::TIMBlurer(PFloatList aaw, const float &ow, const bool &aw, const bool &oe)
  : weight(-1), origWeight(ow),
    attrWeights(aaw),
    adjustOrigWeight(aw), onlyEmpty(oe)
{}
bool TIMBlurer::operator()(PIMByRows im)
{ TVarList &attributes = im->rows.front().example->domain->attributes.getReference();
int attrs = attributes.size();
int columns = im->rows.front().nodes.size();
int classes = im->rows.front().noOfValues;
float actOrigWeight;
PFloatList myAttrWeights;
if (weight>0.0) {
if (weight>1.0)
raiseError("weight is %5.3f; it should be lower than 1.0", weight);
myAttrWeights = mlnew TFloatList(attrs, weight);
actOrigWeight = adjustOrigWeight ? 1.0-weight : origWeight;
}
else {
if (attrWeights && int(attrWeights->size())!=attrs)
raiseError("invalid 'attrWeights' (size does not match the number of attributes)");
myAttrWeights = attrWeights;
if (adjustOrigWeight) {
float sum = 0.0;
PITERATE(TFloatList, wi, attrWeights)
sum += *wi;
actOrigWeight = sum>1.0 ? origWeight : 1.0-sum;
}
else
actOrigWeight=origWeight;
}
vector<vector<float *> > impose;
impose.reserve(im->rows.size());
vector<TDIMRow *> *sortedRows = mlnew vector<TDIMRow *>();
sortedRows->reserve(im->rows.size());
ITERATE(vector<TDIMRow>, ri, im->rows) {
impose.push_back(vector<float *>(columns, (float *)NULL));
for(int i = columns; i; ) {
float *fc = mlnew float[classes];
impose.back()[--i] = fc;
for(float *fe = fc+classes; fc!=fe; *(fc++) = 0.0);
}
sortedRows->push_back(& *ri);
}
const TDIMRow *firstRow = &im->rows.front();
for(int attr=0; attr<attrs; attr++) { // we trust it is already sorted
float attrWeight = myAttrWeights->at(attr);
if (attrWeight>=0.0) {
bool ordered = attributes[attr]->ordered;
// impose by attr
for(vector<TDIMRow *>::const_iterator grp_begin(sortedRows->begin()), grp_in1, grp_in2, grp_end, grp_totend(sortedRows->end());
grp_begin!=grp_totend;
grp_begin=grp_end) {
// find the end of the group
grp_end=grp_begin;
while(++grp_end!=grp_totend) {
TExample::const_iterator ei1=(*grp_begin)->example->begin(), ei2=(*grp_end)->example->begin(), ei1e=(*grp_begin)->example->end();
for(int attrNo=0; (ei1!=ei1e) && ((attrNo==attr) || (*ei1==*ei2)); ei1++, ei2++, attrNo++);
if (ei1!=ei1e)
break;
}
for(grp_in1=grp_begin; grp_in1!=grp_end-1; grp_in1++)
for(grp_in2=grp_in1+1; grp_in2!=grp_end; grp_in2++) {
if (ordered) {
int dif=(*grp_in1)->example->operator[](attr).intV - (*grp_in2)->example->operator[](attr).intV;
if ((dif!=1) && (dif!=-1))
continue;
}
else
if ((*grp_in1)->example->operator[](attr).intV == (*grp_in2)->example->operator[](attr).intV)
continue;
{ vector<float *>::iterator odi((*grp_in1)->nodes.begin());
vector<float *> &imp = impose[*grp_in2-firstRow];
for(vector<float *>::iterator idi(imp.begin()), ide(imp.end()); idi!=ide; idi++, odi++)
for (float *idii= *idi, *idie = idii+classes, *odii = *odi; idii!=idie; *(idii++) += *(odii++)*attrWeight);
}
{ vector<float *>::iterator odi((*grp_in2)->nodes.begin());
vector<float *> &imp = impose[*grp_in1-firstRow];
for(vector<float *>::iterator idi(imp.begin()), ide(imp.end()); idi!=ide; idi++, odi++)
for (float *idii= *idi, *idie = idii+classes, *odii= *odi; idii!=idie; *(idii++) += *(odii++)*attrWeight);
}
}
}
}
// sort by attribute attr
vector<int> valf(attributes[attr]->noOfValues(), 0);
ITERATE(vector<TDIMRow *>, ii, *sortedRows) {
TValue &val=(*ii)->example->operator[](attr);
if (val.isSpecial())
raiseError("attribute '%s' has undefined values", attributes[attr]->name.c_str());
valf[val.intV]++;
}
int id=0;
for(vector<int>::iterator ni=valf.begin(); ni!=valf.end(); *(ni++)=(id+=*ni)-*ni);
vector<TDIMRow *> *newPtrs=mlnew vector<TDIMRow *>(sortedRows->size(), (TDIMRow *)NULL);
ITERATE(vector<TDIMRow *>, si, *sortedRows)
(*newPtrs)[valf[(*si)->example->operator[](attr).intV]++] = *si;
mldelete sortedRows;
sortedRows=newPtrs;
}
mldelete sortedRows;
vector<TDIMRow>::iterator oi=im->rows.begin();
ITERATE(vector<vector<float *> >, ii, impose) {
for(vector<float *>::iterator odi((*oi).nodes.begin()), idi((*ii).begin()), ide((*ii).end()); idi!=ide; idi++, odi++) {
float *di, *de;
if (onlyEmpty) {
for (di = *odi, de = di+classes; (di!=de) && !*di; di++);
if (di==de)
continue;
}
float *idii;
for (di = *odi, de = di+classes, idii = *idi; di!=de; *(di++) += *(idii++));
}
oi++;
}
return true;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -