📄 decomposition.cpp
字号:
/*
This file is part of Orange.
Orange is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
Orange is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Orange; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Authors: Janez Demsar, Blaz Zupan, 1996--2002
Contact: janez.demsar@fri.uni-lj.si
*/
#include "stladdon.hpp"
#include "vars.hpp"
#include "domain.hpp"
#include "examples.hpp"
#include "table.hpp"
// the following are here just for IMByRowsByRelief -- move it to a separate file?
#include "distance.hpp"
#include "random.hpp"
#include <set>
#include "decomposition.ppp"
TExample_nodeIndex::TExample_nodeIndex(PExample ex)
: example(ex), nodeIndex(0)
{}
TSortedExamples_nodeIndices::TSortedExamples_nodeIndices(PExampleGenerator eg, const vector<bool> &bound, const vector<bool> &free)
// We need to make a copy of examples since we shall store pointers to them.
: exampleTable(mlnew TExampleTable(eg)),
maxIndex(-1)
{
// Copy pointers to examples to a vector
vector<TExample_nodeIndex> eni;
{ eni.reserve(exampleTable->numberOfExamples());
PEITERATE(ei, exampleTable)
if (!(*ei).getClass().isSpecial())
// We wrap references to examples; exampleTable will live at least as long as those references need to
eni.push_back(TExample_nodeIndex(PExample(*ei)));
}
// copy pointers to a table for sorting
vector<TEnIIterator> *sorting = mlnew vector<TEnIIterator>();
try {
{ sorting->reserve(eni.size());
ITERATE(vector<TExample_nodeIndex>, enii, eni)
sorting->push_back(enii);
}
/* Sorts the vector according to values of bound attributes,
and assigns indices by multiplications.
We use a combination of bucket and counting sort with linear time complexity. */
int mult=1;
{ TVarList::iterator attr = eg->domain->attributes->begin();
const_ITERATE(vector<bool>, bi, bound) {
if (*bi) {
if ((*attr)->varType!=TValue::INTVAR)
raiseError("bound attribute '%s' is not discrete", (*attr)->name.c_str());
int values=(*attr)->noOfValues();
if (values<=0)
raiseError("attribute '%s' has invalid number of values", (*attr)->name.c_str());
sortByAttr_Mult(bi-bound.begin(), sorting, values);
mult *= values;
}
attr++;
}
}
// sort by class and free attributes
{ sortByAttr(eg->domain->attributes->size(), sorting, eg->domain->classVar->noOfValues());
TVarList::iterator attr = eg->domain->attributes->begin();
const_ITERATE(vector<bool>, fi, free) {
if (*fi) {
if ((*attr)->varType != TValue::INTVAR)
raiseError("free attribute '%s' is not discrete", (*attr)->name.c_str());
sortByAttr(fi-free.begin(), sorting, (*attr)->noOfValues());
}
attr++;
}
}
// initialize the vector
{ reserve(sorting->size());
ITERATE(vector<TEnIIterator>, enii, *sorting)
push_back(**enii); }
// compress the index table (remove the unoccupied numbers)
{ vector<int> indices(mult, 0);
{ this_ITERATE(ei)
indices[(*ei).nodeIndex]++; }
ITERATE(vector<int>, bi, indices)
if (*bi>0)
*bi = ++maxIndex;
{ this_ITERATE(ei)
(*ei).nodeIndex=indices[(*ei).nodeIndex]; }
}
}
catch (exception) {
mldelete sorting;
throw;
}
mldelete sorting;
}
void TSortedExamples_nodeIndices::sortByAttr_Mult(int attrNo, vector<TEnIIterator> *&sorting, int values)
{ vector<int> valf(values, 0);
ITERATE(vector<TEnIIterator>, ii, *sorting) {
TValue &val = (*ii)->example->operator[](attrNo);
if (val.isSpecial())
raiseError("attribute '%s' has undefined values", (*ii)->example->domain->getVar(attrNo)->name.c_str());
valf[val.intV]++;
}
int id = 0;
for(vector<int>::iterator ni(valf.begin()), ne(valf.end()); ni!=ne; *(ni++) = (id+=*ni)-*ni);
vector<TEnIIterator> *newPtrs = mlnew vector<TEnIIterator>(sorting->size(), sorting->front());
ITERATE(vector<TEnIIterator>, si, *sorting) {
int valind=(*si)->example->operator[](attrNo).intV;
(*newPtrs)[valf[valind]++] = *si;
(**si).nodeIndex = (**si).nodeIndex * values + valind;
}
mldelete sorting;
sorting = newPtrs;
}
void TSortedExamples_nodeIndices::sortByAttr(int attrNo, vector<TEnIIterator> *&sorting, int values)
{ vector<int> valf(values, 0);
ITERATE(vector<TEnIIterator>, ii, *sorting) {
TValue &val = (*ii)->example->operator[](attrNo);
if (val.isSpecial())
raiseError("attribute '%s' has undefined values", (*ii)->example->domain->getVar(attrNo)->name.c_str());
valf[val.intV]++;
}
int id = 0;
for(vector<int>::iterator ni=valf.begin(); ni!=valf.end(); *(ni++) = (id+=*ni)-*ni);
vector<TEnIIterator> *newPtrs = mlnew vector<TEnIIterator>(sorting->size(), sorting->front());
ITERATE(vector<TEnIIterator>, si, *sorting)
(*newPtrs)[valf[(*si)->example->operator[](attrNo).intV]++] = *si;
mldelete sorting;
sorting=newPtrs;
}
TIMColumnNode::TIMColumnNode(const int &anind, TIMColumnNode *anext, float nerr)
: index(anind),
next(anext),
nodeQuality(nerr)
{}
TIMColumnNode::~TIMColumnNode()
{ while(next) {
TIMColumnNode *nn=next->next;
next->next=NULL; // prevents stack overflow when the list deletes itself
mldelete next;
next=nn;
}
}
TDIMColumnNode::TDIMColumnNode(const int &anind, const int &noofval, float *adist, TIMColumnNode *anext)
: TIMColumnNode(anind, anext),
noOfValues(noofval),
distribution(adist ? adist : mlnew float[noofval])
{ if (adist)
computeabs();
else {
float *di = distribution;
for(int c = noOfValues; c--; *(di++) = 0.0);
abs = -1.0;
}
}
TDIMColumnNode::~TDIMColumnNode()
{ mldelete distribution; }
TIMColumnNode &TDIMColumnNode::operator += (const TIMColumnNode &other)
{ float *di = distribution, *de = distribution+noOfValues;
const float *ddi = dynamic_cast<const TDIMColumnNode &>(other).distribution;
while (di!=de)
*(di++) += *(ddi++);
return *this;
}
TFIMColumnNode::TFIMColumnNode(int anind, TIMColumnNode *anext, const float asum, const float asum2, const float aN)
: TIMColumnNode(anind, anext),
sum(asum),
sum2(asum2),
N(aN)
{}
void TFIMColumnNode::add(const float &value, const float weight)
{ sum += weight*value;
sum2 += weight*value*value;
N += weight;
}
TIMColumnNode &TFIMColumnNode::operator += (const TIMColumnNode &other)
{ TFIMColumnNode const &nde = dynamic_cast<const TFIMColumnNode &>(other);
sum += nde.sum;
sum2+= nde.sum2;
N += nde.N;
return *this;
}
T_ExampleIMColumnNode::T_ExampleIMColumnNode(PExample anexample, TIMColumnNode *anode)
: example(anexample),
column(anode)
{}
T_ExampleIMColumnNode::T_ExampleIMColumnNode(const T_ExampleIMColumnNode &other)
: example(other.example),
column(other.column)
{ const_cast<T_ExampleIMColumnNode &>(other).column = NULL; }
T_ExampleIMColumnNode::~T_ExampleIMColumnNode()
{ mldelete column; }
T_ExampleIMColumnNode &T_ExampleIMColumnNode::operator =(const T_ExampleIMColumnNode &other)
{ example = other.example;
column = other.column;
const_cast<T_ExampleIMColumnNode &>(other).column=NULL;
return *this;
}
TIM::TIM(const int &avarType)
: varType(avarType),
columns()
{}
int TIM::traverse(visitproc visit, void *arg) const
{ TRAVERSE(TOrange::traverse);
const_ITERATE(vector<T_ExampleIMColumnNode>, pi, columns)
PVISIT((*pi).example);
return 0;
}
int TIM::dropReferences()
{ DROPREFERENCES(TOrange::dropReferences);
columns.clear();
return 0;
}
bool TIM::fuzzy()
{ ITERATE(vector<T_ExampleIMColumnNode>, ci, columns)
if (varType==TValue::INTVAR)
for(TDIMColumnNode *dnode=dynamic_cast<TDIMColumnNode *>((*ci).column); dnode; dnode = (TDIMColumnNode *)(dnode->next)) {
float *di=dnode->distribution;
int c;
for (c = dnode->noOfValues; c--; di++)
if ((*di!=0.0) && (*di != dnode->abs))
return true; // if !c, all is empty or only the last is non-empty
}
else
for(TFIMColumnNode *fnode = dynamic_cast<TFIMColumnNode *>((*ci).column); fnode; fnode = (TFIMColumnNode *)(fnode->next))
if (fnode->N*fnode->sum2 != sqr(fnode->sum))
return true;
return false;
}
TIMConstructor::TIMConstructor(const bool &anRE)
: recordRowExamples(anRE)
{}
PIM TIMConstructor::operator()(PExampleGenerator gen, const TVarList &aboundSet, const int &weightID)
{
// Identify bound attributes
vector<bool> bound = vector<bool>(gen->domain->attributes->size(), false);
vector<bool> free = vector<bool>(gen->domain->attributes->size(), true);
const_ITERATE(TVarList, evi, aboundSet) {
int vn = gen->domain->getVarNum(*evi);
bound[vn] = true;
free[vn] = false;
}
return operator()(gen, bound, aboundSet, free, weightID);
}
PIM TIMConstructor::operator()(PExampleGenerator gen, const TVarList &aboundSet, const TVarList &afreeSet, const int &weightID)
{
// Identify bound attributes
vector<bool> bound=vector<bool>(gen->domain->attributes->size(), false);
{ const_ITERATE(TVarList, evi, aboundSet)
bound[gen->domain->getVarNum(*evi)] = true;
}
vector<bool> free=vector<bool>(gen->domain->attributes->size(), false);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -