📄 lookup.cpp
字号:
/*
This file is part of Orange.
Orange is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
Orange is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Orange; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Authors: Janez Demsar, Blaz Zupan, 1996--2002
Contact: janez.demsar@fri.uni-lj.si
*/
#include <limits>
#include "random.hpp"
#include "vars.hpp"
#include "domain.hpp"
#include "estimateprob.hpp"
#include "examples.hpp"
#include "examplegen.hpp"
#include "stladdon.hpp"
#include "distvars.hpp"
#include "bayes.hpp"
#include "contingency.hpp"
#include "table.hpp"
#include "filter.hpp"
#include "lookup.ppp"
/* Returns the value of 'variable' for the example 'ex'.
   When 'varIndex' equals ILLEGAL_INT the value is obtained by calling
   variable->computeValue(ex); otherwise it is read directly from ex[varIndex]. */
inline TValue getValue(const TExample &ex, const int &varIndex, PVariable variable)
{
  if (varIndex == ILLEGAL_INT)
    return variable->computeValue(ex);

  return ex[varIndex];
}
/* Constructs the common base of the lookup-table classifiers.
   'aclass' is passed on to TClassifier and to TDistribution::create;
   'vlist' becomes the lookup table. When a lookup table is given, one
   distribution is created (via TDistribution::create(aclass)) for each of
   its entries, so that both lists start out with the same length. */
TClassifierByLookupTable::TClassifierByLookupTable(PVariable aclass, PValueList vlist)
: TClassifier(aclass, false), // we want TClassifier::classDistribution to call operator() when there are no distributions
  lookupTable(vlist),
  distributions(mlnew TDistributionList())
{
  if (!lookupTable)
    return;

  // the table size is read once, before any distribution is appended
  const int entries = lookupTable->size();
  for (int created = 0; created < entries; ++created)
    distributions->push_back(TDistribution::create(aclass));
}
void TClassifierByLookupTable::valuesFromDistributions()
{
if (lookupTable->size() != distributions->size())
raiseError("sizes of 'lookupTable' and 'distributions' mismatch");
TValueList::iterator vi(lookupTable->begin());
TDistributionList::const_iterator di(distributions->begin()), de(distributions->end());
for(; di!=de; di++, vi++)
if ((*vi).isSpecial())
*vi = (*di)->highestProbValue();
}
/* Constructs a classifier that looks the class up by the value of a single
   variable, 'avar'.
   The lookup table is created with avar->noOfValues()+1 entries and is given
   aclass->DK() as the fill value (presumably every entry starts as "don't
   know" -- confirm against TValueList's constructor). The extra last entry is
   the one operator() returns, through lookupTable->back(), for special or
   out-of-range values.
   lastDomainVersion starts at -1; presumably no real domain has that version,
   so the first lookup refreshes the cached variable index. */
TClassifierByLookupTable1::TClassifierByLookupTable1(PVariable aclass, PVariable avar)
: TClassifierByLookupTable(
    aclass,
    mlnew TValueList(avar->noOfValues()+1, aclass->DK(), aclass)),
  variable1(avar),
  lastDomainVersion(-1)
{
}
/* Refreshes the per-domain cache: stores the result of
   domain->getVarNum(variable1, false) in lastVarIndex and remembers the
   domain's version, so later calls can tell whether the cache is current. */
void TClassifierByLookupTable1::setLastDomain(PDomain domain)
{
  // the index is looked up first; the version is recorded only afterwards
  this->lastVarIndex = domain->getVarNum(variable1, false);
  this->lastDomainVersion = domain->version;
}
int TClassifierByLookupTable1::getIndex(const TExample &ex, TExample *conv)
{ if (lastDomainVersion!=ex.domain->version)
setLastDomain(ex.domain);
TValue val = getValue(ex, lastVarIndex, variable1);
if (val.isSpecial()) {
if (conv)
(*conv)[0] = val;
return -1;
}
return val.intV;
}
/* Predicts the class of 'ex' from the lookup table.
   Returns the table entry indexed by the value of variable1. When that value
   is special, or its index is not below the table size, the last table entry
   is returned instead. */
TValue TClassifierByLookupTable1::operator()(const TExample &ex)
{
  // refresh the cached variable index when the example's domain version differs
  if (lastDomainVersion != ex.domain->version)
    setLastDomain(ex.domain);

  TValue attrValue = getValue(ex, lastVarIndex, variable1);

  const bool useLastEntry = attrValue.isSpecial() || (attrValue.intV >= int(lookupTable->size()));
  if (useLastEntry)
    return lookupTable->back();

  return lookupTable->operator[](attrValue.intV);
}
/* Returns the class distribution for 'ex'.
   Without 'distributions' the call is forwarded to
   TClassifier::classDistribution. Otherwise a clone of the distribution
   indexed by the value of variable1 is returned; for a special value, or an
   index not below the number of distributions, the last distribution is
   cloned instead. */
PDistribution TClassifierByLookupTable1::classDistribution(const TExample &ex)
{
  if (!distributions)
    return TClassifier::classDistribution(ex);

  // refresh the cached variable index when the example's domain version differs
  if (lastDomainVersion != ex.domain->version)
    setLastDomain(ex.domain);

  TValue attrValue = getValue(ex, lastVarIndex, variable1);

  const bool useLastEntry = attrValue.isSpecial() || (attrValue.intV >= int(distributions->size()));
  if (useLastEntry)
    return CLONE(TDistribution, distributions->back());

  return CLONE(TDistribution, distributions->operator[](attrValue.intV));
}
/* Computes both the predicted class ('value') and the class distribution
   ('dist') for example 'ex'.
   Without 'distributions' the call is forwarded to
   TClassifier::predictionAndDistribution. Otherwise the value of variable1
   indexes 'lookupTable' and 'distributions'; for a special value, or an index
   that does not fit into both lists, the last entry of each list is used. */
void TClassifierByLookupTable1::predictionAndDistribution(const TExample &ex, TValue &value, PDistribution &dist)
{
  if (!distributions) {
    TClassifier::predictionAndDistribution(ex, value, dist);
    return;
  }

  // refresh the cached variable index when the example's domain version differs
  if (lastDomainVersion != ex.domain->version)
    setLastDomain(ex.domain);

  TValue val = getValue(ex, lastVarIndex, variable1);

  // The index is applied to two lists, so it has to be checked against both:
  // checking only lookupTable would read past the end of 'distributions'
  // whenever that list is the shorter one.
  if (val.isSpecial()
      || (val.intV >= int(lookupTable->size()))
      || (val.intV >= int(distributions->size()))) {
    value = lookupTable->back();
    dist = CLONE(TDistribution, distributions->back());
  }
  else {
    value = lookupTable->operator[](val.intV);
    dist = CLONE(TDistribution, distributions->operator[](val.intV));
  }
}
/* Replaces the special (undefined) entries of the lookup table.

   valDistribution is the distribution of values of the variable; when its
   'abs' is non-zero its elements are used as weights, otherwise every
   defined entry counts with weight 1.

   If 'distributions' are known, this computes the (weighted) sum of the
   distributions of all defined entries, normalizes it and stores a copy of it
   in place of the distribution of every undefined entry. The undefined lookup
   table entries themselves get the highest-probability value of the
   (weighted) distribution of classes found in the defined entries.

   Otherwise only the distribution of classes over the defined entries is
   computed and its highest-probability value is stored into the undefined
   entries of the lookup table.

   Note that valDistribution may be modified: it is indexed at the last
   needed position before it is iterated (presumably TDiscDistribution's
   operator[] extends the distribution to that length -- confirm).
*/
void TClassifierByLookupTable1::replaceDKs(TDiscDistribution &valDistribution)
{
  TDiscDistribution sum(classVar), *classes = mlnew TDiscDistribution(classVar);
  PDistribution wclasses = PDistribution(classes); // wraps 'classes' for the duration of the call

  if (distributions) {
    if (valDistribution.abs) {
      TValueList::iterator vi(lookupTable->begin());
      // touch the last needed element before taking iterators into valDistribution
      valDistribution[distributions->size()-1];
      TDiscDistribution::iterator di(valDistribution.begin());
      for(TDistributionList::iterator dvi(distributions->begin()), dve(distributions->end());
          dvi!=dve;
          dvi++, vi++, di++)
        if (!(*vi).isSpecial()) {
          const TDiscDistribution &tdi = CAST_TO_DISCDISTRIBUTION(*dvi);
          TDiscDistribution::const_iterator tdii(tdi.begin());
          // add this entry's distribution, weighted by the frequency of the value
          for(TDiscDistribution::iterator si(sum.begin()), se(sum.end()); si!=se; si++, tdii++)
            *si += *tdii * *di;
          sum.abs += tdi.abs * *di;
          classes->addint((*vi).intV, *di);
        }
    }
    else {
      TValueList::iterator vi(lookupTable->begin());
      for(TDistributionList::iterator dvi(distributions->begin()), dve(distributions->end()); dvi!=dve; dvi++, vi++)
        if (!(*vi).isSpecial()) {
          sum += CAST_TO_DISCDISTRIBUTION(*dvi);
          classes->addint((*vi).intV, 1);
        }
    }

    sum.normalize();

    TValueList::iterator vi(lookupTable->begin());
    PITERATE(TDistributionList, dvi, distributions)
      if ((*vi).isSpecial()) {
        *dvi = mlnew TDiscDistribution(sum);
        *(vi++) = classes->highestProbValue(); // this does not need to be the same for each call!
      }
      else
        vi++;
  }

  else {
    if (valDistribution.abs) {
      // valDistribution is walked in lock-step with the lookup table, so it is
      // brought to the table's length first, using the same idiom as the
      // branch above; the iterator is taken only afterwards.
      if (lookupTable->size())
        valDistribution[lookupTable->size()-1];
      TDiscDistribution::iterator di(valDistribution.begin());
      for(TValueList::iterator vi(lookupTable->begin()), ve(lookupTable->end()); vi!=ve; vi++, di++)
        if (!(*vi).isSpecial())
          classes->addint((*vi).intV, *di);
    }
    else {
      // weights are not used here, so valDistribution is not iterated at all
      for(TValueList::iterator vi(lookupTable->begin()), ve(lookupTable->end()); vi!=ve; vi++)
        if (!(*vi).isSpecial())
          classes->addint((*vi).intV, 1.0);
    }

    PITERATE(TValueList, vi, lookupTable)
      if ((*vi).isSpecial())
        (*vi) = classes->highestProbValue(); // this does not need to be the same for each call!
  }
}
/* Overwrites 'boundSet' with a list whose only element is variable1. */
void TClassifierByLookupTable1::giveBoundSet(TVarList &boundSet)
{
  TVarList onlyVariable(1, variable1);
  boundSet = onlyVariable;
}
/* Constructs a classifier that looks the class up by the values of two
   variables, 'avar1' and 'avar2'.
   The lookup table is created with noOfValues(avar1) * noOfValues(avar2)
   entries and is given aclass->DK() as the fill value. 'adata' is stored as
   the data description; when it is not given, a new description is built over
   a domain that has no class variable and the two variables as attributes. */
TClassifierByLookupTable2::TClassifierByLookupTable2(PVariable aclass, PVariable avar1, PVariable avar2, PEFMDataDescription adata)
: TClassifierByLookupTable(
    aclass,
    mlnew TValueList((avar1->noOfValues()) * (avar2->noOfValues()), aclass->DK(), aclass)),
  variable1(avar1),
  variable2(avar2),
  noOfValues1(avar1->noOfValues()),
  noOfValues2(avar2->noOfValues()),
  dataDescription(adata),
  lastDomainVersion(-1)
{
  // a caller-supplied description is used as it is
  if (adata)
    return;

  TVarList boundVars;
  boundVars.push_back(variable1);
  boundVars.push_back(variable2);
  dataDescription = mlnew TEFMDataDescription(mlnew TDomain(PVariable(), boundVars));
}
/* Refreshes the per-domain cache: stores the results of
   domain->getVarNum(..., false) for variable1 and variable2 and remembers the
   domain's version, so later calls can tell whether the cache is current. */
void TClassifierByLookupTable2::setLastDomain(PDomain domain)
{
  // both indices are looked up first; the version is recorded only afterwards
  this->lastVarIndex1 = domain->getVarNum(variable1, false);
  this->lastVarIndex2 = domain->getVarNum(variable2, false);
  this->lastDomainVersion = domain->version;
}
int TClassifierByLookupTable2::getIndex(const TExample &ex, TExample *conv)
{ if (lastDomainVersion!=ex.domain->version)
setLastDomain(ex.domain);
TValue val1 = getValue(ex, lastVarIndex1, variable1);
TValue val2 = getValue(ex, lastVarIndex2, variable2);
if (val1.isSpecial() || val2.isSpecial()) {
if (conv) {
(*conv)[0] = val1;
(*conv)[1] = val2;
}
return -1;
}
return noOfValues2 * val1.intV + val2.intV;
}
/* Predicts the class of 'ex' from the lookup table.
   - When getIndex reports special values (negative index), the prediction is
     delegated to TClassifier::operator() on the converted example together
     with the data description.
   - When the index is inside the lookup table, that entry is returned.
   - Otherwise the highest-probability value of the last of the data
     description's domain distributions is returned. */
TValue TClassifierByLookupTable2::operator()(const TExample &ex)
{
  TExample conv(dataDescription->domain);
  const int index = getIndex(ex, &conv);

  if (index < 0)
    return TClassifier::operator()(conv, dataDescription);

  if (index < int(lookupTable->size()))
    return lookupTable->operator[](index);

  return dataDescription->domainDistributions->back()->highestProbValue(ex);
}
/* Returns the class distribution for 'ex'.
   Without 'distributions' the call is forwarded to
   TClassifier::classDistribution(ex). Otherwise:
   - for special values (negative index) the result comes from
     TClassifier::classDistribution on the converted example together with
     the data description;
   - for an index inside 'distributions' a clone of that distribution is
     returned;
   - for any other index a clone of the last of the data description's domain
     distributions is returned. */
PDistribution TClassifierByLookupTable2::classDistribution(const TExample &ex)
{
  if (!distributions)
    return TClassifier::classDistribution(ex);

  TExample conv(dataDescription->domain);
  const int index = getIndex(ex, &conv);

  if (index < 0)
    return TClassifier::classDistribution(conv, dataDescription);

  if (index < int(distributions->size()))
    return CLONE(TDistribution, distributions->operator[](index));

  return CLONE(TDistribution, dataDescription->domainDistributions->back());
}
/* Computes both the predicted class ('value') and the class distribution
   ('dist') for example 'ex'.
   Without 'distributions' the call is forwarded to
   TClassifier::predictionAndDistribution. Otherwise:
   - for special values (negative index) the distribution comes from
     TClassifier::classDistribution on the converted example together with
     the data description, and the value is its highest-probability value;
   - for an index that does not fit into both 'distributions' and
     'lookupTable', the last of the data description's domain distributions
     is cloned and the value is its highest-probability value;
   - otherwise the distribution and the value are taken from 'distributions'
     and 'lookupTable' at that index. */
void TClassifierByLookupTable2::predictionAndDistribution(const TExample &ex, TValue &value, PDistribution &dist)
{
  if (!distributions) {
    TClassifier::predictionAndDistribution(ex, value, dist);
    return;
  }

  TExample conv(dataDescription->domain);
  int index=getIndex(ex, &conv);

  if (index<0) {
    dist = TClassifier::classDistribution(conv, dataDescription);
    value = dist->highestProbValue(ex);
  }
  // The last branch indexes two lists, so the index has to be checked against
  // both: checking only 'distributions' would read past the end of
  // 'lookupTable' whenever that list is the shorter one.
  else if ((index>=int(distributions->size())) || (index>=int(lookupTable->size()))) {
    dist = CLONE(TDistribution, dataDescription->domainDistributions->back());
    value = dist->highestProbValue(ex);
  }
  else {
    dist = CLONE(TDistribution, distributions->operator[](index));
    value = lookupTable->operator[](index);
  }
}
void TClassifierByLookupTable2::replaceDKs(PExampleGenerator examples, bool useBayes)
{
PClassifier bayes;
PDistribution classDist;
if (useBayes)
bayes = TBayesLearner()(examples);
else
classDist = getClassDistribution(examples /*, weightID */);
TValueList::iterator vi(lookupTable->begin());
TDistributionList::iterator di(distributions->begin());
bool distr = distributions && (distributions->size()>0);
TExample example(dataDescription->domain);
variable1->firstValue(example[0]);
do {
variable2->firstValue(example[1]);
do {
if ((*vi).isSpecial())
if (useBayes) {
*vi = bayes->operator()(example);
if (distr)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -