// classifier.cpp
// (code-viewer page residue: "font size" control label; not part of the source)
/*
This file is part of Orange.
Orange is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
Orange is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Orange; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Authors: Janez Demsar, Blaz Zupan, 1996--2002
Contact: janez.demsar@fri.uni-lj.si
*/
#include "vars.hpp"
#include <math.h>
#include "stladdon.hpp"
#include "random.hpp"
#include "examplegen.hpp"
#include "examples.hpp"
#include "domain.hpp"
//#include "filter.hpp"
#include "table.hpp"
#include "classify.ppp"
// Invokes the project macro DEFINE_TOrangeVector_classDescription for the
// PClassifier element type under the name "TClassifierList".
// NOTE(review): presumably this emits the class-description data for the list
// of classifiers; the meaning of the `true` argument is defined together with
// the macro, outside this file -- confirm there.
DEFINE_TOrangeVector_classDescription(PClassifier, "TClassifierList", true, ORANGE_API)
/* ***** TClassifier methods */
/* Builds a classifier that predicts the class variable `acv`.
   `cp` is stored in computesProbabilities; the default operator() and
   classDistribution() check that flag before delegating to one another. */
TClassifier::TClassifier(const PVariable &acv, const bool &cp)
  : classVar(acv), computesProbabilities(cp)
{
}
/* Builds a classifier whose class variable is a default-constructed
   PVariable; only the computesProbabilities flag is taken from the caller. */
TClassifier::TClassifier(const bool &cp)
  : classVar(PVariable()), computesProbabilities(cp)
{
}
/* Copy constructor: copies the TOrange base part, then the class variable
   handle and the computesProbabilities flag of `old`. */
TClassifier::TClassifier(const TClassifier &old)
  : TOrange(old), classVar(old.classVar), computesProbabilities(old.computesProbabilities)
{
}
/* Default prediction, derived from classDistribution().

   Raises an error when computesProbabilities is false: the default
   classDistribution() itself calls operator(), so in that configuration this
   default cannot produce a value.

   For a class variable of type TValue::FLOATVAR the result is the
   distribution's average(); otherwise it is the distribution's
   highestProbValue() for the given example. */
TValue TClassifier::operator ()(const TExample &exam)
{
  if (!computesProbabilities)
    raiseError("invalid setting of 'computesProbabilities'");

  if (classVar->varType == TValue::FLOATVAR)
    return TValue(classDistribution(exam)->average());

  return classDistribution(exam)->highestProbValue(exam);
}
/* Default class distribution, derived from operator().

   Raises an error when computesProbabilities is true: the default operator()
   itself calls classDistribution(), so in that configuration this default
   cannot produce a distribution.

   Otherwise a distribution is created for classVar and the value predicted by
   operator() is added to it. */
PDistribution TClassifier::classDistribution(const TExample &exam)
{
  if (computesProbabilities)
    raiseError("invalid setting of 'computesProbabilities'");

  PDistribution predicted = TDistribution::create(classVar);
  predicted->add((*this)(exam));
  return predicted;
}
/* Computes both the predicted value and the class distribution for `ex`.

   val       -- receives the predicted value
   classDist -- receives the class distribution

   Which of the two is computed first depends on computesProbabilities:
   - false: operator() gives the value; the distribution is a fresh one for
     classVar with that value added to it;
   - true:  classDistribution() gives the distribution; the value is its
     average() for a TValue::FLOATVAR class, otherwise highestProbValue(). */
void TClassifier::predictionAndDistribution(const TExample &ex, TValue &val, PDistribution &classDist)
{
  if (!computesProbabilities) {
    val = operator()(ex);
    classDist = TDistribution::create(classVar);
    classDist->add(val);
    return;
  }

  classDist = classDistribution(ex);
  if (classVar->varType == TValue::FLOATVAR)
    val = TValue(classDist->average());
  else
    val = classDist->highestProbValue(ex);
}
/* Stores the domain, the (possibly null) per-variable distributions and the
   two ids `ow` (originalWeight) and `mw` (missingWeight), then fills
   `averages` from the distributions via getAverages(). */
TEFMDataDescription::TEFMDataDescription(PDomain dom, PDomainDistributions dist, int ow, int mw)
  : domain(dom), domainDistributions(dist), originalWeight(ow), missingWeight(mw)
{
  getAverages();
}
void TEFMDataDescription::getAverages()
{ averages = vector<float>();
if (domainDistributions)
for(TDomainDistributions::iterator si(domainDistributions->begin()), ei(domainDistributions->end()); si!=ei; si++)
averages.push_back(((*si)->variable->varType==TValue::INTVAR)
? numeric_limits<float>::quiet_NaN()
: (*si)->average());
}
float TEFMDataDescription::getExampleWeight(const TExample &example) const
{
if (example.domain != domain)
raiseError("example's domain doesn't match the data descriptor's");
float weight=1.0;
TVarList::const_iterator vi(domain->attributes->begin()), vie(domain->attributes->end());
TExample::iterator ei(example.begin());
for(; vi!=vie; ei++, vi++)
if ((*ei).isDK() && ((*ei).varType == TValue::INTVAR))
weight /= (*vi)->noOfValues();
return weight;
}
/* Returns a weight in which every discrete (TValue::INTVAR) position where at
   least one of the two examples has a DK ("don't know") value scales the
   result; positions that are not INTVAR in ex1 are ignored.

   With domainDistributions available (one position per distribution):
     - both values DK: multiply by the sum of squared entries of the discrete
       distribution; the sum is cached in matchProbabilities, where -1 marks
       "not computed yet";
     - exactly one value DK and the other one not special: multiply by the
       distribution's p() of the known value.
   Without distributions (one position per domain attribute):
     - one value DK and the other one not special: divide by the attribute's
       noOfValues(); two DK values leave the weight unchanged.

   Raises an error when neither example belongs to this descriptor's domain.
   NOTE(review): the check uses &&, so a single example from a foreign domain
   is accepted; this looks like it should be || but is left unchanged because
   callers may rely on it -- confirm. */
float TEFMDataDescription::getExampleMatch(const TExample &ex1, const TExample &ex2)
{
  if ((ex1.domain != domain) && (ex2.domain != domain))
    raiseError("example's domain doesn't match the data descriptor's");

  float weight=1.0;

  // Both cursors walk the examples in lock-step from their first value.
  // (The second one used to start at ex2.end(), so every dereference of it
  // read past the end of ex2.)
  TExample::iterator e1i(ex1.begin()), e2i(ex2.begin());

  if (domainDistributions) {
    // (Re)create the cache of "both unknown" match probabilities when its size
    // does not fit the current distributions.
    if (matchProbabilities.size() != domainDistributions->size())
      matchProbabilities = vector<float>(domainDistributions->size(), -1);

    vector<float>::iterator mi(matchProbabilities.begin());
    TDomainDistributions::const_iterator di(domainDistributions->begin()), de(domainDistributions->end());

    for(; di!=de; e1i++, e2i++, di++, mi++) {
      if ((*e1i).varType == TValue::INTVAR) {
        if ((*e1i).isDK()) {
          if ((*e2i).isDK()) {
            // Both unknown: use the (lazily computed) sum of squared entries.
            if (*mi == -1) {
              float mp = 0.0;
              ITERATE(TDiscDistribution, ddi, ((TDiscDistribution &)((*di).getReference())))
                mp += *ddi * *ddi;
              *mi = mp;
            }
            weight *= *mi;
          }
          else if (!(*e2i).isSpecial())
            // Only ex1 unknown: probability of the value known from ex2.
            weight *= (*di)->p(*e2i);
        }
        else if ((*e2i).isDK() && !(*e1i).isSpecial())
          // Only ex2 unknown: probability of the value known from ex1.
          weight *= (*di)->p(*e1i);
      }
    }
  }
  else {
    // No distributions: fall back to a uniform 1/noOfValues per half-known pair.
    TVarList::const_iterator vi(domain->attributes->begin()), vie(domain->attributes->end());
    for(; vi!=vie; e1i++, e2i++, vi++)
      if (((*e1i).varType == TValue::INTVAR)
          && (((*e1i).isDK() && !(*e2i).isSpecial()) || ((*e2i).isDK() && !(*e1i).isSpecial())))
        weight /= (*vi)->noOfValues();
  }

  return weight;
}
/* Creates an example in domain `dom` with the data description `dd`.
   A non-null `dd` must describe the same domain as the example, otherwise an
   error is raised. */
TExampleForMissing::TExampleForMissing(PDomain dom, PEFMDataDescription dd)
  : TExample(dom), dataDescription(dd)
{
  if (dd) {
    if (dd->domain != domain)
      raiseError("data description does not match the domain");
  }
}
/* Copy constructor: copies the TExample part of `orig` together with its data
   description and its DKs and DCs index lists. */
TExampleForMissing::TExampleForMissing(const TExampleForMissing &orig)
  : TExample(static_cast<const TExample &>(orig)),
    dataDescription(orig.dataDescription),
    DKs(orig.DKs),
    DCs(orig.DCs)
{
}
/* Creates a copy of the plain example `orig` and attaches the data
   description `dd` to it.
   A non-null `dd` must describe the same domain as the example, otherwise an
   error is raised. */
TExampleForMissing::TExampleForMissing(const TExample &orig, PEFMDataDescription dd)
  : TExample(orig), dataDescription(dd)
{
  if (dd) {
    if (dd->domain != domain)
      raiseError("data description does not match the domain");
  }
}
/* Creates an example in domain `dom` from the example `orig` (using the
   TExample(dom, orig) constructor) and attaches the data description `dd`.
   A non-null `dd` must describe the same domain as the example, otherwise an
   error is raised. */
TExampleForMissing::TExampleForMissing(PDomain dom, const TExample &orig, PEFMDataDescription dd)
  : TExample(dom, orig), dataDescription(dd)
{
  if (dd) {
    if (dd->domain != domain)
      raiseError("data description does not match the domain");
  }
}
/* Assigns a complete TExampleForMissing: first the TExample part, then the
   data description and the DKs and DCs index lists. There is no special
   handling of self-assignment. */
TExampleForMissing &TExampleForMissing::operator =(const TExampleForMissing &orig)
{
  // The base part is assigned through a TExample reference, so the assignment
  // operator is looked up exactly as for a plain TExample on the left side.
  TExample &baseSelf = *this;
  baseSelf = static_cast<const TExample &>(orig);

  dataDescription = orig.dataDescription;
  DKs = orig.DKs;
  DCs = orig.DCs;

  return *this;
}
/* Assigns a plain TExample: only TExample::operator= is invoked (explicitly
   qualified); dataDescription, DKs and DCs are left as they were. */
TExample &TExampleForMissing::operator =(const TExample &orig)
{
  TExample::operator=(orig);
  return *this;
}
void TExampleForMissing::resetExample()
{
checkProperty(dataDescription);
DCs.clear();
DKs.clear();
float averageWeight=1;
TVarList::const_iterator vi(domain->attributes->begin()), vie(domain->attributes->end());
TExample::iterator ei(begin()), bei(ei);
vector<float>::const_iterator ai(dataDescription->averages.begin());
for(; vi!=vie; ei++, vi++, ai++)
if ((*ei).isSpecial()) {
if ((*vi)->varType==TValue::FLOATVAR)
*ei=TValue(*ai);
else if (dataDescription->missingWeight && (*ei).isDK()) {
DKs.push_back(ei-bei);
averageWeight/=float((*vi)->noOfValues());
}
else
DCs.push_back(ei-bei);
(*vi)->firstValue(*ei);
}
if (dataDescription->missingWeight) {
float weight = dataDescription->originalWeight ? getMeta(dataDescription->originalWeight).floatV : 1;
if (dataDescription->domainDistributions) {
TDomainDistributions::const_iterator di(dataDescription->domainDistributions->begin());
ITERATE(vector<int>, ci, DKs) {
// DKs contain only discrete variables, so it is safe to cast
const TDiscDistribution &dist = CAST_TO_DISCDISTRIBUTION(*(di+*ci));
if (dist.abs)
weight *= dist.front() / dist.abs;
}
}
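/* [Code-viewer page residue, not part of classifier.cpp; the source listing is
   cut off at this point.]
   Keyboard shortcuts:
     Copy code          Ctrl + C
     Search code        Ctrl + F
     Full-screen mode   F11
     Toggle theme       Ctrl + Shift + D
     Show shortcuts     ?
     Increase font size Ctrl + =
     Decrease font size Ctrl + -
*/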