📄 rulelearner.cpp
字号:
/*
This file is part of Orange.
Orange is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
Orange is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Orange; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Authors: Martin Mozina, Janez Demsar, Blaz Zupan, 1996--2004
Contact: martin.mozina@fri.uni-lj.si
*/
#include "filter.hpp"
#include "table.hpp"
#include "stat.hpp"
#include "measures.hpp"
#include "discretize.hpp"
#include "distvars.hpp"
#include "classfromvar.hpp"
#include "progress.hpp"
#include "rulelearner.ppp"
DEFINE_TOrangeVector_classDescription(PRule, "TRuleList", true, ORANGE_API)
TRule::TRule()
: weightID(0),
quality(ILLEGAL_FLOAT),
complexity(ILLEGAL_FLOAT),
coveredExamples(NULL),
coveredExamplesLength(-1),
parentRule(NULL)
{}
TRule::TRule(PFilter af, PClassifier cl, PLearner lr, PDistribution dist, PExampleTable ce, const int &w, const float &qu)
: filter(af),
classifier(cl),
learner(lr),
classDistribution(dist),
examples(ce),
weightID(w),
quality(qu),
coveredExamples(NULL),
coveredExamplesLength(-1),
parentRule(NULL)
{}
TRule::TRule(const TRule &other, bool copyData)
: filter(other.filter? other.filter->deepCopy() : PFilter()),
classifier(other.classifier),
learner(other.learner),
complexity(other.complexity),
classDistribution(copyData ? other.classDistribution: PDistribution()),
examples(copyData ? other.examples : PExampleTable()),
weightID(copyData ? other.weightID : 0),
quality(copyData ? other.quality : ILLEGAL_FLOAT),
coveredExamples(copyData && other.coveredExamples && (other.coveredExamplesLength >= 0) ? (int *)memcpy(new int[other.coveredExamplesLength], other.coveredExamples, other.coveredExamplesLength) : NULL),
coveredExamplesLength(copyData ? other.coveredExamplesLength : -1),
parentRule(other.parentRule)
{}
TRule::~TRule()
{ delete coveredExamples; }
bool TRule::operator ()(const TExample &ex)
{
checkProperty(filter);
return filter->call(ex);
}
#define HIGHBIT 0x80000000
PExampleTable TRule::operator ()(PExampleTable gen, const bool ref, const bool negate)
{
checkProperty(filter);
TExampleTable *table = ref ? mlnew TExampleTable(gen, 1) : mlnew TExampleTable(PExampleGenerator(gen));
PExampleGenerator wtable = table;
PEITERATE(ei, gen)
if (filter->call(*ei) != negate)
table->addExample(*ei);
return wtable;
}
void TRule::filterAndStore(PExampleTable gen, const int &wei, const int &targetClass, const int *prevCovered, const int anExamples)
{
checkProperty(filter);
examples=this->call(gen);
weightID = wei;
classDistribution = getClassDistribution(examples, wei);
if (classDistribution->abs==0)
return;
if (learner) {
classifier = learner->call(examples,wei);
}
else if (targetClass>=0)
classifier = mlnew TDefaultClassifier(gen->domain->classVar, TValue(targetClass), classDistribution);
else
classifier = mlnew TDefaultClassifier(gen->domain->classVar, classDistribution);
/* if (anExamples > 0) {
const int bitsInInt = sizeof(int)*8;
coveredExamplesLength = anExamples/bitsInInt + 1;
coveredExamples = (int *)malloc(coveredExamplesLength);
if (prevCovered) {
memcpy(coveredExamples, prevCovered, coveredExamplesLength);
int *cei = coveredExamples-1;
int mask = 0;
int inBit = 0;
PEITERATE(ei, gen) {
if (!(*cei & mask)) {
if (inBit)
*cei = *cei << inBit;
while(!*++cei);
mask = -1;
inBit = bitsInInt;
}
while( (*cei & HIGHBIT) == 0) {
*cei = *cei << 1;
*cei = mask << 1;
inBit--;
}
if (filter->call(*ei)) {
*cei = (*cei << 1) | 1;
table->addExample(*ei);
}
else
*cei = *cei << 1;
mask = mask << 1;
inBit--;
}
}
else {
int *cei = coveredExamples;
int inBit = bitsInInt;
PEITERATE(ei, gen) {
if (filter->call(*ei)) {
*cei = (*cei << 1) | 1;
table->addExample(*ei);
}
else
*cei = *cei << 1;
if (!--inBit) {
inBit = bitsInInt;
cei++;
}
}
*cei = *cei << inBit;
}
} */
}
bool haveEqualValues(const TRule &r1, const TRule &r2)
{
const TDefaultClassifier *clsf1 = r1.classifier.AS(TDefaultClassifier);
const TDefaultClassifier *clsf2 = r2.classifier.AS(TDefaultClassifier);
if (!clsf1 || !clsf2)
return false;
const TDiscDistribution *dist1 = dynamic_cast<const TDiscDistribution *>(clsf1->defaultDistribution.getUnwrappedPtr());
const TDiscDistribution *dist2 = dynamic_cast<const TDiscDistribution *>(clsf2->defaultDistribution.getUnwrappedPtr());
float high1 = dist1->highestProb();
float high2 = dist2->highestProb();
for(TDiscDistribution::const_iterator d1i(dist1->begin()), d1e(dist1->end()), d2i(dist2->begin()), d2e(dist2->end());
(d1i!=d1e) && (d2i!=d2e);
d1i++, d2i++)
if ((*d1i == high1) && (*d2i == high2))
return true;
return false;
}
bool TRule::operator <(const TRule &other) const
{
if (!haveEqualValues(*this, other))
return false;
bool different = false;
if (coveredExamples && other.coveredExamples) {
int *c1i = coveredExamples;
int *c2i = other.coveredExamples;
for(int i = coveredExamplesLength; i--; c1i++, c2i++) {
if (*c1i & ~*c2i)
return false;
if (*c1i != *c2i)
different = true;
}
}
else {
raiseError("operator not implemented yet");
}
return different;
}
bool TRule::operator <=(const TRule &other) const
{
if (!haveEqualValues(*this, other))
return false;
if (coveredExamples && other.coveredExamples) {
int *c1i = coveredExamples;
int *c2i = other.coveredExamples;
for(int i = coveredExamplesLength; i--; c1i++, c2i++) {
if (*c1i & ~*c2i)
return false;
}
}
else {
raiseError("operator not implemented yet");
}
return true;
}
bool TRule::operator >(const TRule &other) const
{
if (!haveEqualValues(*this, other))
return false;
bool different = false;
if (coveredExamples && other.coveredExamples) {
int *c1i = coveredExamples;
int *c2i = other.coveredExamples;
for(int i = coveredExamplesLength; i--; c1i++, c2i++) {
if (~*c1i & *c2i)
return false;
if (*c1i != *c2i)
different = true;
}
}
else {
raiseError("operator not implemented yet");
}
return different;
}
bool TRule::operator >=(const TRule &other) const
{
if (!haveEqualValues(*this, other))
return false;
if (coveredExamples && other.coveredExamples) {
int *c1i = coveredExamples;
int *c2i = other.coveredExamples;
for(int i = coveredExamplesLength; i--; c1i++, c2i++) {
if (~*c1i & *c2i)
return false;
}
}
else {
raiseError("operator not implemented yet");
}
return true;
}
bool TRule::operator ==(const TRule &other) const
{
if (!haveEqualValues(*this, other))
return false;
if (coveredExamples && other.coveredExamples) {
return !memcmp(coveredExamples, other.coveredExamples, coveredExamplesLength);
}
else {
raiseError("operator not implemented yet");
}
return false;
}
TRuleValidator_LRS::TRuleValidator_LRS(const float &a, const float &min_coverage, const float &max_rule_complexity, const float &min_quality)
: alpha(a),
min_coverage(min_coverage),
max_rule_complexity(max_rule_complexity),
min_quality(min_quality)
{}
bool TRuleValidator_LRS::operator()(PRule rule, PExampleTable, const int &, const int &targetClass, PDistribution apriori) const
{
const TDiscDistribution &obs_dist = dynamic_cast<const TDiscDistribution &>(rule->classDistribution.getReference());
if (!obs_dist.cases)
return false;
if (min_coverage>0.0 && obs_dist.cases < min_coverage)
return false;
if (max_rule_complexity > 0.0 && rule->complexity > max_rule_complexity)
return false;
if (min_quality>rule->quality)
return false;
const TDiscDistribution &exp_dist = dynamic_cast<const TDiscDistribution &>(apriori.getReference());
if (obs_dist.abs == exp_dist.abs) //it turns out that this happens quite often
return false;
if (alpha >= 1.0)
return true;
if (targetClass == -1) {
float lrs = 0.0;
for(TDiscDistribution::const_iterator odi(obs_dist.begin()), ode(obs_dist.end()), edi(exp_dist.begin()), ede(exp_dist.end());
(odi!=ode); odi++, edi++) {
if ((edi!=ede) && (*edi) && (*odi))
lrs += *odi * log(*odi / ((edi != ede) & (*edi > 0.0) ? *edi : 1e-5));
}
lrs = 2 * (lrs - obs_dist.abs * log(obs_dist.abs / exp_dist.abs));
return (lrs > 0.0) && (chisqprob(lrs, float(obs_dist.size()-1)) <= alpha);
}
float p = (targetClass < obs_dist.size()) ? obs_dist[targetClass] : 1e-5;
const float P = (targetClass < exp_dist.size()) && (exp_dist[targetClass] > 0.0) ? exp_dist[targetClass] : 1e-5;
float n = obs_dist.abs - p;
float N = exp_dist.abs - P;
if (n>=N)
return false;
if (N<=0.0)
N = 1e-6f;
if (p<=0.0)
p = 1e-6f;
if (n<=0.0)
n = 1e-6f;
float lrs = 2 * (p*log(p/P) + n*log(n/N) - obs_dist.abs * log(obs_dist.abs/exp_dist.abs));
return (lrs > 0.0) && (chisqprob(lrs, 1.0f) <= alpha);
}
float TRuleEvaluator_Entropy::operator()(PRule rule, PExampleTable, const int &, const int &targetClass, PDistribution apriori) const
{
const TDiscDistribution &obs_dist = dynamic_cast<const TDiscDistribution &>(rule->classDistribution.getReference());
if (!obs_dist.cases)
return -numeric_limits<float>::max();
if (targetClass == -1)
return -getEntropy(dynamic_cast<TDiscDistribution &>(rule->classDistribution.getReference()));
const TDiscDistribution &exp_dist = dynamic_cast<const TDiscDistribution &>(apriori.getReference());
float p = (targetClass < obs_dist.size()) ? obs_dist[targetClass] : 0.0;
const float P = (targetClass < exp_dist.size()) && (exp_dist[targetClass] > 0.0) ? exp_dist[targetClass] : 1e-5;
float n = obs_dist.abs - p;
float N = exp_dist.abs - P;
if (N<=0.0)
N = 1e-6f;
if (p<=0.0)
p = 1e-6f;
if (n<=0.0)
n = 1e-6f;
return ((p*log(p) + n*log(n) - obs_dist.abs * log(obs_dist.abs)) / obs_dist.abs);
}
float TRuleEvaluator_Laplace::operator()(PRule rule, PExampleTable, const int &, const int &targetClass, PDistribution apriori) const
{
const TDiscDistribution &obs_dist = dynamic_cast<const TDiscDistribution &>(rule->classDistribution.getReference());
if (!obs_dist.cases)
return 0;
float p;
if (targetClass == -1) {
p = float(obs_dist.highestProb());
return (p+1)/(obs_dist.abs+obs_dist.size());
}
p = float(obs_dist[targetClass]);
return (p+1)/(obs_dist.abs+2);
}
bool worstRule(const PRule &r1, const PRule &r2)
{ return (r1->quality > r2->quality)
|| (r1->quality==r2->quality
&& r1->complexity < r2->complexity);
}
/* || (r1->quality==r2->quality)
&& ( (r1->complexity < r2->complexity)
|| (r1->complexity == r2->complexity)
&& ((int(r1.getUnwrappedPtr()) ^ int(r2.getUnwrappedPtr())) & 16) != 0
); } */
bool inRules(PRuleList rules, PRule rule)
{
TRuleList::const_iterator ri(rules->begin()), re(rules->end());
PExampleGenerator rulegen = rule->examples;
for (; ri!=re; ri++) {
PExampleGenerator rigen = (*ri)->examples;
if (rigen->numberOfExamples() == rulegen->numberOfExamples()) {
TExampleIterator rei(rulegen->begin()), ree(rulegen->end());
TExampleIterator riei(rigen->begin()), riee(rigen->end());
for (; rei != ree && !(*rei).compare(*riei); ++rei, ++riei) {
}
if (rei == ree)
return true;
}
}
return false;
}
TRuleBeamFilter_Width::TRuleBeamFilter_Width(const int &w)
: width(w)
{}
void TRuleBeamFilter_Width::operator()(PRuleList &rules, PExampleTable, const int &)
{
if (rules->size() > width) {
// Janez poglej
sort(rules->begin(), rules->end(), worstRule);
TRuleList *filteredRules = mlnew TRuleList;
PRuleList wFilteredRules = filteredRules;
int nRules = 0;
TRuleList::const_iterator ri(rules->begin()), re(rules->end());
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -