📄 distvars.cpp
字号:
if (position >= gen->domain->variables->size())
raiseError("index %i out of range", position);
variable = gen->domain->variables->at(position);
if (variable->varType != TValue::FLOATVAR)
raiseError("attribute '%s' is not continuous", variable->name.c_str());
PEITERATE(ei, gen)
add((*ei)[position], WEIGHT(*ei));
}
// Builds the distribution of `var` over the examples produced by `gen`.
// Raises an error if the variable is not of type FLOATVAR, or if it is neither
// part of the generator's domain nor computable through getValueFrom.
// Each example contributes with the weight given by WEIGHT(*ei)
// (presumably looked up through `weightID` -- the macro is defined elsewhere).
TContDistribution::TContDistribution(PExampleGenerator gen, PVariable var, const int &weightID)
: TDistribution(var),
  sum(0.0),
  sum2(0.0)
{
  supportsContinuous = true;

  if (variable->varType != TValue::FLOATVAR)
    raiseError("attribute '%s' is not continuous", variable->name.c_str());

  int position = gen->domain->getVarNum(variable, false);

  if (position != ILLEGAL_INT) {
    // The variable is stored in the examples; read it directly.
    PEITERATE(ei, gen) {
      add((*ei)[position], WEIGHT(*ei));
    }
  }
  else if (variable->getValueFrom) {
    // Not in the domain, but the variable can compute its value from an example.
    PEITERATE(ei, gen) {
      add(variable->computeValue(*ei), WEIGHT(*ei));
    }
  }
  else {
    raiseError("attribute '%s' not in domain and cannot be computed", variable->name.c_str());
  }
}
// Returns a reference to the weight stored for value `v`.
// If `v` has no entry yet, one is created with weight 0; an existing entry is
// returned unchanged. (The previous test was inverted and reset existing
// entries to 0 on every access.)
const float &TContDistribution::atfloat(const float &v)
{
  if (find(v) == end())
    distribution[v] = 0;
  return distribution[v];
}
const float &TContDistribution::atfloat(const float &v) const
{ const_iterator vi=find(v);
if (vi==end())
raiseError("value %5.3f does not exist", v);
return (*vi).second;
}
void TContDistribution::addfloat(const float &v, const float &w)
{
iterator vi=find(v);
if (vi==end())
distribution[v]=w;
else
(*vi).second+=w;
abs += w;
cases += w;
sum += w * v;
sum2 += w * v*v;
normalized = false;
}
// Sets the weight of value `v` to exactly `w` and keeps the running totals
// (abs, cases, sum, sum2) consistent with the new weight.
// The distribution is marked as not normalized.
void TContDistribution::setfloat(const float &v, const float &w)
{
  iterator vi = find(v);
  if (vi == end()) {
    // New value: it contributes its full weight to every total.
    distribution[v] = w;
    abs += w;
    cases += w;
    sum += w * v;
    sum2 += w * v*v;   // was mistakenly accumulated into `sum`
  }
  else {
    // Existing value: only the change in weight affects the totals.
    const float dif = w - (*vi).second;
    abs += dif;
    cases += dif;      // was `+= w`, which double-counted the old weight
    sum += dif * v;
    sum2 += dif * v*v;
    (*vi).second = w;  // was `+= w`, which added instead of setting
  }
  normalized = false;
}
// Adds every (value, weight) pair of `other` to this distribution and
// accumulates its `unknowns`. `other` must be a TContDistribution; any other
// distribution type raises an error.
TDistribution &TContDistribution::operator +=(const TDistribution &other)
{
  const TContDistribution *mother = dynamic_cast<const TContDistribution *>(&other);
  if (!mother)
    raiseError("wrong distribution type for +=");

  for (const_iterator oi(mother->begin()), oe(mother->end()); oi != oe; ++oi)
    addfloat(oi->first, oi->second);

  unknowns += mother->unknowns;
  return *this;
}
// Subtracts every (value, weight) pair of `other` from this distribution by
// adding the negated weight, and subtracts its `unknowns`. `other` must be a
// TContDistribution; any other distribution type raises an error.
TDistribution &TContDistribution::operator -=(const TDistribution &other)
{
  const TContDistribution *mother = dynamic_cast<const TContDistribution *>(&other);
  if (!mother)
    raiseError("wrong distribution type for -=");

  for (const_iterator oi(mother->begin()), oe(mother->end()); oi != oe; ++oi)
    addfloat(oi->first, -oi->second);

  unknowns -= mother->unknowns;
  return *this;
}
// Convenience overload: unwraps the distribution handle and forwards to the
// reference-taking operator +=.
TDistribution &TContDistribution::operator +=(PDistribution other)
{
  return this->operator +=(other.getReference());
}
// Convenience overload: unwraps the distribution handle and forwards to the
// reference-taking operator -=.
TDistribution &TContDistribution::operator -=(PDistribution other)
{
  return this->operator -=(other.getReference());
}
// Multiplies every stored weight, as well as abs, sum and sum2, by `weight`.
// `cases` is not touched. The distribution is marked as not normalized.
TDistribution &TContDistribution::operator *=(const float &weight)
{
  for (iterator entry(begin()), last(end()); entry != last; ++entry)
    entry->second *= weight;

  abs *= weight;
  sum *= weight;
  sum2 *= weight;
  normalized = false;
  return *this;
}
// Folds the bytes of a float into an unsigned integer. Reading the object
// through `unsigned char` is allowed by the aliasing rules and never touches
// memory outside the float (the former `*(long *)&f` did both wrong where
// `long` is wider than `float`).
static unsigned long modusSeedBits(const float &f)
{
  const unsigned char *bytes = reinterpret_cast<const unsigned char *>(&f);
  unsigned long bits = 0;
  for (unsigned k = 0; k < sizeof(float); ++k)
    bits = (bits << 8) | bytes[k];
  return bits;
}

// Returns the value with the highest weight (the modus). Ties are broken at
// random, but reproducibly: the random generator is seeded from the contents
// of the distribution. Raises an error for an empty distribution.
float TContDistribution::highestProbFloatIndex() const
{
  // Could use sumValues here, but it's too expensive; this should work for distributions that are distributed enough
  unsigned long seed = 0;   // unsigned, so that wrap-around is well defined
  { const_this_ITERATE(i)
      seed += modusSeedBits((*i).first) + modusSeedBits((*i).second);
  }

  long seedValue = static_cast<long>(seed);
  TSimpleRandomGenerator rg(seedValue);

  int wins = 0;             // number of entries tied for the best weight so far
  const_iterator best = end();
  const_this_ITERATE(i) {
    bool take;
    if (wins == 0) {
      // first entry: nothing to compare with yet
      wins = 1;
      take = true;
    }
    else if ((*i).second > (*best).second) {
      // strictly better: restart the tie count
      wins = 1;
      take = true;
    }
    else if ((*i).second == (*best).second)
      // tie: the outcome of randbool(number of tied entries) decides whether to switch
      take = rg.randbool(++wins);
    else
      take = false;

    if (take)
      best = i;
  }

  if (!wins)
    raiseError("cannot compute the modus of an empty distribution");
  return (*best).first;
}
// Folds the bytes of a float into an unsigned integer. Reading the object
// through `unsigned char` is allowed by the aliasing rules and never touches
// memory outside the float (the former `*(long *)&f` did both wrong where
// `long` is wider than `float`).
static unsigned long highestProbSeedBits(const float &f)
{
  const unsigned char *bytes = reinterpret_cast<const unsigned char *>(&f);
  unsigned long bits = 0;
  for (unsigned k = 0; k < sizeof(float); ++k)
    bits = (bits << 8) | bytes[k];
  return bits;
}

// Returns the highest weight stored in the distribution. Ties are broken at
// random with a generator seeded from the contents of the distribution (the
// returned weight is the same whichever tied entry wins).
// For an empty distribution the fallback expression yields 0.
float TContDistribution::highestProb() const
{
  unsigned long seed = 0;   // unsigned, so that wrap-around is well defined
  { const_this_ITERATE(i)
      seed += highestProbSeedBits((*i).first) + highestProbSeedBits((*i).second);
  }

  long seedValue = static_cast<long>(seed);
  TSimpleRandomGenerator rg(seedValue);

  int wins = 0;             // number of entries tied for the best weight so far
  const_iterator best = end();
  const_this_ITERATE(i) {
    bool take;
    if (wins == 0) {
      wins = 1;
      take = true;
    }
    else if ((*i).second > (*best).second) {
      wins = 1;
      take = true;
    }
    else if ((*i).second == (*best).second)
      take = rg.randbool(++wins);
    else
      take = false;

    if (take)
      best = i;
  }

  if (wins)
    return (*best).second;
  else
    return size() ? 1.0/size() : 0.0;
}
// True when the distribution holds exactly one distinct value.
bool TContDistribution::noDeviation() const
{
  return size() == 1;
}
// Returns the weighted mean, sum/abs.
// Raises an error when abs is zero; the message names the variable if one is
// attached to the distribution.
float TContDistribution::average() const
{
  if (!abs) {
    if (variable)
      raiseError("cannot compute average ('%s' has no defined values)", variable->name.c_str());
    else
      raiseError("cannot compute average (attribute has no defined values)");
  }

  return sum/abs;
}
// Returns the standard deviation, computed from the running sums as
// sqrt((sum2 - sum*sum/abs) / abs).
// Raises an error when abs is at most 1e-7; the message names the variable if
// one is attached to the distribution.
float TContDistribution::dev() const
{
  if (abs <= 1e-7) {
    if (variable)
      raiseError("cannot compute standard deviation ('%s' has no defined values)", variable->name.c_str());
    else
      raiseError("cannot compute standard deviation (attribute has no defined values)");
  }

  return sqrt((sum2-sum*sum/abs)/abs);
}
// Returns the variance, computed from the running sums as
// (sum2 - sum*sum/abs) / abs.
// Raises an error when abs is zero; the message names the variable if one is
// attached to the distribution.
float TContDistribution::var() const
{
  if (!abs) {
    if (variable)
      raiseError("cannot compute variance ('%s' has no defined values)", variable->name.c_str());
    else
      raiseError("cannot compute variance (attribute has no defined values)");
  }

  return (sum2-sum*sum/abs)/abs;
}
// Returns sqrt((sum2 - sum*sum/abs) / (abs-1) / abs), or 0 when abs is at
// most 1 (where the expression would divide by zero or a negative number).
float TContDistribution::error() const
{
  if (abs <= 1.0)
    return 0.0;

  return sqrt((sum2-sum*sum/abs)/(abs-1) / abs);
}
float TContDistribution::percentile(const float &perc) const
{ if ((perc<0) || (perc>100))
raiseError("invalid percentile");
if (!size())
raiseError("empty distribution");
if (perc==0.0)
return (*begin()).first;
if (perc==100.0) {
const_iterator li(end());
return (*--li).first;
}
float togo = abs*perc/100.0;
const_iterator ths(begin()), prev, ee(end());
if (ths == ee)
raiseError("empty distribution");
while ((ths != ee) && (togo > 0)) {
togo -= (*ths).second;
prev = ths;
ths++;
}
if ((togo < 0) || (ths == ee))
return (*prev).first;
// togo==0.0 && ths!=ee
return ((*prev).first + (*ths).first) / 2.0;
}
// Rescales the distribution so that the weights add up to 1.
// - If abs is non-zero, every weight and both running sums are divided by abs.
// - If abs is zero but there are entries, every entry gets the same weight
//   1/size() and the running sums are recomputed for those uniform weights.
// Does nothing if the distribution is already marked as normalized.
void TContDistribution::normalize()
{
  if (normalized)
    return;

  if (abs) {
    this_ITERATE(dvi)
      (*dvi).second /= abs;
    sum /= abs;
    sum2 /= abs;
    abs = 1.0;
  }
  else if (size()) {
    const float p = 1.0/float(size());
    sum = 0.0;
    sum2 = 0.0;
    this_ITERATE(dvi) {
      (*dvi).second = p;
      sum += (*dvi).first;
      sum2 += sqr((*dvi).first);
    }
    // Every entry now weighs p, so the weighted sums are the plain sums
    // times p. (Dividing by abs here, as before, divided by zero.)
    sum *= p;
    sum2 *= p;
    abs = 1.0;
  }

  normalized = true;
}
float TContDistribution::randomFloat()
{
if (!randomGenerator)
randomGenerator = mlnew TRandomGenerator;
float ri = randomGenerator->randfloat(abs);
const_iterator di(begin());
while (ri > (*di).first)
ri -= (*(di++)).first;
return (*di).second;
}
float TContDistribution::randomFloat(const long &random)
{
float ri = (random & 0x7fffffff) / float(0x7fffffff);
const_iterator di(begin());
while (ri > (*di).first)
ri -= (*(di++)).first;
return (*di).second;
}
// Returns the weight at `x`: the stored weight if `x` is one of the values,
// a linear interpolation between the two neighbouring values if `x` lies
// between them, and 0 if `x` is outside the range of stored values (or the
// distribution is empty).
float TContDistribution::p(const float &x) const
{
  // upper_bound gives the first value strictly greater than x, so an exact
  // match -- if any -- is the entry just before it.
  const_iterator rb = upper_bound(x);
  if (rb == begin())
    return 0.0;            // x is below every stored value, or nothing is stored

  const_iterator lb = rb;
  --lb;
  if ((*lb).first == x)
    return (*lb).second;   // exact match, including the largest stored value

  if (rb == end())
    return 0.0;            // x is above every stored value

  return (*lb).second + (x - (*lb).first) * ((*rb).second - (*lb).second) / ((*rb).first - (*lb).first);
}
int TContDistribution::sumValues() const
{ unsigned long crc;
INIT_CRC(crc);
const_this_ITERATE(dvi) {
add_CRC((*dvi).first, crc);
add_CRC((*dvi).second, crc);
}
FINISH_CRC(crc);
return int(crc & 0x7fffffff);
}
// Creates a Gaussian distribution with the given mean, sigma and abs.
// The distribution is marked as normalized and as supporting continuous values.
TGaussianDistribution::TGaussianDistribution(const float &amean, const float &asigma, const float &anabs)
: mean(amean),
  sigma(asigma)
{
  supportsContinuous = true;
  normalized = true;
  abs = anabs;
}
// Creates a Gaussian distribution with the mean, standard deviation and abs
// of `dist`. The distribution is marked as normalized and as supporting
// continuous values.
TGaussianDistribution::TGaussianDistribution(PDistribution dist)
: mean(dist->average()),
  // sigma is a standard deviation (var() returns sigma*sigma and dev() returns
  // sigma), so dev() is used as is; it was wrongly passed through sqrt before.
  sigma(dist->dev())
{
  abs = dist->abs;
  normalized = true;
  supportsContinuous = true;
}
// Returns the mean of the Gaussian.
float TGaussianDistribution::average() const
{
  return mean;
}
// Returns the variance, i.e. sigma squared.
float TGaussianDistribution::var() const
{
  return sigma*sigma;
}
// Returns the standard deviation, sigma.
float TGaussianDistribution::dev() const
{
  return sigma;
}
// Returns sigma.
// NOTE(review): unlike TContDistribution::error(), this does not take abs
// into account -- confirm that this is intended.
float TGaussianDistribution::error() const
{
  return sigma;
}
// Normalizing a Gaussian only resets abs to 1; mean and sigma are unaffected.
void TGaussianDistribution::normalize()
{
  abs = 1.0;
}
// The most probable value of a Gaussian is its mean.
float TGaussianDistribution::highestProbFloatIndex() const
{
  return mean;
}
// The constant pi, used by the Gaussian density computations below.
#define pi 3.1415926535897931
// Returns the density at the mean, scaled by abs: abs / (sigma * sqrt(2*pi)).
float TGaussianDistribution::highestProb() const
{
  return abs / (sigma * sqrt(2*pi));
}
// Draws a random number through gasdev(mean, sigma, generator), using the
// member random generator, which is created on first use.
float TGaussianDistribution::randomFloat()
{
  if (!randomGenerator)
    randomGenerator = mlnew TRandomGenerator;

  return static_cast<float>(gasdev(static_cast<double>(mean),
                                   static_cast<double>(sigma),
                                   randomGenerator.getReference()));
}
// Draws a number through gasdev(mean, sigma, generator), using a temporary
// generator constructed from `random`.
float TGaussianDistribution::randomFloat(const long &random)
{
  TRandomGenerator rg(random);
  return static_cast<float>(gasdev(static_cast<double>(mean),
                                   static_cast<double>(sigma),
                                   rg));
}
// Returns the Gaussian density at `x`, scaled by abs:
//   abs * exp(-(x-mean)^2 / (2*sigma^2)) / (sigma * sqrt(2*pi))
// (The exponent used to be (x-mean)^2 / (4*sigma^2), which does not match the
// normalization factor or the value returned by highestProb() at the mean's
// neighbourhood.)
float TGaussianDistribution::p(const float &x) const
{
  return abs * exp(-sqr((x-mean)/sigma)/2) / (sigma*sqrt(2*pi));
}
// True when sigma is exactly zero.
bool TGaussianDistribution::noDeviation() const
{
  return sigma == 0.0;
}
// Computes a CRC over mean and sigma and returns its low 31 bits as a
// non-negative int.
int TGaussianDistribution::sumValues() const
{
  unsigned long crc;

  INIT_CRC(crc);
  add_CRC(mean, crc);
  add_CRC(sigma, crc);
  FINISH_CRC(crc);

  return int(crc & 0x7fffffff);
}
// Creates an empty list of distributions.
TDomainDistributions::TDomainDistributions()
{
}
// Builds one distribution per variable of the generator's domain and fills
// them from the examples in `gen`.
// Variables of type INTVAR are skipped when `skipDiscrete` is set and those
// of type FLOATVAR when `skipContinuous` is set; a skipped variable gets a
// null PDistribution so that positions still match the domain's variables.
// Each example contributes with the weight given by WEIGHT(*fi)
// (presumably looked up through `weightID` -- the macro is defined elsewhere).
TDomainDistributions::TDomainDistributions(PExampleGenerator gen, const long weightID, bool skipDiscrete, bool skipContinuous)
{
  reserve(gen->domain->variables->size());

  PITERATE(TVarList, vi, gen->domain->variables) {
    const bool skipped =    skipDiscrete && ((*vi)->varType == TValue::INTVAR)
                         || skipContinuous && ((*vi)->varType == TValue::FLOATVAR);
    push_back(skipped ? PDistribution() : TDistribution::create(*vi));
  }

  for (TExampleIterator fi(gen->begin()); fi; ++fi) {
    float weight = WEIGHT(*fi);
    TExample::iterator ei = (*fi).begin();

    // Walk the distributions and the example's values in parallel.
    for (iterator di = begin(); di != end(); di++, ei++) {
      if (*di)
        (*di)->add(*ei, weight);
    }
  }
}
// Normalizes every distribution in the list.
// Entries may be null (the constructor stores a null PDistribution for
// attributes it was told to skip), so those are passed over rather than
// dereferenced.
void TDomainDistributions::normalize()
{
  this_ITERATE(di)
    if (*di)
      (*di)->normalize();
}
// Returns the distribution of the class variable over the examples in `gen`.
// Raises an error if `gen` is null, has no domain, or the domain has no class
// variable. Each example contributes with the weight given by WEIGHT(*ei)
// (presumably looked up through `weightID` -- the macro is defined elsewhere).
PDistribution getClassDistribution(PExampleGenerator gen, const long &weightID)
{
  if (!gen)
    raiseErrorWho("getClassDistribution", "no examples");

  if (!gen->domain || !gen->domain->classVar)
    raiseErrorWho("getClassDistribution", "invalid example generator or class-less domain");

  PDistribution classDist = TDistribution::create(gen->domain->classVar);

  // Work on the raw object inside the loop instead of going through the wrapper.
  TDistribution *rawDist = const_cast<TDistribution *>(classDist.getUnwrappedPtr());
  PEITERATE(ei, gen) {
    rawDist->add((*ei).getClass(), WEIGHT(*ei));
  }

  return classDist;
}
#undef NOT_IMPLEMENTED
#undef CHECKVALTYPE
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -