📄 tdidt_split.cpp
字号:
: measure->bestThreshold(thisSubsets, thisQuality, *vi, gen, apriorClass, weightID, minSubset);
if ((thisThreshold != ILLEGAL_FLOAT)
&& ( (!wins || (thisQuality>quality)) && ((wins=1)==1)
|| (thisQuality==quality) && rgen.randbool(++wins))) {
bestAttr = thisAttr;
quality = thisQuality;
subsetSizes = thisSubsets;
bestThreshold = thisThreshold;
}
}
if (throughCont)
dci++;
}
if (!wins)
return returnNothing(descriptions, subsetSizes, quality, spentAttribute);
if (quality<worstAcceptable)
return returnNothing(descriptions, subsetSizes, spentAttribute);
PVariable bvar;
if (subsetSizes && subsetSizes->variable)
bvar = subsetSizes->variable;
else {
TEnumVariable *evar = mlnew TEnumVariable("");
evar->addValue("0");
evar->addValue("1");
bvar = evar;
}
descriptions = mlnew TStringList();
char str[128];
sprintf(str, "<%3.3f", bestThreshold);
descriptions->push_back(str);
sprintf(str, ">=%3.3f", bestThreshold);
descriptions->push_back(str);
bvar->name = gen->domain->attributes->at(bestAttr)->name;
spentAttribute = -1;
return mlnew TClassifierFromVarFD(bvar, gen->domain, bestAttr, subsetSizes, mlnew TThresholdDiscretizer(bestThreshold));
}
// Creates `size` empty example tables over `domain`.
//
// Every table is constructed as TExampleTable(domain, false), wrapped into a
// PExampleGenerator that is appended to the returned list, and its raw pointer
// is appended to `unwrapped` in the same order. Anything already stored in
// `unwrapped` is left in place.
//
// A zero or negative `size` yields an empty list and leaves `unwrapped`
// untouched. (A count-down `while(size--)` loop would, for a negative size, keep
// allocating tables until the counter overflows.)
PExampleGeneratorList TTreeExampleSplitter::prepareGeneratorList(int size, PDomain domain, vector<TExampleTable *> &unwrapped)
{
  PExampleGeneratorList examplePtrs = mlnew TExampleGeneratorList();

  // The number of tables is known up front; grow the raw-pointer vector once.
  if (size > 0)
    unwrapped.reserve(unwrapped.size() + static_cast<vector<TExampleTable *>::size_type>(size));

  for (int tableNo = 0; tableNo < size; ++tableNo) {
    TExampleTable *ntable = mlnew TExampleTable(domain, false);
    // Wrap first, so the list holds the table before the raw pointer is handed out.
    examplePtrs->push_back(PExampleGenerator(ntable));
    unwrapped.push_back(ntable);
  }

  return examplePtrs;
}
// Runs the node's branch selector over all examples of `gen` and appends the
// selected branch index of each example to `indices`.
//
// Returns true if every example got an index in the range
// [0, node->branchDescriptions->size()).
// Returns false as soon as an example yields a special value or an index out
// of that range; `indices` then contains only the entries appended for the
// examples that came before it (nothing is appended for the failing example).
bool TTreeExampleSplitter::getBranchIndices(PTreeNode node, PExampleGenerator gen, vector<int> &indices)
{
  TClassifier &selector = node->branchSelector.getReference();
  const int branchCount = node->branchDescriptions->size();

  PEITERATE(exi, gen) {
    TValue branch = selector(*exi);

    const bool assignable = !branch.isSpecial() && (branch.intV >= 0) && (branch.intV < branchCount);
    if (!assignable)
      return false;

    indices.push_back(branch.intV);
  }

  return true;
}
// Splits `gen` into one example table per branch description of `node`.
// An example is added to the table selected by the node's branch selector;
// examples for which the selector returns a special value or an index outside
// [0, number of branches) are not added to any table.
// The weight ID and new-weights arguments are not used.
PExampleGeneratorList TTreeExampleSplitter_IgnoreUnknowns::operator ()(PTreeNode node, PExampleGenerator gen, const int &, vector<int> &)
{
  TClassifier &selector = node->branchSelector.getReference();
  const int branchCount = node->branchDescriptions->size();

  vector<TExampleTable *> tables;
  PExampleGeneratorList result = prepareGeneratorList(branchCount, gen->domain, tables);

  PEITERATE(exi, gen) {
    TValue branch = selector(*exi);

    const bool unusable = branch.isSpecial() || (branch.intV < 0) || (branch.intV >= branchCount);
    if (!unusable)
      tables[branch.intV]->addExample(*exi);
  }

  return result;
}
// Splits `gen` into one example table per branch description of `node`.
// An example goes to the branch chosen by the node's branch selector; when the
// selector returns a special value or an index outside [0, number of branches),
// the example goes to the branch given by node->branchSizes->highestProbIntIndex().
// Raises an error if the node has no branchSizes.
// The weight ID and new-weights arguments are not used.
PExampleGeneratorList TTreeExampleSplitter_UnknownsToCommon::operator ()(PTreeNode node, PExampleGenerator gen, const int &, vector<int> &)
{
  if (!node->branchSizes)
    raiseError("TreeExampleSplitter_UnknownsToCommon: splitConstructor didn't set the branchSize; use different constructor or splitter");

  TClassifier &selector = node->branchSelector.getReference();
  const int branchCount = node->branchDescriptions->size();

  // NOTE(review): this index is used unchecked below; presumably it is always
  // below branchCount -- confirm against the split constructors.
  const int fallbackBranch = node->branchSizes->highestProbIntIndex();

  vector<TExampleTable *> tables;
  PExampleGeneratorList result = prepareGeneratorList(branchCount, gen->domain, tables);

  PEITERATE(exi, gen) {
    TValue branch = selector(*exi);

    int target = fallbackBranch;
    if (!branch.isSpecial() && (branch.intV >= 0) && (branch.intV < branchCount))
      target = branch.intV;

    tables[target]->addExample(*exi);
  }

  return result;
}
// Splits `gen` into one example table per branch description of `node`.
// An example goes to the branch chosen by the node's branch selector; when the
// selector returns a special value or an index outside [0, number of branches),
// the example is added to every branch table.
// The weight ID and new-weights arguments are not used.
PExampleGeneratorList TTreeExampleSplitter_UnknownsToAll::operator ()(PTreeNode node, PExampleGenerator gen, const int &, vector<int> &)
{
  TClassifier &selector = node->branchSelector.getReference();
  const int branchCount = node->branchDescriptions->size();

  vector<TExampleTable *> tables;
  PExampleGeneratorList result = prepareGeneratorList(branchCount, gen->domain, tables);

  PEITERATE(exi, gen) {
    TValue branch = selector(*exi);

    if (branch.isSpecial() || (branch.intV < 0) || (branch.intV >= branchCount)) {
      // No single branch could be determined: replicate the example everywhere.
      for (vector<TExampleTable *>::iterator ti = tables.begin(), te = tables.end(); ti != te; ++ti)
        (*ti)->addExample(*exi);
    }
    else {
      tables[branch.intV]->addExample(*exi);
    }
  }

  return result;
}
// Splits `gen` into one example table per branch description of `node`.
// An example goes to the branch chosen by the node's branch selector. When the
// selector returns a special value or an index outside [0, number of branches),
// the branch is drawn with randomInt() from a discrete distribution: the one
// attached to the selector's value (svalV) if it is a TDiscDistribution,
// otherwise node->branchSizes. If neither is available, the example is not
// added to any table.
// The weight ID and new-weights arguments are not used.
PExampleGeneratorList TTreeExampleSplitter_UnknownsToRandom::operator ()(PTreeNode node, PExampleGenerator gen, const int &, vector<int> &)
{
  TClassifier &selector = node->branchSelector.getReference();
  const int branchCount = node->branchDescriptions->size();

  vector<TExampleTable *> tables;
  PExampleGeneratorList result = prepareGeneratorList(branchCount, gen->domain, tables);

  PEITERATE(exi, gen) {
    TValue branch = selector(*exi);

    if (branch.isSpecial() || (branch.intV < 0) || (branch.intV >= branchCount)) {
      // Prefer the distribution carried by the value itself, then fall back
      // to the node's branch sizes.
      TDiscDistribution *chooser = branch.svalV ? branch.svalV.AS(TDiscDistribution) : NULL;
      if (!chooser)
        chooser = node->branchSizes.AS(TDiscDistribution);

      // NOTE(review): the drawn index is used unchecked; presumably it is
      // always below branchCount -- confirm.
      if (chooser)
        tables[chooser->randomInt()]->addExample(*exi);
    }
    else {
      tables[branch.intV]->addExample(*exi);
    }
  }

  return result;
}
// Splits `gen` into one example table per existing branch description of
// `node`, plus one extra table at the end.
// The description "unknown" is appended to node->branchDescriptions for that
// extra branch (the node is modified on every call).
// An example goes to the branch chosen by the node's branch selector; when the
// selector returns a special value or an index outside the original branch
// range, the example goes to the extra, last table.
// The weight ID and new-weights arguments are not used.
PExampleGeneratorList TTreeExampleSplitter_UnknownsToBranch::operator ()(PTreeNode node, PExampleGenerator gen, const int &, vector<int> &)
{
  TClassifier &selector = node->branchSelector.getReference();

  // Number of branches before the "unknown" branch is appended.
  int regularBranches = node->branchDescriptions->size();
  node->branchDescriptions->push_back("unknown");

  vector<TExampleTable *> tables;
  PExampleGeneratorList result = prepareGeneratorList(regularBranches + 1, gen->domain, tables);

  PEITERATE(exi, gen) {
    TValue branch = selector(*exi);

    const bool regular = !branch.isSpecial() && (branch.intV >= 0) && (branch.intV < regularBranches);
    if (regular)
      tables[branch.intV]->addExample(*exi);
    else
      tables.back()->addExample(*exi);
  }

  return result;
}
// Splits `gen` into one example table per branch description of `node`.
//
// If getBranchIndices succeeds for all examples, each example is simply added
// to its branch table and `newWeights` is left untouched.
//
// Otherwise one meta ID per branch (from getMetaID()) is appended to
// `newWeights`, and every example added to a branch also gets a meta value
// under that branch's ID:
//   - an example with a valid branch index: added to that branch with
//     WEIGHT(example);
//   - an example whose selector value satisfies isDC(): added to every branch
//     with WEIGHT(example);
//   - any other example: for each branch, weight = branchSizes.p(branch) *
//     WEIGHT(example); it is added to the branch only when that weight is non-zero.
// In this case an error is raised if the node has no branchSizes.
//
// NOTE(review): newWeights is indexed by branch number right after the IDs are
// appended, which presumably relies on it being empty on entry -- confirm.
PExampleGeneratorList TTreeExampleSplitter_UnknownsAsBranchSizes::operator()(PTreeNode node, PExampleGenerator gen, const int &weightID, vector<int> &newWeights)
{
  int branchCount = node->branchDescriptions->size();
  TClassifier &selector = node->branchSelector.getReference();

  vector<TExampleTable *> tables;
  PExampleGeneratorList result = prepareGeneratorList(branchCount, gen->domain, tables);

  vector<int> known;

  // Fast path: every example has a definite branch, no weights are needed.
  if (getBranchIndices(node, gen, known)) {
    TExampleIterator exi(gen->begin());
    for (vector<int>::iterator ki = known.begin(), ke = known.end(); ki != ke; ++ki, ++exi)
      tables[*ki]->addExample(*exi);
    return result;
  }

  if (!node->branchSizes)
    raiseError("TreeExampleSplitter_UnknownsAsBranchSizes: splitConstructor didn't set the branchSize; use different constructor or splitter");

  const TDiscDistribution &proportions = node->branchSizes.getReference();

  // One new weight meta ID per branch.
  int idsToCreate = branchCount;
  while (idsToCreate--)
    newWeights.push_back(getMetaID());

  // `known` holds the indices of the examples preceding the first one that
  // could not be assigned; reuse them instead of calling the selector again.
  TExampleIterator exi(gen->begin());
  for (vector<int>::iterator ki = known.begin(), ke = known.end(); ki != ke; ++ki) {
    tables[*ki]->addExample(*exi);
    (*exi).setMeta(newWeights[*ki], TValue(WEIGHT(*exi)));
    ++exi;
  }

  // Remaining examples, starting with the one getBranchIndices stopped at.
  for (; exi; ++exi) {
    TValue branch = selector(*exi);

    if (!branch.isSpecial() && (branch.intV >= 0) && (branch.intV < branchCount)) {
      tables[branch.intV]->addExample(*exi);
      (*exi).setMeta(newWeights[branch.intV], TValue(WEIGHT(*exi)));
    }
    else if (branch.isDC()) {
      for (int branchNo = 0; branchNo < branchCount; ++branchNo) {
        tables[branchNo]->addExample(*exi);
        (*exi).setMeta(newWeights[branchNo], TValue(WEIGHT(*exi)));
      }
    }
    else {
      for (int branchNo = 0; branchNo < branchCount; ++branchNo) {
        float weight = proportions.p(branchNo) * WEIGHT(*exi);
        if (!weight)
          continue;
        tables[branchNo]->addExample(*exi);
        (*exi).setMeta(newWeights[branchNo], TValue(weight));
      }
    }
  }

  return result;
}
// Splits `gen` into one example table per branch description of `node`.
//
// If getBranchIndices succeeds for all examples, each example is simply added
// to its branch table and `newWeights` is left untouched.
//
// Otherwise one meta ID per branch (from getMetaID()) is appended to
// `newWeights`, and every example added to a branch also gets a meta value
// under that branch's ID:
//   - an example with a valid branch index: added to that branch with
//     WEIGHT(example);
//   - an example whose selector value satisfies isDC(): added to every branch
//     with WEIGHT(example);
//   - any other example: if the selector's value carries a TDiscDistribution
//     (svalV), then for each branch weight = distr->p(branch) * WEIGHT(example)
//     and the example is added to the branch only when that weight is non-zero;
//     without such a distribution the example is not added anywhere.
//
// NOTE(review): newWeights is indexed by branch number right after the IDs are
// appended, which presumably relies on it being empty on entry -- confirm.
PExampleGeneratorList TTreeExampleSplitter_UnknownsAsSelector::operator()(PTreeNode node, PExampleGenerator gen, const int &weightID, vector<int> &newWeights)
{
  TClassifier &selector = node->branchSelector.getReference();
  int branchCount = node->branchDescriptions->size();

  vector<TExampleTable *> tables;
  PExampleGeneratorList result = prepareGeneratorList(branchCount, gen->domain, tables);

  vector<int> known;

  // Fast path: every example has a definite branch, no weights are needed.
  if (getBranchIndices(node, gen, known)) {
    TExampleIterator exi(gen->begin());
    for (vector<int>::iterator ki = known.begin(), ke = known.end(); ki != ke; ++ki, ++exi)
      tables[*ki]->addExample(*exi);
    return result;
  }

  // One new weight meta ID per branch.
  int idsToCreate = branchCount;
  while (idsToCreate--)
    newWeights.push_back(getMetaID());

  // `known` holds the indices of the examples preceding the first one that
  // could not be assigned; reuse them instead of calling the selector again.
  TExampleIterator exi(gen->begin());
  for (vector<int>::iterator ki = known.begin(), ke = known.end(); ki != ke; ++ki) {
    tables[*ki]->addExample(*exi);
    (*exi).setMeta(newWeights[*ki], TValue(WEIGHT(*exi)));
    ++exi;
  }

  // Remaining examples, starting with the one getBranchIndices stopped at.
  for (; exi; ++exi) {
    TValue branch = selector(*exi);

    if (!branch.isSpecial() && (branch.intV >= 0) && (branch.intV < branchCount)) {
      tables[branch.intV]->addExample(*exi);
      (*exi).setMeta(newWeights[branch.intV], TValue(WEIGHT(*exi)));
    }
    else if (branch.isDC()) {
      for (int branchNo = 0; branchNo < branchCount; ++branchNo) {
        tables[branchNo]->addExample(*exi);
        (*exi).setMeta(newWeights[branchNo], TValue(WEIGHT(*exi)));
      }
    }
    else {
      // Distribute the example according to the distribution attached to the
      // selector's answer, if there is one.
      TDiscDistribution *distr = NULL;
      if (branch.svalV)
        distr = branch.svalV.AS(TDiscDistribution);

      if (distr) {
        for (int branchNo = 0; branchNo < branchCount; ++branchNo) {
          float weight = distr->p(branchNo) * WEIGHT(*exi);
          if (!weight)
            continue;
          tables[branchNo]->addExample(*exi);
          (*exi).setMeta(newWeights[branchNo], TValue(weight));
        }
      }
    }
  }

  return result;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -