⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tdidt_split.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 3 页
字号:
                     : measure->bestThreshold(thisSubsets, thisQuality, *vi, gen, apriorClass, weightID, minSubset);
        if ((thisThreshold != ILLEGAL_FLOAT)
              && (   (!wins || (thisQuality>quality)) && ((wins=1)==1)
                  || (thisQuality==quality) && rgen.randbool(++wins))) {
          bestAttr = thisAttr;
          quality = thisQuality;
          subsetSizes = thisSubsets;
          bestThreshold = thisThreshold;
        }
    }
    if (throughCont)
      dci++;
  }
  
  if (!wins)
    return returnNothing(descriptions, subsetSizes, quality, spentAttribute);

  if (quality<worstAcceptable)
    return returnNothing(descriptions, subsetSizes, spentAttribute);

  PVariable bvar;
  if (subsetSizes && subsetSizes->variable)
    bvar = subsetSizes->variable;
  else {
    TEnumVariable *evar = mlnew TEnumVariable("");
    evar->addValue("0");
    evar->addValue("1");
    bvar = evar;
  }

  descriptions = mlnew TStringList();
  char str[128];
  sprintf(str, "<%3.3f", bestThreshold);
  descriptions->push_back(str);
  sprintf(str, ">=%3.3f", bestThreshold);
  descriptions->push_back(str);

  bvar->name = gen->domain->attributes->at(bestAttr)->name;
  spentAttribute = -1;
  return mlnew TClassifierFromVarFD(bvar, gen->domain, bestAttr, subsetSizes, mlnew TThresholdDiscretizer(bestThreshold));
}



// Builds `size` new example tables over `domain`.
//
// Each table is wrapped in a PExampleGenerator and stored in the returned
// list; the raw table pointer is also appended to `unwrapped`, in the same
// order, so the caller can fill the tables directly by branch index.
// The tables are constructed with `false` as the second argument
// (presumably "does not own its examples" -- confirm against TExampleTable).
PExampleGeneratorList TTreeExampleSplitter::prepareGeneratorList(int size, PDomain domain, vector<TExampleTable *> &unwrapped)
{
  PExampleGeneratorList generators = mlnew TExampleGeneratorList();

  // `size` is a by-value countdown: one table per remaining unit.
  for (; size--; ) {
    TExampleTable *table = mlnew TExampleTable(domain, false);
    generators->push_back(PExampleGenerator(table));
    unwrapped.push_back(table);
  }

  return generators;
}


// Runs the node's branch selector over every example in `gen` and appends
// the resulting branch index to `indices`.
//
// Returns true when every example maps to a branch in
// [0, node->branchDescriptions->size()). Returns false at the first example
// whose selector value is special or out of that range; in that case
// `indices` holds only the indices of the examples that preceded it.
bool TTreeExampleSplitter::getBranchIndices(PTreeNode node, PExampleGenerator gen, vector<int> &indices)
{
  TClassifier &selector = node->branchSelector.getReference();
  const int branchCount = node->branchDescriptions->size();

  PEITERATE(exi, gen) {
    TValue branch = selector(*exi);
    const bool usable = !branch.isSpecial() && (branch.intV>=0) && (branch.intV<branchCount);
    if (!usable)
      return false;
    indices.push_back(branch.intV);
  }

  return true;
}

// Splits `gen` into one example table per branch of `node`.
//
// An example is added to the table of the branch chosen by the node's
// branch selector. Examples whose selector value is special or falls
// outside [0, number of branch descriptions) are not added to any table.
// The weight ID and new-weights arguments are unused.
PExampleGeneratorList TTreeExampleSplitter_IgnoreUnknowns::operator ()(PTreeNode node, PExampleGenerator gen, const int &, vector<int> &)
{
  TClassifier &selector = node->branchSelector.getReference();
  const int branchCount = node->branchDescriptions->size();

  vector<TExampleTable *> tables;
  PExampleGeneratorList subsets = prepareGeneratorList(branchCount, gen->domain, tables);

  PEITERATE(exi, gen) {
    TValue branch = selector(*exi);
    const bool hasBranch = !branch.isSpecial() && (branch.intV>=0) && (branch.intV<branchCount);
    if (hasBranch)
      tables[branch.intV]->addExample(*exi);
  }

  return subsets;
}


// Splits `gen` into one example table per branch of `node`.
//
// An example goes to the branch chosen by the node's branch selector. When
// the selector value is special or outside [0, number of branch
// descriptions), the example goes to the branch reported by
// node->branchSizes->highestProbIntIndex() instead.
// Raises an error if the node has no branchSizes.
// The weight ID and new-weights arguments are unused.
PExampleGeneratorList TTreeExampleSplitter_UnknownsToCommon::operator ()(PTreeNode node, PExampleGenerator gen, const int &, vector<int> &)
{
  if (!node->branchSizes)
    raiseError("TreeExampleSplitter_UnknownsToCommon: splitConstructor didn't set the branchSize; use different constructor or splitter");

  TClassifier &selector = node->branchSelector.getReference();
  const int branchCount = node->branchDescriptions->size();
  const int fallbackBranch = node->branchSizes->highestProbIntIndex();

  vector<TExampleTable *> tables;
  PExampleGeneratorList subsets = prepareGeneratorList(branchCount, gen->domain, tables);

  PEITERATE(exi, gen) {
    TValue branch = selector(*exi);

    // Start from the fallback and override it only for a valid selector value.
    int target = fallbackBranch;
    if (!branch.isSpecial() && (branch.intV>=0) && (branch.intV<branchCount))
      target = branch.intV;

    tables[target]->addExample(*exi);
  }

  return subsets;
}


// Splits `gen` into one example table per branch of `node`.
//
// An example goes to the branch chosen by the node's branch selector. When
// the selector value is special or outside [0, number of branch
// descriptions), the example is added to every branch table.
// The weight ID and new-weights arguments are unused.
PExampleGeneratorList TTreeExampleSplitter_UnknownsToAll::operator ()(PTreeNode node, PExampleGenerator gen, const int &, vector<int> &)
{
  TClassifier &selector = node->branchSelector.getReference();
  const int branchCount = node->branchDescriptions->size();

  vector<TExampleTable *> tables;
  PExampleGeneratorList subsets = prepareGeneratorList(branchCount, gen->domain, tables);

  PEITERATE(exi, gen) {
    TValue branch = selector(*exi);
    const bool hasBranch = !branch.isSpecial() && (branch.intV>=0) && (branch.intV<branchCount);

    if (hasBranch) {
      tables[branch.intV]->addExample(*exi);
    }
    else {
      // No usable branch: replicate the example into all tables.
      for (vector<TExampleTable *>::iterator ti = tables.begin(); ti != tables.end(); ++ti)
        (*ti)->addExample(*exi);
    }
  }

  return subsets;
}


// Splits `gen` into one example table per branch of `node`.
//
// An example goes to the branch chosen by the node's branch selector. When
// the selector value is special or outside [0, number of branch
// descriptions), the branch is drawn with randomInt() from a
// TDiscDistribution: the one obtained from the selector value's svalV if
// there is one, otherwise the one obtained from node->branchSizes. If
// neither yields a distribution the example is not added to any table.
// The weight ID and new-weights arguments are unused.
PExampleGeneratorList TTreeExampleSplitter_UnknownsToRandom::operator ()(PTreeNode node, PExampleGenerator gen, const int &, vector<int> &)
{
  TClassifier &selector = node->branchSelector.getReference();
  const int branchCount = node->branchDescriptions->size();

  vector<TExampleTable *> tables;
  PExampleGeneratorList subsets = prepareGeneratorList(branchCount, gen->domain, tables);

  PEITERATE(exi, gen) {
    TValue branch = selector(*exi);
    const bool hasBranch = !branch.isSpecial() && (branch.intV>=0) && (branch.intV<branchCount);

    if (hasBranch) {
      tables[branch.intV]->addExample(*exi);
    }
    else {
      // Prefer the distribution carried by the selector's answer ...
      TDiscDistribution *distr = branch.svalV ? branch.svalV.AS(TDiscDistribution) : NULL;
      // ... and fall back to the node's branch sizes.
      if (!distr)
        distr = node->branchSizes.AS(TDiscDistribution);

      if (distr)
        tables[distr->randomInt()]->addExample(*exi);
    }
  }

  return subsets;
}


// Splits `gen` into one example table per existing branch of `node`, plus
// one extra table for examples without a usable branch.
//
// Side effect: appends the description "unknown" to
// node->branchDescriptions on every call. The number of regular branches
// is read before that append.
// An example goes to the branch chosen by the node's branch selector; when
// the selector value is special or outside [0, number of regular
// branches), it goes to the extra (last) table.
// The weight ID and new-weights arguments are unused.
PExampleGeneratorList TTreeExampleSplitter_UnknownsToBranch::operator ()(PTreeNode node, PExampleGenerator gen, const int &, vector<int> &)
{
  TClassifier &selector = node->branchSelector.getReference();

  // Count the regular branches first, then register the additional one.
  int regularBranches = node->branchDescriptions->size();
  node->branchDescriptions->push_back("unknown");

  vector<TExampleTable *> tables;
  PExampleGeneratorList subsets = prepareGeneratorList(regularBranches+1, gen->domain, tables);

  PEITERATE(exi, gen) {
    TValue branch = selector(*exi);
    const bool hasBranch = !branch.isSpecial() && (branch.intV>=0) && (branch.intV<regularBranches);

    if (hasBranch)
      tables[branch.intV]->addExample(*exi);
    else
      tables.back()->addExample(*exi);
  }

  return subsets;
}


// Splits `gen` into one example table per branch of `node`, distributing
// examples without a usable branch according to node->branchSizes.
//
// Fast path: if getBranchIndices() reports that every example has a valid
// branch, each example is added to its branch table and `newWeights` is
// left untouched.
//
// Otherwise (an error is raised if node->branchSizes is not set):
//  - one ID from getMetaID() per branch is appended to `newWeights`;
//  - an example with a valid branch is added to that branch and gets
//    WEIGHT(example) stored under that branch's new meta ID;
//  - an example whose selector value isDC() is added to every branch with
//    WEIGHT(example) under each branch's meta ID;
//  - any other example is added to each branch b for which
//    branchSizes.p(b) * WEIGHT(example) is non-zero, with that product
//    stored under branch b's meta ID.
// WEIGHT is a project macro (presumably reading the weight via `weightID`
// -- confirm against its definition).
PExampleGeneratorList TTreeExampleSplitter_UnknownsAsBranchSizes::operator()(PTreeNode node, PExampleGenerator gen, const int &weightID, vector<int> &newWeights)
{
  int branchCount = node->branchDescriptions->size();
  TClassifier &selector = node->branchSelector.getReference();

  vector<TExampleTable *> tables;
  PExampleGeneratorList subsets = prepareGeneratorList(branchCount, gen->domain, tables);

  vector<int> knownIndices;

  if (getBranchIndices(node, gen, knownIndices)) {
    // Every example has a branch: plain distribution, no new weights.
    TExampleIterator ei(gen->begin());
    for (vector<int>::const_iterator bi = knownIndices.begin(); bi != knownIndices.end(); ++bi, ++ei)
      tables[*bi]->addExample(*ei);
    return subsets;
  }

  if (!node->branchSizes)
    raiseError("TreeExampleSplitter_UnknownsAsBranchSizes: splitConstructor didn't set the branchSize; use different constructor or splitter");

  const TDiscDistribution &branchSizes = node->branchSizes.getReference();

  // One fresh weight meta ID per branch.
  for (int b = 0; b < branchCount; ++b)
    newWeights.push_back(getMetaID());

  // Replay the prefix that getBranchIndices already classified ...
  TExampleIterator ei(gen->begin());
  for (vector<int>::const_iterator bi = knownIndices.begin(); bi != knownIndices.end(); ++bi, ++ei) {
    tables[*bi]->addExample(*ei);
    (*ei).setMeta(newWeights[*bi], TValue(WEIGHT(*ei)));
  }

  // ... then classify the remainder, starting at the first example that
  // had no usable branch.
  for (; ei; ++ei) {
    TValue branch = selector(*ei);

    if (!branch.isSpecial() && (branch.intV>=0) && (branch.intV<branchCount)) {
      tables[branch.intV]->addExample(*ei);
      (*ei).setMeta(newWeights[branch.intV], TValue(WEIGHT(*ei)));
    }
    else if (branch.isDC()) {
      for (int b = 0; b < branchCount; ++b) {
        tables[b]->addExample(*ei);
        (*ei).setMeta(newWeights[b], TValue(WEIGHT(*ei)));
      }
    }
    else {
      for (int b = 0; b < branchCount; ++b) {
        float weight = branchSizes.p(b) * WEIGHT(*ei);
        if (!weight)
          continue;   // zero share: the example does not enter this branch
        tables[b]->addExample(*ei);
        (*ei).setMeta(newWeights[b], TValue(weight));
      }
    }
  }

  return subsets;
}


// Splits `gen` into one example table per branch of `node`, distributing
// examples without a usable branch according to the distribution attached
// to the selector's own answer.
//
// Fast path: if getBranchIndices() reports that every example has a valid
// branch, each example is added to its branch table and `newWeights` is
// left untouched.
//
// Otherwise:
//  - one ID from getMetaID() per branch is appended to `newWeights`;
//  - an example with a valid branch is added to that branch and gets
//    WEIGHT(example) stored under that branch's new meta ID;
//  - an example whose selector value isDC() is added to every branch with
//    WEIGHT(example) under each branch's meta ID;
//  - for any other example, if the selector value's svalV yields a
//    TDiscDistribution, the example is added to each branch b for which
//    distr->p(b) * WEIGHT(example) is non-zero, with that product stored
//    under branch b's meta ID; without such a distribution the example is
//    not added to any table.
// WEIGHT is a project macro (presumably reading the weight via `weightID`
// -- confirm against its definition).
PExampleGeneratorList TTreeExampleSplitter_UnknownsAsSelector::operator()(PTreeNode node, PExampleGenerator gen, const int &weightID, vector<int> &newWeights)
{
  TClassifier &selector = node->branchSelector.getReference();
  int branchCount = node->branchDescriptions->size();

  vector<TExampleTable *> tables;
  PExampleGeneratorList subsets = prepareGeneratorList(branchCount, gen->domain, tables);

  vector<int> knownIndices;

  if (getBranchIndices(node, gen, knownIndices)) {
    // Every example has a branch: plain distribution, no new weights.
    TExampleIterator ei(gen->begin());
    for (vector<int>::const_iterator bi = knownIndices.begin(); bi != knownIndices.end(); ++bi, ++ei)
      tables[*bi]->addExample(*ei);
    return subsets;
  }

  // One fresh weight meta ID per branch.
  for (int b = 0; b < branchCount; ++b)
    newWeights.push_back(getMetaID());

  // Replay the prefix that getBranchIndices already classified ...
  TExampleIterator ei(gen->begin());
  for (vector<int>::const_iterator bi = knownIndices.begin(); bi != knownIndices.end(); ++bi, ++ei) {
    tables[*bi]->addExample(*ei);
    (*ei).setMeta(newWeights[*bi], TValue(WEIGHT(*ei)));
  }

  // ... then classify the remainder, starting at the first example that
  // had no usable branch.
  for (; ei; ++ei) {
    TValue branch = selector(*ei);

    if (!branch.isSpecial() && (branch.intV>=0) && (branch.intV<branchCount)) {
      tables[branch.intV]->addExample(*ei);
      (*ei).setMeta(newWeights[branch.intV], TValue(WEIGHT(*ei)));
    }
    else if (branch.isDC()) {
      for (int b = 0; b < branchCount; ++b) {
        tables[b]->addExample(*ei);
        (*ei).setMeta(newWeights[b], TValue(WEIGHT(*ei)));
      }
    }
    else {
      TDiscDistribution *distr = branch.svalV ? branch.svalV.AS(TDiscDistribution) : NULL;
      if (distr) {
        for (int b = 0; b < branchCount; ++b) {
          float weight = distr->p(b) * WEIGHT(*ei);
          if (!weight)
            continue;   // zero share: the example does not enter this branch
          tables[b]->addExample(*ei);
          (*ei).setMeta(newWeights[b], TValue(weight));
        }
      }
    }
  }

  return subsets;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -