⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lookup.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 3 页
字号:
            *di = bayes->classDistribution(example);
        }
        else {
          *vi = classDist->highestProbValue(example);
          if (distr) 
            *di = CLONE(TDistribution, classDist);
        }
      vi++;
      if (distr) 
        di++;
    } while(variable2->nextValue(example[1]));
  } while (variable1->nextValue(example[0]));
}


// Overwrites boundSet with the two attributes this lookup table is keyed on,
// in the order variable1, variable2. Whatever boundSet held before is dropped.
void TClassifierByLookupTable2::giveBoundSet(TVarList &boundSet)
{
  TVarList bound;
  bound.push_back(variable1);
  bound.push_back(variable2);

  boundSet = bound;
}



// Constructs a lookup classifier keyed on three attributes.
//
// The base class receives a new TValueList built from the product of the
// three attributes' noOfValues(), aclass->DK() and aclass. The per-attribute
// value counts are cached in noOfValues1..3 for later index computation.
//
// If no data description is supplied (adata is null), one is created over a
// new class-less domain that contains exactly the three attributes.
TClassifierByLookupTable3::TClassifierByLookupTable3(PVariable aclass,
                                                     PVariable avar1,
                                                     PVariable avar2,
                                                     PVariable avar3,
                                                     PEFMDataDescription adata)
: TClassifierByLookupTable(
    aclass,
    mlnew TValueList(avar1->noOfValues() * avar2->noOfValues() * avar3->noOfValues(),
                     aclass->DK(),
                     aclass)),
  variable1(avar1),
  variable2(avar2),
  variable3(avar3),
  noOfValues1(avar1->noOfValues()),
  noOfValues2(avar2->noOfValues()),
  noOfValues3(avar3->noOfValues()),
  dataDescription(adata),
  lastDomainVersion(-1)
{
  if (!adata) {
    // No description given: describe a domain made of just the bound attributes.
    TVarList boundAttributes;
    boundAttributes.push_back(variable1);
    boundAttributes.push_back(variable2);
    boundAttributes.push_back(variable3);

    dataDescription = mlnew TEFMDataDescription(mlnew TDomain(PVariable(), boundAttributes));
  }
}


// Caches, for the given domain, the result of domain->getVarNum() for each of
// the three bound attributes, and remembers the domain's version so that
// getIndex() can skip this step while the version stays the same.
void TClassifierByLookupTable3::setLastDomain(PDomain domain)
{ 
  lastVarIndex1 = domain->getVarNum(variable1, false);
  lastVarIndex2 = domain->getVarNum(variable2, false);
  lastVarIndex3 = domain->getVarNum(variable3, false);
  // Stamp the version last, after all three lookups have completed.
  lastDomainVersion=domain->version;
}


int TClassifierByLookupTable3::getIndex(const TExample &ex, TExample *conv)
{  if (lastDomainVersion!=ex.domain->version)
     setLastDomain(ex.domain);
  
  TValue val1=getValue(ex, lastVarIndex1, variable1);
  TValue val2=getValue(ex, lastVarIndex2, variable2);
  TValue val3=getValue(ex, lastVarIndex3, variable3);

   if (val1.isSpecial() || val2.isSpecial() || val3.isSpecial()) {
     if (conv) {
       (*conv)[0]=val1;
       (*conv)[1]=val2;
       (*conv)[2]=val3;
     }
    return -1;
  }
  
  return noOfValues3 * (noOfValues2 * val1.intV + val2.intV) + val3.intV;
}


// Predicts the class value for an example.
//
// - getIndex() < 0 (a bound attribute value is special): delegates to
//   TClassifier::operator() with the converted example and dataDescription.
// - index inside the lookup table: returns the stored table entry.
// - index at or past the end of the table: returns highestProbValue(ex) of
//   the last element of dataDescription->domainDistributions.
TValue TClassifierByLookupTable3::operator()(const TExample &ex)
{
  TExample conv(dataDescription->domain);
  const int index = getIndex(ex, &conv);

  if (index < 0)
    return TClassifier::operator()(conv, dataDescription);

  if (index < int(lookupTable->size()))
    return lookupTable->operator[](index);

  return dataDescription->domainDistributions->back()->highestProbValue(ex);
}


// Returns the class distribution for an example.
//
// Without stored distributions the base-class implementation is used on the
// original example. Otherwise:
// - getIndex() < 0: base-class classDistribution on the converted example;
// - index inside `distributions`: a clone of the stored distribution;
// - index at or past its end: a clone of the last element of
//   dataDescription->domainDistributions.
PDistribution TClassifierByLookupTable3::classDistribution(const TExample &ex)
{
  if (!distributions)
    return TClassifier::classDistribution(ex);

  TExample conv(dataDescription->domain);
  const int index = getIndex(ex, &conv);

  if (index < 0)
    return TClassifier::classDistribution(conv, dataDescription);

  if (index < int(distributions->size()))
    return CLONE(TDistribution, distributions->operator[](index));

  return CLONE(TDistribution, dataDescription->domainDistributions->back());
}


// Computes both the predicted value and the class distribution for an example.
//
// Without stored distributions everything is delegated to the base class.
// For a regular in-range index, `dist` is a clone of the stored distribution
// and `value` is the lookup table entry. Otherwise `dist` comes from the base
// class (index < 0) or from the last element of
// dataDescription->domainDistributions (index out of range), and `value` is
// that distribution's highestProbValue(ex).
void TClassifierByLookupTable3::predictionAndDistribution(const TExample &ex, TValue &value, PDistribution &dist)
{
  if (!distributions) {
    TClassifier::predictionAndDistribution(ex, value, dist);
    return;
  }

  TExample conv(dataDescription->domain);
  const int index = getIndex(ex, &conv);

  // Regular case: both answers are stored under the same index.
  if (index >= 0 && index < int(distributions->size())) {
    dist = CLONE(TDistribution, distributions->operator[](index));
    value = lookupTable->operator[](index);
    return;
  }

  // Fallbacks: obtain a distribution first, then derive the value from it.
  if (index < 0)
    dist = TClassifier::classDistribution(conv, dataDescription);
  else
    dist = CLONE(TDistribution, dataDescription->domainDistributions->back());

  value = dist->highestProbValue(ex);
}


// Replaces special entries of the lookup table with predictions.
//
// examples  - data used to build the replacement predictor.
// useBayes  - if true, a classifier returned by TBayesLearner()(examples)
//             supplies the value (and distribution) for each special entry;
//             otherwise the class distribution returned by
//             getClassDistribution(examples) is used: its highestProbValue()
//             becomes the value and a clone of it becomes the distribution.
//
// The table is walked in step with an enumeration of all value combinations
// of the three attributes (variable3 changing fastest). This matches the
// layout used by getIndex(), assuming firstValue()/nextValue() enumerate
// values in intV order - TODO confirm.
//
// Stored distributions are updated only when `distributions` is non-null and
// non-empty. That check is made BEFORE the distribution iterator is obtained,
// so a classifier without distributions no longer dereferences a null list.
void TClassifierByLookupTable3::replaceDKs(PExampleGenerator examples, bool useBayes)
{
  PClassifier bayes;
  PDistribution classDist;

  if (useBayes)
    bayes = TBayesLearner()(examples);
  else
    classDist = getClassDistribution(examples /*, weight */);

  // Must be evaluated before touching distributions->begin().
  const bool distr = distributions && (distributions->size() > 0);

  TValueList::iterator vi(lookupTable->begin());
  // Only take begin() when there is a list to iterate; otherwise keep a
  // value-initialized iterator that is never dereferenced or advanced.
  TDistributionList::iterator di(distr ? distributions->begin() : TDistributionList::iterator());

  TExample example(dataDescription->domain);
  variable1->firstValue(example[0]);
  do {
    variable2->firstValue(example[1]);
    do {
      variable3->firstValue(example[2]);
      do {
        if ((*vi).isSpecial()) {
          if (useBayes) {
            *vi = bayes->operator()(example);
            if (distr)
              *di = bayes->classDistribution(example);
          }
          else {
            *vi = classDist->highestProbValue(example);
            if (distr)
              *di = CLONE(TDistribution, classDist);
          }
        }

        // Advance to the table slot of the next value combination.
        vi++;
        if (distr)
          di++;
      } while (variable3->nextValue(example[2]));
    } while (variable2->nextValue(example[1]));
  } while (variable1->nextValue(example[0]));
}

// Overwrites boundSet with the three attributes this lookup table is keyed
// on, in the order variable1, variable2, variable3.
void TClassifierByLookupTable3::giveBoundSet(TVarList &boundSet)
{
  TVarList bound;
  bound.push_back(variable1);
  bound.push_back(variable2);
  bound.push_back(variable3);

  boundSet = bound;
}



// Constructs a lookup classifier keyed on an arbitrary list of attributes.
//
// Every attribute must have varType TValue::INTVAR; otherwise raiseError() is
// called. The per-attribute noOfValues() are recorded in `noOfValues`, and
// their product is used as the size argument for the lookup table and as the
// number of distributions created with TDistribution::create(aclass).
//
// If no data description is supplied (adata is null), one is created over a
// new class-less domain built from the given attributes.
TClassifierByLookupTableN::TClassifierByLookupTableN(PVariable aclass, PVarList avars, PEFMDataDescription adata)
: TClassifierByLookupTable(aclass, NULL),
  variables(avars),
  noOfValues(mlnew TIntList()),
  dataDescription(adata),
  lastDomainVersion(-1)
{
  long int tableSize = 1;
  const_PITERATE(TVarList, attr, avars) {
    if ((*attr)->varType != TValue::INTVAR)
      raiseError("lookup tables only work with discrete attributes");

    noOfValues->push_back((*attr)->noOfValues());
    tableSize *= (*attr)->noOfValues();
  }

  lookupTable = mlnew TValueList(tableSize, aclass->DK(), aclass);

  // One distribution per table slot.
  distributions = mlnew TDistributionList();
  int remaining = tableSize;
  while (remaining--)
    distributions->push_back(TDistribution::create(aclass));

  if (!adata)
    dataDescription = mlnew TEFMDataDescription(mlnew TDomain(PVariable(), avars.getReference()));
}


// Rebuilds the cache of domain->getVarNum() results, one per bound attribute
// and in the attributes' order, then records the domain's version so that
// getIndex() can reuse the cache while the version stays the same.
void TClassifierByLookupTableN::setLastDomain(PDomain domain)
{
  lastVarIndices.clear();

  for (TVarList::const_iterator var(variables->begin()), varEnd(variables->end());
       var != varEnd;
       ++var)
    lastVarIndices.push_back(domain->getVarNum(*var, false));

  lastDomainVersion = domain->version;
}


int TClassifierByLookupTableN::getIndex(const TExample &ex, TExample *conv)
{
  if (lastDomainVersion!=ex.domain->version)
     setLastDomain(ex.domain);

  int index = 0;
  TVarList::const_iterator vi(variables->begin());
  int i = 0;
  vector<int>::const_iterator ii(lastVarIndices.begin()), iie(lastVarIndices.end());
  TIntList::const_iterator ni(noOfValues->begin());
  for(; ii != iie; ii++, vi++, i++, ni++) {

    const TValue val = getValue(ex, *ii, *vi);

    if (val.isSpecial()) {
      if (conv)
        for(; ii != iie; (*conv)[i++] = getValue(ex, *ii++, *vi++));
      return -1;
    }

    index = index * *ni + val.intV;

    if (conv)
      (*conv)[i] = val;
  }

  return index;
}


// Predicts the class value for an example.
//
// - getIndex() < 0 (a bound attribute value is special): delegates to
//   TClassifier::operator() with the converted example and dataDescription.
// - index inside the lookup table: returns the stored table entry.
// - index at or past the end of the table: returns highestProbValue(ex) of
//   the last element of dataDescription->domainDistributions.
TValue TClassifierByLookupTableN::operator()(const TExample &ex)
{
  TExample conv(dataDescription->domain);
  const int index = getIndex(ex, &conv);

  if (index < 0)
    return TClassifier::operator()(conv, dataDescription);

  if (index < int(lookupTable->size()))
    return lookupTable->operator[](index);

  return dataDescription->domainDistributions->back()->highestProbValue(ex);
}


// Returns the class distribution for an example.
//
// Without stored distributions the base-class implementation is used on the
// original example. Otherwise:
// - getIndex() < 0: base-class classDistribution on the converted example;
// - index inside `distributions`: a clone of the stored distribution;
// - index at or past its end: a clone of the last element of
//   dataDescription->domainDistributions.
PDistribution TClassifierByLookupTableN::classDistribution(const TExample &ex)
{
  if (!distributions)
    return TClassifier::classDistribution(ex);

  TExample conv(dataDescription->domain);
  const int index = getIndex(ex, &conv);

  if (index < 0)
    return TClassifier::classDistribution(conv, dataDescription);

  if (index < int(distributions->size()))
    return CLONE(TDistribution, distributions->operator[](index));

  return CLONE(TDistribution, dataDescription->domainDistributions->back());
}


// Computes both the predicted value and the class distribution for an example.
//
// Without stored distributions everything is delegated to the base class.
// For a regular in-range index, `dist` is a clone of the stored distribution
// and `value` is the lookup table entry. Otherwise `dist` comes from the base
// class (index < 0) or from the last element of
// dataDescription->domainDistributions (index out of range), and `value` is
// that distribution's highestProbValue(ex).
void TClassifierByLookupTableN::predictionAndDistribution(const TExample &ex, TValue &value, PDistribution &dist)
{
  if (!distributions) {
    TClassifier::predictionAndDistribution(ex, value, dist);
    return;
  }

  TExample conv(dataDescription->domain);
  const int index = getIndex(ex, &conv);

  // Regular case: both answers are stored under the same index.
  if (index >= 0 && index < int(distributions->size())) {
    dist = CLONE(TDistribution, distributions->operator[](index));
    value = lookupTable->operator[](index);
    return;
  }

  // Fallbacks: obtain a distribution first, then derive the value from it.
  if (index < 0)
    dist = TClassifier::classDistribution(conv, dataDescription);
  else
    dist = CLONE(TDistribution, dataDescription->domainDistributions->back());

  value = dist->highestProbValue(ex);
}


// Not implemented for the N-attribute table: both arguments are ignored and
// the only effect is the warning raised below.
void TClassifierByLookupTableN::replaceDKs(PExampleGenerator examples, bool useBayes)
{
  raiseWarning("ClassifierByLookupTableN does not provide the function for replacing undefined values yet");
}

// Overwrites boundSet with a copy of the list of attributes this lookup table
// is keyed on.
void TClassifierByLookupTableN::giveBoundSet(TVarList &boundSet)
{ 
  boundSet = variables.getReference();
}








// Default-constructs the learner with unknownsHandling set to UnknownsKeep
// and allowFastLookups switched off.
TLookupLearner::TLookupLearner()
: unknownsHandling(UnknownsKeep),
  allowFastLookups(false)
{}



// Raises the "examples with unknown class are ignored" warning at most once.
// The expansion site must have a boolean variable named `alreadyWarned` in
// scope; the macro sets it to true after the first warning so that later
// expansions do nothing.
#define UNKNOWN_CLASS_WARNING \
{ \
  if (!alreadyWarned) { \
    raiseWarning("examples with unknown class are ignored"); \
    alreadyWarned = true; \
  } \
}


PClassifier TLookupLearner::operator()(PExampleGenerator ogen, const int &weightID)
{ 
  if (!ogen->domain->classVar)
    raiseError("class-less domain");

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -