⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lookup.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 3 页
字号:
/*
    This file is part of Orange.

    Orange is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    Orange is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Orange; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Authors: Janez Demsar, Blaz Zupan, 1996--2002
    Contact: janez.demsar@fri.uni-lj.si
*/


#include <limits>
#include "random.hpp"
#include "vars.hpp"
#include "domain.hpp"
#include "estimateprob.hpp"
#include "examples.hpp"
#include "examplegen.hpp"
#include "stladdon.hpp"
#include "distvars.hpp"
#include "bayes.hpp"
#include "contingency.hpp"
#include "table.hpp"
#include "filter.hpp"

#include "lookup.ppp"

/* Returns the value of 'variable' for the example 'ex'.
   If 'varIndex' equals ILLEGAL_INT the value is obtained by calling
   variable->computeValue(ex); otherwise it is read from the example at
   position 'varIndex'. */
inline TValue getValue(const TExample &ex, const int &varIndex, PVariable variable)
{
  if (varIndex != ILLEGAL_INT)
    return ex[varIndex];

  return variable->computeValue(ex);
}



/* Common part of all lookup classifiers.
   Stores 'vlist' as the lookup table and creates a fresh list of distributions.
   If a lookup table was given, one distribution (TDistribution::create(aclass))
   is appended per table element; otherwise the list of distributions stays empty. */
TClassifierByLookupTable::TClassifierByLookupTable(PVariable aclass, PValueList vlist)
: TClassifier(aclass, false), // we want TClassifier::classDistribution to call operator() when there are no distributions
  lookupTable(vlist),
  distributions(mlnew TDistributionList())
{
  if (!lookupTable)
    return;

  // one distribution per element of the lookup table
  int remaining = lookupTable->size();
  while (remaining-- > 0)
    distributions->push_back(TDistribution::create(aclass));
}


void TClassifierByLookupTable::valuesFromDistributions()
{
  if (lookupTable->size() != distributions->size())
    raiseError("sizes of 'lookupTable' and 'distributions' mismatch");

  TValueList::iterator vi(lookupTable->begin());
  TDistributionList::const_iterator di(distributions->begin()), de(distributions->end());
  for(; di!=de; di++, vi++)
    if ((*vi).isSpecial())
      *vi = (*di)->highestProbValue();
}


/* Lookup classifier over a single attribute 'avar', predicting 'aclass'.
   The lookup table gets avar->noOfValues()+1 elements, all initialized to
   aclass->DK(); the additional last element is the one operator() returns
   for special or out-of-range attribute values.
   lastDomainVersion starts at -1 -- presumably a version no real domain has,
   so that the first call looks up the attribute through setLastDomain. */
TClassifierByLookupTable1::TClassifierByLookupTable1(PVariable aclass, PVariable avar)
: TClassifierByLookupTable(
    aclass,
    mlnew TValueList(avar->noOfValues() + 1, aclass->DK(), aclass)),
  variable1(avar),
  lastDomainVersion(-1)
{
}


/* Caches where 'variable1' is found in 'domain'.
   Stores the result of domain->getVarNum(variable1, false) in lastVarIndex and
   remembers the domain's version, so that the lookup is repeated only when an
   example from a domain with a different version arrives.
   NOTE(review): the 'false' argument presumably makes getVarNum return
   ILLEGAL_INT instead of raising an error when the variable is not in the
   domain (getValue handles ILLEGAL_INT by computing the value) -- confirm. */
void TClassifierByLookupTable1::setLastDomain(PDomain domain)
{
  // the version is recorded only after the index has been obtained
  lastVarIndex = domain->getVarNum(variable1, false);
  lastDomainVersion = domain->version;
}


int TClassifierByLookupTable1::getIndex(const TExample &ex, TExample *conv)
{ if (lastDomainVersion!=ex.domain->version) 
    setLastDomain(ex.domain);
  
  TValue val = getValue(ex, lastVarIndex, variable1);
  
  if (val.isSpecial()) {
    if (conv)
      (*conv)[0] = val;
    return -1;
  }
  
  return val.intV;
}


/* Predicts the class of 'ex' by reading the lookup table.
   Returns the table element indexed by the attribute's value; if the value is
   special or its index is not below the size of the table, the last element
   of the table is returned instead. */
TValue TClassifierByLookupTable1::operator()(const TExample &ex)
{
  // refresh the cached attribute position when the domain version has changed
  if (lastDomainVersion!=ex.domain->version)
    setLastDomain(ex.domain);

  TValue attrValue = getValue(ex, lastVarIndex, variable1);

  const bool useLastElement = attrValue.isSpecial() || (attrValue.intV>=int(lookupTable->size()));
  if (useLastElement)
    return lookupTable->back();

  return lookupTable->operator[](attrValue.intV);
}


/* Returns the class distribution for 'ex'.
   Without 'distributions' the call is passed on to TClassifier::classDistribution.
   Otherwise the result is a clone of the distribution indexed by the
   attribute's value, or a clone of the last distribution when the value is
   special or its index is not below the number of distributions. */
PDistribution TClassifierByLookupTable1::classDistribution(const TExample &ex)
{
  if (!distributions)
    return TClassifier::classDistribution(ex);

  // refresh the cached attribute position when the domain version has changed
  if (lastDomainVersion!=ex.domain->version)
    setLastDomain(ex.domain);

  TValue attrValue = getValue(ex, lastVarIndex, variable1);

  if (attrValue.isSpecial() || (attrValue.intV>=int(distributions->size())))
    return CLONE(TDistribution, distributions->back());

  return CLONE(TDistribution, distributions->operator[](attrValue.intV));
}


/* Computes both the predicted class ('value') and the class distribution
   ('dist') for 'ex'.
   Without 'distributions' the call is passed on to
   TClassifier::predictionAndDistribution.
   Otherwise 'value' is read from the lookup table and 'dist' is a clone of the
   distribution at the same index. The last elements of both lists are used
   when the attribute's value is special or when its index is not valid for
   BOTH lists.

   The index used to be checked against the size of 'lookupTable' only, while
   it is also used to index 'distributions'; with a shorter list of
   distributions this read past its end (classDistribution checks
   distributions->size() for the same access). When the two lists have equal
   sizes the behaviour is unchanged. */
void TClassifierByLookupTable1::predictionAndDistribution(const TExample &ex, TValue &value, PDistribution &dist)
{
  if (!distributions) {
    TClassifier::predictionAndDistribution(ex, value, dist);
    return;
  }

  // refresh the cached attribute position when the domain version has changed
  if (lastDomainVersion!=ex.domain->version)
    setLastDomain(ex.domain);

  TValue val = getValue(ex, lastVarIndex, variable1);

  // the same index is applied to both lists, so it must be in range for each of them
  const bool useLastElement =    val.isSpecial()
                              || (val.intV>=int(lookupTable->size()))
                              || (val.intV>=int(distributions->size()));

  if (useLastElement) {
    value = lookupTable->back();
    dist = CLONE(TDistribution, distributions->back());
  }
  else {
    value = lookupTable->operator[](val.intV);
    dist = CLONE(TDistribution, distributions->operator[](val.intV));
  }
}


/* Replaces the special (undefined) elements of the lookup table.

   valDistribution is the distribution of values of the attribute; its elements
   are read in step with the elements of the lookup table (element i is the
   weight of table element i).

   If "distributions" are known, this computes the sum of the class distributions
   of the defined table elements, weighted by valDistribution, normalizes it and
   stores a copy of it in "distributions" wherever the table element is missing;
   the missing table element itself is set to the most probable value of
   'classes', the weighted distribution of the defined table values.
   Otherwise, it only computes 'classes' and stores its most probable value into
   the empty elements of the lookup table.

   When valDistribution.abs is zero, every defined table element is given
   weight 1 instead of its weight from valDistribution.
*/
void TClassifierByLookupTable1::replaceDKs(TDiscDistribution &valDistribution)
// sum:     accumulates the (weighted) class distributions of the defined elements
// classes: accumulates the (weighted) counts of the values stored in the defined elements
{ TDiscDistribution sum(classVar), *classes = mlnew TDiscDistribution(classVar);
  // NOTE(review): wclasses is not used below; presumably it only takes ownership
  // of 'classes' so that the object is released when the function exits -- confirm.
  PDistribution wclasses = PDistribution(classes);
  if (distributions) {
    if (valDistribution.abs) {
      TValueList::iterator vi(lookupTable->begin());
      // The result of this expression is discarded.
      // NOTE(review): presumably the access makes valDistribution at least as long
      // as 'distributions', so that 'di' below stays in range -- confirm against
      // TDiscDistribution::operator[].
      valDistribution[distributions->size()-1];
      TDiscDistribution::iterator di(valDistribution.begin());
      // dvi (distributions), vi (table values) and di (weights) advance together
      for(TDistributionList::iterator dvi(distributions->begin()), dve(distributions->end());
          dvi!=dve;
          dvi++, vi++, di++)
        if (!(*vi).isSpecial()) {
          const TDiscDistribution &tdi = CAST_TO_DISCDISTRIBUTION(*dvi);
          TDiscDistribution::const_iterator tdii(tdi.begin());
          // add this element's class distribution to 'sum', multiplied by the weight *di
          for(TDiscDistribution::iterator si(sum.begin()), se(sum.end()); si!=se; si++, tdii++)
            *si += *tdii * *di;
          sum.abs += tdi.abs * *di;
          classes->addint((*vi).intV, *di);
        }
    }
    else {
      // valDistribution.abs is zero: all defined elements count equally
      TValueList::iterator vi(lookupTable->begin());
      for(TDistributionList::iterator dvi(distributions->begin()), dve(distributions->end()); dvi!=dve; dvi++, vi++)
        if (!(*vi).isSpecial()) {
          sum += CAST_TO_DISCDISTRIBUTION(*dvi);
          classes->addint((*vi).intV, 1);
        }
    }

    sum.normalize();
    // every special element gets its own copy of the normalized sum as distribution
    // and the most probable value of 'classes' as value
    TValueList::iterator vi(lookupTable->begin());
    PITERATE(TDistributionList, dvi, distributions)
      if ((*vi).isSpecial()) {
        *dvi = mlnew TDiscDistribution(sum);
        *(vi++) = classes->highestProbValue(); // this does not need to be the same for each call!
      }
      else
        vi++;
  }
  else {
    // no distributions: only the values stored in the table are counted
    // NOTE(review): unlike in the branch above, valDistribution is not accessed at
    // its last needed index here, yet 'di' is advanced once per table element (and
    // dereferenced when valDistribution.abs is non-zero) -- confirm that callers
    // pass a valDistribution with at least lookupTable->size() elements.
    TDiscDistribution::iterator di(valDistribution.begin());
    if (valDistribution.abs) {
      for(TValueList::iterator vi(lookupTable->begin()), ve(lookupTable->end()); vi!=ve; vi++, di++)
        if (!(*vi).isSpecial())
          classes->addint((*vi).intV, *di);
    }
    else {
      for(TValueList::iterator vi(lookupTable->begin()), ve(lookupTable->end()); vi!=ve; vi++, di++)
        if (!(*vi).isSpecial())
          classes->addint((*vi).intV, 1.0);
    }

    PITERATE(TValueList, vi, lookupTable)
      if ((*vi).isSpecial())
        (*vi) = classes->highestProbValue(); // this does not need to be the same for each call!
  }
}


/* Stores the list of attributes used by the classifier into 'boundSet';
   for this classifier the list consists of 'variable1' only. Any previous
   contents of 'boundSet' are replaced. */
void TClassifierByLookupTable1::giveBoundSet(TVarList &boundSet)
{
  TVarList onlyVariable(1, variable1);
  boundSet = onlyVariable;
}




/* Lookup classifier over two attributes, 'avar1' and 'avar2', predicting 'aclass'.
   The lookup table has noOfValues(avar1) * noOfValues(avar2) elements, all
   initialized to aclass->DK(); the numbers of values of both attributes are
   stored in noOfValues1 and noOfValues2 at construction time.
   If no data description is given, one is constructed over a new domain that
   contains the two attributes and is created with a default-constructed
   PVariable as its first argument (presumably "no class variable" -- confirm). */
TClassifierByLookupTable2::TClassifierByLookupTable2(PVariable aclass, PVariable avar1, PVariable avar2, PEFMDataDescription adata)
: TClassifierByLookupTable(
    aclass,
    mlnew TValueList((avar1->noOfValues()) * (avar2->noOfValues()), aclass->DK(), aclass)),
  variable1(avar1),
  variable2(avar2),
  noOfValues1(avar1->noOfValues()),
  noOfValues2(avar2->noOfValues()),
  dataDescription(adata),
  lastDomainVersion(-1)
{
  if (!adata) {
    // the bound attributes, in the order in which getIndex stores their values
    TVarList boundVars(1, variable1);
    boundVars.push_back(variable2);
    dataDescription = mlnew TEFMDataDescription(mlnew TDomain(PVariable(), boundVars));
  }
}


/* Caches where 'variable1' and 'variable2' are found in 'domain'.
   Stores the results of domain->getVarNum(..., false) in lastVarIndex1 and
   lastVarIndex2 and remembers the domain's version, so that the lookup is
   repeated only when an example from a domain with a different version arrives.
   NOTE(review): the 'false' argument presumably makes getVarNum return
   ILLEGAL_INT instead of raising an error when a variable is not in the
   domain (getValue handles ILLEGAL_INT by computing the value) -- confirm. */
void TClassifierByLookupTable2::setLastDomain(PDomain domain)
{
  // the version is recorded only after both indices have been obtained
  lastVarIndex1 = domain->getVarNum(variable1, false);
  lastVarIndex2 = domain->getVarNum(variable2, false);
  lastDomainVersion = domain->version;
}


int TClassifierByLookupTable2::getIndex(const TExample &ex, TExample *conv)
{ if (lastDomainVersion!=ex.domain->version) 
    setLastDomain(ex.domain);
  
  TValue val1 = getValue(ex, lastVarIndex1, variable1);
  TValue val2 = getValue(ex, lastVarIndex2, variable2);
  
  if (val1.isSpecial() || val2.isSpecial()) {
    if (conv) {
      (*conv)[0] = val1;
      (*conv)[1] = val2;
    }
    return -1;
  }
  
  return noOfValues2 * val1.intV + val2.intV;
}


/* Predicts the class of 'ex'.
   - If an attribute value is special (getIndex returns a negative index), the
     prediction is made by TClassifier::operator() on the example 'conv', which
     getIndex has filled with the two attribute values, using 'dataDescription'.
   - If the index is within the lookup table, the table element is returned.
   - Otherwise the result is highestProbValue(ex) of the last distribution in
     dataDescription->domainDistributions. */
TValue TClassifierByLookupTable2::operator()(const TExample &ex)
{
  TExample conv(dataDescription->domain);
  const int index = getIndex(ex, &conv);

  if (index<0)
    return TClassifier::operator()(conv, dataDescription);

  if (index<int(lookupTable->size()))
    return lookupTable->operator[](index);

  return dataDescription->domainDistributions->back()->highestProbValue(ex);
}


/* Returns the class distribution for 'ex'.
   Without 'distributions' the call is passed on to TClassifier::classDistribution(ex).
   - If an attribute value is special (getIndex returns a negative index), the
     distribution is computed by TClassifier::classDistribution on the example
     'conv', which getIndex has filled with the two attribute values, using
     'dataDescription'.
   - If the index is within 'distributions', a clone of that distribution is returned.
   - Otherwise the result is a clone of the last distribution in
     dataDescription->domainDistributions. */
PDistribution TClassifierByLookupTable2::classDistribution(const TExample &ex)
{
  if (!distributions)
    return TClassifier::classDistribution(ex);

  TExample conv(dataDescription->domain);
  const int index = getIndex(ex, &conv);

  if (index<0)
    return TClassifier::classDistribution(conv, dataDescription);

  if (index<int(distributions->size()))
    return CLONE(TDistribution, distributions->operator[](index));

  return CLONE(TDistribution, dataDescription->domainDistributions->back());
}


/* Computes both the predicted class ('value') and the class distribution
   ('dist') for 'ex'.
   Without 'distributions' the call is passed on to
   TClassifier::predictionAndDistribution.
   - If an attribute value is special (getIndex returns a negative index),
     'dist' is computed by TClassifier::classDistribution on the example 'conv',
     which getIndex has filled with the two attribute values, and 'value' is
     its highestProbValue(ex).
   - If the index is not valid for BOTH 'distributions' and 'lookupTable',
     'dist' is a clone of the last distribution in
     dataDescription->domainDistributions and 'value' is its highestProbValue(ex).
   - Otherwise 'dist' is a clone of the indexed distribution and 'value' the
     indexed element of the lookup table.

   The index used to be checked against the size of 'distributions' only, while
   it is also used to index 'lookupTable'; with a shorter lookup table this read
   past its end (operator() checks lookupTable->size() for the same access).
   When the two lists have equal sizes the behaviour is unchanged. */
void TClassifierByLookupTable2::predictionAndDistribution(const TExample &ex, TValue &value, PDistribution &dist)
{
  if (!distributions) {
    TClassifier::predictionAndDistribution(ex, value, dist);
    return;
  }

  TExample conv(dataDescription->domain);

  int index=getIndex(ex, &conv);
  if (index<0) {
    dist = TClassifier::classDistribution(conv, dataDescription);
    value = dist->highestProbValue(ex);
  }
  // the same index is applied to both lists, so it must be in range for each of them
  else if ((index>=int(distributions->size())) || (index>=int(lookupTable->size()))) {
    dist = CLONE(TDistribution, dataDescription->domainDistributions->back());
    value = dist->highestProbValue(ex);
  }
  else {
    dist = CLONE(TDistribution, distributions->operator[](index));
    value = lookupTable->operator[](index);
  }
}


void TClassifierByLookupTable2::replaceDKs(PExampleGenerator examples, bool useBayes)
{
  PClassifier bayes;
  PDistribution classDist;

  if (useBayes)
    bayes = TBayesLearner()(examples);
  else
    classDist =  getClassDistribution(examples /*, weightID */);

  TValueList::iterator vi(lookupTable->begin());
  TDistributionList::iterator di(distributions->begin());
  bool distr = distributions && (distributions->size()>0);
  TExample example(dataDescription->domain);
  variable1->firstValue(example[0]);
  do {
    variable2->firstValue(example[1]);
    do {
      if ((*vi).isSpecial()) 
        if (useBayes) {
          *vi = bayes->operator()(example);
          if (distr)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -