⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 classifier.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/*
    This file is part of Orange.

    Orange is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    Orange is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Orange; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Authors: Janez Demsar, Blaz Zupan, 1996--2002
    Contact: janez.demsar@fri.uni-lj.si
*/


#include "vars.hpp"
#include <math.h>
#include "stladdon.hpp"
#include "random.hpp"
#include "examplegen.hpp"
#include "examples.hpp"
#include "domain.hpp"
//#include "filter.hpp"
#include "table.hpp"

#include "classify.ppp"

/* Project macro defined outside this file; presumably emits the class
   description for the vector-of-PClassifier type exposed as "TClassifierList"
   -- TODO confirm against the macro's definition. */
DEFINE_TOrangeVector_classDescription(PClassifier, "TClassifierList", true, ORANGE_API)

/* ***** TClassifier methods */

/* Builds a classifier bound to the class variable 'acv'; 'cp' is stored as
   the computesProbabilities flag. */
TClassifier::TClassifier(const PVariable &acv, const bool &cp)
  : classVar(acv), computesProbabilities(cp)
{
}


/* Builds a classifier with an empty (default-constructed) class variable;
   'cp' is stored as the computesProbabilities flag. */
TClassifier::TClassifier(const bool &cp)
  : classVar(PVariable()), computesProbabilities(cp)
{
}


/* Copy constructor: copies the TOrange base part, the class variable and
   the computesProbabilities flag from 'old'. */
TClassifier::TClassifier(const TClassifier &old)
  : TOrange(old),
    classVar(old.classVar),
    computesProbabilities(old.computesProbabilities)
{
}


/* Default prediction, derived from classDistribution(): the distribution's
   average when the class variable is FLOATVAR, otherwise the value returned
   by highestProbValue(exam).
   Raises an error when computesProbabilities is false. */
TValue TClassifier::operator ()(const TExample &exam)
{
  if (!computesProbabilities)
    raiseError("invalid setting of 'computesProbabilities'");

  if (classVar->varType == TValue::FLOATVAR)
    return TValue(classDistribution(exam)->average());

  return classDistribution(exam)->highestProbValue(exam);
}


/* Default class distribution, derived from operator(): a fresh distribution
   for classVar to which the single predicted value is added.
   Raises an error when computesProbabilities is true (in that case
   operator() itself is defined in terms of this method). */
PDistribution TClassifier::classDistribution(const TExample &exam)
{
  if (computesProbabilities)
    raiseError("invalid setting of 'computesProbabilities'");

  PDistribution predicted = TDistribution::create(classVar);
  predicted->add((*this)(exam));
  return predicted;
}

void TClassifier::predictionAndDistribution(const TExample &ex, TValue &val, PDistribution &classDist)
{ if (computesProbabilities) {
    classDist = classDistribution(ex);
    val = classVar->varType==TValue::FLOATVAR ? TValue(classDist->average()) : classDist->highestProbValue(ex);
  }
  else {
    val = operator()(ex);
    classDist = TDistribution::create(classVar);
    classDist->add(val);
  }
}



/* Stores the domain, the per-attribute distributions and the two weight
   ids ('ow' -> originalWeight, 'mw' -> missingWeight), then fills the
   'averages' vector through getAverages(). */
TEFMDataDescription::TEFMDataDescription(PDomain dom, PDomainDistributions dist, int ow, int mw)
  : domain(dom),
    domainDistributions(dist),
    originalWeight(ow),
    missingWeight(mw)
{
  getAverages();
}


void TEFMDataDescription::getAverages()
{ averages = vector<float>();
  if (domainDistributions)
    for(TDomainDistributions::iterator si(domainDistributions->begin()), ei(domainDistributions->end()); si!=ei; si++)
      averages.push_back(((*si)->variable->varType==TValue::INTVAR)
                          ? numeric_limits<float>::quiet_NaN()
                          : (*si)->average());
}


float TEFMDataDescription::getExampleWeight(const TExample &example) const
{ 
  if (example.domain != domain)
    raiseError("example's domain doesn't match the data descriptor's");

  float weight=1.0;
  TVarList::const_iterator vi(domain->attributes->begin()), vie(domain->attributes->end());
  TExample::iterator ei(example.begin());
  for(; vi!=vie; ei++, vi++)
    if ((*ei).isDK() && ((*ei).varType == TValue::INTVAR))
      weight /= (*vi)->noOfValues();

  return weight;
}



/* Returns a match weight for two examples. The weight starts at 1.0 and is
   adjusted once per attribute position whose value in ex1 is of type INTVAR
   (only ex1's value type is tested).

   When domainDistributions is set, for each distribution position:
     - both values are DK: multiply by the sum of squared elements of that
       position's discrete distribution; the sum is cached in
       matchProbabilities, where -1 marks "not computed yet";
     - ex1 is DK and ex2 is not special: multiply by (*di)->p(value of ex2);
     - ex2 is DK and ex1 is not special: multiply by (*di)->p(value of ex1).

   Without distributions, the weight is divided by the attribute's number of
   values whenever one value is DK and the other is not special.

   Raises an error when neither example's domain is the descriptor's domain. */
float TEFMDataDescription::getExampleMatch(const TExample &ex1, const TExample &ex2)
{ 
  // NOTE(review): this only fails when BOTH domains differ; since both
  // examples are walked positionally against 'domain', '||' looks intended.
  // Left unchanged here because callers are not visible -- confirm.
  if ((ex1.domain != domain) && (ex2.domain != domain))
    raiseError("example's domain doesn't match the data descriptor's");

  float weight=1.0;
  // Both iterators walk the examples in lockstep from the first value.
  // (e2i used to start at ex2.end(), so every access through it was past
  // the end of ex2.)
  TExample::iterator e1i(ex1.begin()), e2i(ex2.begin());

  if (domainDistributions) {
    // (Re)create the cache when its size does not match the distributions.
    if (matchProbabilities.size() != domainDistributions->size())
      matchProbabilities = vector<float>(domainDistributions->size(), -1);

    vector<float>::iterator mi(matchProbabilities.begin());
    TDomainDistributions::const_iterator di(domainDistributions->begin()), de(domainDistributions->end());

    for(; di!=de; e1i++, e2i++, di++, mi++) {
      if ((*e1i).varType == TValue::INTVAR) {
        if ((*e1i).isDK()) {
          if ((*e2i).isDK()) {
            // Both unknown: use (and lazily compute) the cached sum of squares.
            if (*mi == -1) {
              float mp = 0.0;
              ITERATE(TDiscDistribution, ddi, ((TDiscDistribution &)((*di).getReference())))
                mp += *ddi * *ddi;
              *mi = mp;
            }
            weight *= *mi;
          }
          else if (!(*e2i).isSpecial())
            weight *= (*di)->p(*e2i);
        }
        else if ((*e2i).isDK() && !(*e1i).isSpecial())
          weight *= (*di)->p(*e1i);
      }
    }
  }
  else {
    TVarList::const_iterator vi(domain->attributes->begin()), vie(domain->attributes->end());
    for(; vi!=vie; e1i++, e2i++, vi++)
      if (((*e1i).varType == TValue::INTVAR)
          && (((*e1i).isDK() && !(*e2i).isSpecial()) || ((*e2i).isDK() && !(*e1i).isSpecial())))
        weight /= (*vi)->noOfValues();
  }

  return weight;
}



/* Creates an example in domain 'dom' with data description 'dd'.
   Raises an error when 'dd' is given and its domain differs from the
   example's domain. */
TExampleForMissing::TExampleForMissing(PDomain dom, PEFMDataDescription dd)
  : TExample(dom), dataDescription(dd)
{
  const bool descriptionMismatch = dd && (dd->domain != domain);
  if (descriptionMismatch)
    raiseError("data description does not match the domain");
}


/* Copy constructor: copies the TExample part, the data description and the
   DKs / DCs index lists from 'orig'. */
TExampleForMissing::TExampleForMissing(const TExampleForMissing &orig)
  : TExample(static_cast<const TExample &>(orig)),
    dataDescription(orig.dataDescription),
    DKs(orig.DKs),
    DCs(orig.DCs)
{
}


/* Creates the example as a copy of 'orig' and attaches data description
   'dd'. Raises an error when 'dd' is given and its domain differs from the
   example's domain. */
TExampleForMissing::TExampleForMissing(const TExample &orig, PEFMDataDescription dd)
  : TExample(orig), dataDescription(dd)
{
  const bool descriptionMismatch = dd && (dd->domain != domain);
  if (descriptionMismatch)
    raiseError("data description does not match the domain");
}


/* Creates the example through TExample(dom, orig) and attaches data
   description 'dd'. Raises an error when 'dd' is given and its domain
   differs from the example's domain. */
TExampleForMissing::TExampleForMissing(PDomain dom, const TExample &orig, PEFMDataDescription dd)
  : TExample(dom, orig), dataDescription(dd)
{
  const bool descriptionMismatch = dd && (dd->domain != domain);
  if (descriptionMismatch)
    raiseError("data description does not match the domain");
}


/* Assigns from another TExampleForMissing: the TExample part is assigned
   through a TExample reference, then the data description and the DKs / DCs
   index lists are copied. */
TExampleForMissing &TExampleForMissing::operator =(const TExampleForMissing &orig)
{
  TExample &selfAsExample = static_cast<TExample &>(*this);
  const TExample &origAsExample = static_cast<const TExample &>(orig);
  selfAsExample = origAsExample;

  dataDescription = orig.dataDescription;
  DKs = orig.DKs;
  DCs = orig.DCs;
  return *this;
}

/* Assigns from a plain TExample by calling TExample::operator= directly;
   dataDescription, DKs and DCs are not modified here. */
TExample &TExampleForMissing::operator =(const TExample &orig)
{
  TExample::operator=(orig);
  return *this;
}


void TExampleForMissing::resetExample()
{ 
  checkProperty(dataDescription);

  DCs.clear();
  DKs.clear();

  float averageWeight=1;

  TVarList::const_iterator vi(domain->attributes->begin()), vie(domain->attributes->end());
  TExample::iterator ei(begin()), bei(ei);
  vector<float>::const_iterator ai(dataDescription->averages.begin());
  for(; vi!=vie; ei++, vi++, ai++)
    if ((*ei).isSpecial()) {
      if ((*vi)->varType==TValue::FLOATVAR)
        *ei=TValue(*ai);
      else if (dataDescription->missingWeight && (*ei).isDK()) {
        DKs.push_back(ei-bei);
        averageWeight/=float((*vi)->noOfValues());
      }
      else
        DCs.push_back(ei-bei);

      (*vi)->firstValue(*ei);
    }

  if (dataDescription->missingWeight) {
    float weight = dataDescription->originalWeight ? getMeta(dataDescription->originalWeight).floatV : 1;
    if (dataDescription->domainDistributions) {
      TDomainDistributions::const_iterator di(dataDescription->domainDistributions->begin());
      ITERATE(vector<int>, ci, DKs) {
        // DKs contain only discrete variables, so it is safe to cast
        const TDiscDistribution &dist = CAST_TO_DISCDISTRIBUTION(*(di+*ci));
        if (dist.abs)
          weight *= dist.front() / dist.abs;
      }
    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -