📄 pnn.cpp

📁 orange源码数据挖掘技术
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
    This file is part of Orange.

    Orange is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    Orange is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Orange; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Authors: Janez Demsar, Blaz Zupan, 1996--2002
    Contact: janez.demsar@fri.uni-lj.si
*/

#include "examplegen.hpp"
#include "domain.hpp"
#include "basstat.hpp"

#include "pnn.ppp"

// Builds an empty classifier for `domain`: zero projection dimensions, no
// anchors (bases), no radii and no stored example projections.
//
//   domain    - passed on to TClassifierFD
//   alaw      - stored in `law`; selects the distance weighting used by
//               classDistribution
//   normalize - stored in `normalizeExamples`
//
// Every owning pointer is set to NULL so that the destructor is safe even if
// nothing is ever attached to the object (the original left `radii`,
// `minClass` and `maxClass` uninitialized).
TPNN::TPNN(PDomain domain, const int &alaw, const bool normalize)
: TClassifierFD(domain, true),
  dimensions(0),
  offsets(),
  normalizers(),
  normalizeExamples(normalize),
  bases(NULL),
  radii(NULL),
  nExamples(0),
  projections(NULL),
  minClass(0),
  maxClass(0),
  law(alaw)
{}


// Placeholder for construction from an example generator plus raw anchor
// coordinates. None of the arguments are used; the constructor only reports
// that the feature is missing.
TPNN::TPNN(PDomain domain, PExampleGenerator egen, double *bases, const int &alaw, const bool)
{
  raiseError("not implemented yet");
}


// Builds a classifier from a dense table of examples.
//
//   domain    - passed to TClassifierFD; domain->attributes->size() gives the
//               number of attributes (nAttrs)
//   examples  - nEx consecutive rows; each row is read as nAttrs attribute
//               values followed by one class value
//   nEx       - number of rows in `examples`
//   ba        - anchor coordinates, nAttrs*dim doubles (dim per attribute);
//               copied into `bases`
//   dim       - number of projection dimensions
//   off, norm - per-attribute offset and divisor applied to each raw value
//   alaw      - stored in `law`
//   normalize - when true, each projection is divided by the sum of
//               (scaled value * anchor radius) if that sum is positive
//
// `projections` stores, per example, `dim` coordinates followed by the class
// value, i.e. (dim+1)*nEx doubles.
//
// Fixes relative to the original:
//   * `projections` was allocated with dim*nEx elements although
//     (dim+1)*nEx were written;
//   * `radii` was accumulated into uninitialized memory;
//   * the end-of-input pointer assumed a row stride of `dimensions` instead
//     of nAttrs+1;
//   * the "first example" test for minClass/maxClass compared against an
//     unrelated address.
TPNN::TPNN(PDomain domain, double *examples, const int &nEx, double *ba, const int &dim, PFloatList off, PFloatList norm, const int &alaw, const bool normalize)
: TClassifierFD(domain),
  dimensions(dim),
  offsets(off),
  normalizers(norm),
  normalizeExamples(normalize),
  bases((double *)memcpy(new double[domain->attributes->size()*dim], ba, domain->attributes->size()*dim*sizeof(double))),
  radii(new double[domain->attributes->size()]),
  nExamples(nEx),
  projections(new double[(dim+1)*nEx]),
  minClass(0),
  maxClass(0),
  law(alaw)
{
  const int nAttrs = domain->attributes->size();
  TFloatList::const_iterator offb = offsets->begin();
  TFloatList::const_iterator norb = normalizers->begin();

  // radius of an attribute = Euclidean length of its anchor vector
  {
    double *base = bases;
    for(double *radius = radii, *radiuse = radii + nAttrs; radius != radiuse; radius++) {
      double sumsq = 0.0;
      for(int d = dim; d--; base++)
        sumsq += sqr(*base);
      *radius = sqrt(sumsq);
    }
  }

  // projections are accumulated with +=, so they must start at zero
  double *pi, *pe;
  for(pi = projections, pe = projections + (dim+1)*nEx; pi != pe; *(pi++) = 0.0);

  const bool contClass = domain->classVar->varType == TValue::FLOATVAR;

  double *example = examples;
  double *projection = projections;
  for(double *examplee = examples + nEx*(nAttrs+1); example != examplee; projection = pe) {
    TFloatList::const_iterator offi = offb;
    TFloatList::const_iterator nori = norb;
    pe = projection + dimensions;
    double *base = bases, *radius = radii;
    double asum = 0.0;

    // `example` is advanced inside the loop body, one attribute at a time
    for(double *ee = example + nAttrs; example != ee; ) {
      const double aval = (*(example++) - *(offi++)) / *(nori++);
      for(pi = projection; pi != pe; *(pi++) += aval * *(base++));
      if (normalizeExamples)
        asum += aval * *radius++;
    }
    if (normalizeExamples && (asum > 0.0))
      for(pi = projection; pi != pe; *(pi++) /= asum);

    // `example` now points at the class value of the current row
    if (contClass) {
      if (projection == projections)
        minClass = maxClass = *example;
      else if (*example < minClass)
        minClass = *example;
      else if (*example > maxClass)
        maxClass = *example;
    }

    *pe++ = *example++; // copy the class; pe becomes the next projection slot
  }
}


// Builds a classifier from a dense table whose rows are wider than the set
// of attributes actually used.
//
//   domain      - passed to TClassifierFD; domain->attributes->size() gives
//                 the number of anchors (nAttrs)
//   examples    - nEx consecutive rows of nOrigRow doubles each; the class
//                 value is read from the last column (index nOrigRow-1)
//   nEx         - number of rows
//   ba          - anchor coordinates, nAttrs*dim doubles; copied into `bases`
//   dim         - number of projection dimensions
//   off, norm   - offset and divisor for each used attribute, in the order of
//                 attrIndices
//   alaw        - stored in `law`
//   attrIndices - column index within a row for each used attribute
//                 NOTE(review): presumably attrIndices.size() == nAttrs, since
//                 `bases`/`radii` are consumed one anchor per index - confirm
//   nOrigRow    - row length (stride) of `examples`
//   normalize   - when true, each projection is divided by the sum of
//                 (scaled value * anchor radius) if that sum is positive
//
// Fixes relative to the original:
//   * the radius loop incremented the member pointer `radii` itself, leaving
//     it pointing past its array (later read and eventually deleted);
//   * `radii` was accumulated into uninitialized memory;
//   * `projections` was allocated with dim*nEx elements although
//     (dim+1)*nEx were written;
//   * the end-of-input pointer used `dimensions` rather than the nOrigRow
//     stride by which `example` advances;
//   * the "first example" test compared against an unrelated address;
//   * minClass/maxClass were left uninitialized for non-continuous classes.
TPNN::TPNN(PDomain domain, double *examples, const int &nEx, double *ba, const int &dim, PFloatList off, PFloatList norm, const int &alaw, const vector<int> &attrIndices, int &nOrigRow, const bool normalize)
: TClassifierFD(domain),
  dimensions(dim),
  offsets(off),
  normalizers(norm),
  normalizeExamples(normalize),
  bases((double *)memcpy(new double[domain->attributes->size()*dim], ba, domain->attributes->size()*dim*sizeof(double))),
  radii(new double[domain->attributes->size()]),
  nExamples(nEx),
  projections(new double[(dim+1)*nEx]),
  minClass(0),
  maxClass(0),
  law(alaw)
{
  const int nAttrs = domain->attributes->size();
  TFloatList::const_iterator offb = offsets->begin();
  TFloatList::const_iterator norb = normalizers->begin();

  // radius of an attribute = Euclidean length of its anchor vector;
  // walk a local cursor so the owning member `radii` is never modified
  {
    double *base = bases;
    for(double *radius = radii, *radiuse = radii + nAttrs; radius != radiuse; radius++) {
      double sumsq = 0.0;
      for(int d = dim; d--; base++)
        sumsq += sqr(*base);
      *radius = sqrt(sumsq);
    }
  }

  // projections are accumulated with +=, so they must start at zero
  double *pi, *pe;
  for(pi = projections, pe = projections + (dim+1)*nEx; pi != pe; *(pi++) = 0.0);

  const bool contClass = domain->classVar->varType == TValue::FLOATVAR;

  double *projection = projections;
  for(double *example = examples, *examplee = examples + nEx*nOrigRow; example != examplee; projection = pe, example += nOrigRow) {
    TFloatList::const_iterator offi = offb;
    TFloatList::const_iterator nori = norb;
    pe = projection + dimensions;
    double *base = bases, *radius = radii;
    double asum = 0.0;

    const_ITERATE(vector<int>, ai, attrIndices) {
      const double aval = (example[*ai] - *(offi++)) / *(nori++);
      for(pi = projection; pi != pe; *(pi++) += aval * *(base++));
      if (normalizeExamples)
        asum += aval * *radius++;
    }
    if (normalizeExamples && (asum > 0.0))
      for(pi = projection; pi != pe; *(pi++) /= asum);

    const double cls = example[nOrigRow-1];

    if (contClass) {
      if (example == examples)
        minClass = maxClass = cls;
      else if (cls < minClass)
        minClass = cls;
      else if (cls > maxClass)
        maxClass = cls;
    }

    *pe++ = cls; // copy the class; pe becomes the next projection slot
  }
}


// Allocates uninitialized storage for a classifier whose contents are filled
// in by the caller afterwards.
//
//   nDim - stored in `dimensions`
//   nAtt - number of attributes; `bases` and `radii` each get 2*nAtt doubles
//   nEx  - number of examples; `projections` gets 3*nEx doubles
//
// NOTE(review): the factors 2 and 3 match dimensions == 2 (two coordinates
// per anchor, two coordinates plus the class per example); they are kept
// as-is and do not follow nDim - confirm callers only pass nDim == 2.
//
// The projections size is taken from the nEx parameter instead of the
// nExamples member, so it no longer depends on the order in which the
// members are declared in the class. minClass/maxClass start at 0, as in the
// other constructors.
TPNN::TPNN(const int &nDim, const int &nAtt, const int &nEx)
: dimensions(nDim),
  bases(new double[2*nAtt]),
  radii(new double[2*nAtt]),
  nExamples(nEx),
  projections(new double[3*nEx]),
  minClass(0),
  maxClass(0)
{}


// Copy constructor: copies the TClassifierFD part, then delegates the deep
// copy of the owned arrays and lists to operator=.
//
// `dimensions` and `nExamples` are taken from `old` up front because the
// assignment operator uses them to size the copied buffers; starting them at
// 0 (as the original did) produced empty buffers and a copy whose
// `dimensions` stayed 0. The owning pointers start as NULL so the assignment
// has nothing to release.
TPNN::TPNN(const TPNN &old)
: TClassifierFD(old),
  dimensions(old.dimensions),
  bases(NULL),
  radii(NULL),
  nExamples(old.nExamples),
  projections(NULL)
{ *this = old; }


// Deep-copies the state owned by TPNN from `old`: the anchor array, the
// radii, the stored projections, the offset/normalizer lists and the scalar
// settings. The TClassifierFD part is not assigned here (unchanged from the
// original).
//
// Fixes relative to the original:
//   * `bases` was deleted twice in a row (double free);
//   * arrays allocated with new[] were released with scalar delete;
//   * self-assignment released the buffers before copying from them;
//   * `dimensions` was never copied, and the buffer sizes were computed from
//     the destination's dimensions/nExamples instead of the source's.
TPNN &TPNN::operator =(const TPNN &old)
{
  if (this == &old)
    return *this;

  // all sizes describe the buffers being read, so they come from `old`
  const int nAttrs = old.domain->attributes->size();
  const int nBases = nAttrs * old.dimensions;
  const int nProjections = old.nExamples * (old.dimensions + 1);

  // pointers are reset to NULL before reallocating so that a failed
  // allocation never leaves a dangling owner behind
  delete[] bases;
  bases = NULL;
  if (old.bases)
    bases = (double *)memcpy(new double[nBases], old.bases, nBases*sizeof(double));

  delete[] radii;
  radii = NULL;
  if (old.radii)
    radii = (double *)memcpy(new double[nAttrs], old.radii, nAttrs*sizeof(double));

  delete[] projections;
  projections = NULL;
  if (old.projections)
    projections = (double *)memcpy(new double[nProjections], old.projections, nProjections*sizeof(double));

  if (old.offsets)
    offsets = new TFloatList(old.offsets.getReference());
  else
    offsets = PFloatList();

  if (old.normalizers)
    normalizers = new TFloatList(old.normalizers.getReference());
  else
    normalizers = PFloatList();

  dimensions = old.dimensions;
  nExamples = old.nExamples;
  law = old.law;
  normalizeExamples = old.normalizeExamples;
  minClass = old.minClass;
  maxClass = old.maxClass;

  return *this;
}


// Releases the three arrays owned by the classifier. They are allocated with
// new[] throughout this file, so they must be released with delete[]
// (the original used scalar delete). delete[] on a null pointer is a no-op,
// so no explicit checks are needed.
TPNN::~TPNN()
{
  delete[] bases;
  delete[] projections;
  delete[] radii;
}


// Projects `example` into the anchor space.
//
//   example    - every value from example.begin() to example.end() is used;
//                a special (missing) value raises an error
//   projection - output buffer of at least `dimensions` doubles; overwritten
//
// Each value is shifted by its offset, divided by its normalizer, and added
// to the projection weighted by the corresponding anchor coordinates. With
// normalizeExamples set, the result is divided by the sum of
// (scaled value * anchor radius).
//
// The division is skipped unless that sum is positive, matching how the
// constructors normalize the stored projections; the original divided
// unconditionally and produced inf/NaN coordinates for a zero sum.
//
// NOTE(review): whether example.end() covers the class value is not visible
// here - confirm that offsets, normalizers, bases and radii have an entry for
// every iterated value.
void TPNN::project(const TExample &example, double *projection)
{
  TFloatList::const_iterator offi = offsets->begin(), nori = normalizers->begin();

  double *pi, *pe = projection + dimensions;
  for(pi = projection; pi != pe; *(pi++) = 0.0);

  double *base = bases;
  double *radius = radii;
  double asum = 0.0;

  for(TExample::const_iterator ei = example.begin(), ee = example.end(); ei != ee; ) {
    if ((*ei).isSpecial())
      raiseError("cannot handle missing values");

    const double aval = ((*(ei++)).floatV - *(offi++)) / *(nori++);
    for(pi = projection; pi != pe; *(pi++) += aval * *(base++));
    if (normalizeExamples)
      asum += aval * *radius++;
  }

  if (normalizeExamples && (asum > 0.0))
    for(pi = projection; pi != pe; *(pi++) /= asum);
}


// Returns the normalized class distribution predicted for `example`.
//
// The example is projected (after conversion to this classifier's domain if
// it comes from a different one) and every stored projection votes for its
// class with a weight that depends on `law` and on the squared Euclidean
// distance d to the projected example (d is clamped to at least 1e-5):
//   InverseLinear            -> 1/sqrt(d)
//   InverseSquare            -> 1/d
//   InverseExponential, KNN  -> exp(-sqrt(d))
// Stored projections are skipped without voting for any other `law` value.
//
// Fixes relative to the original:
//   * a missing ';' after the zeroing loop turned the whole try block into
//     the loop body, so class weights were accumulated into uninitialized
//     memory;
//   * the example converted to the classifier's domain was built but the
//     unconverted one was projected;
//   * the distance loop never advanced through the projected coordinates
//     (endless loop);
//   * an unrecognized `law` never advanced to the next stored projection
//     (endless loop);
//   * both work buffers leaked on the normal return path, and were released
//     with scalar delete in the error path.
//
// NOTE(review): assumes TDiscDistribution copies `cprob` rather than taking
// ownership, as the original cleanup code implies - confirm.
PDistribution TPNN::classDistribution(const TExample &example)
{
  const int nClasses = domain->classVar->noOfValues();

  double *projection = NULL;
  float *cprob = NULL;
  PDistribution wdist;

  try {
    projection = mlnew double[dimensions];
    double *const pe = projection + dimensions;

    cprob = mlnew float[nClasses];
    for(float *ci = cprob, *ce = cprob + nClasses; ci != ce; *(ci++) = 0.0);

    if (example.domain == domain)
      project(example, projection);
    else {
      TExample nex(domain, example);
      project(nex, projection);
    }

    // each stored record is `dimensions` coordinates followed by the class
    for(double *proj = projections, *proje = projections + nExamples*(dimensions+1); proj != proje; ) {
      double dist = 0.0;
      for(double *pi = projection; pi != pe; pi++, proj++)
        dist += sqr(*pi - *proj);
      if (dist < 1e-5)
        dist = 1e-5;

      // always consume the class slot so the loop advances for any `law`
      const int cls = int(*(proj++));
      switch(law) {
        case InverseLinear: cprob[cls] += 1/sqrt(dist); break;
        case InverseSquare: cprob[cls] += 1/dist; break;
        case InverseExponential:
        case KNN: cprob[cls] += exp(-sqrt(dist)); break;
      }
    }

    TDiscDistribution *dist = mlnew TDiscDistribution(cprob, nClasses);
    wdist = PDistribution(dist);
    dist->normalize();
  }
  catch (...) {
    delete[] projection;
    delete[] cprob;
    throw;
  }

  delete[] projection;
  delete[] cprob;

  return wdist;
}




TP2NN::TP2NN(PDomain domain, PExampleGenerator egen, PFloatList basesX, PFloatList basesY, const int &alaw, const bool normalize)
: TPNN(domain, alaw, normalize)
{ 
  dimensions = 2;
  nExamples = egen->numberOfExamples();

  const int nAttrs = domain->attributes->size();

  if ((basesX->size() != nAttrs) || (basesY->size() != nAttrs))
    raiseError("the number of used attributes, x- and y-anchors coordinates mismatch");

  bases = new double[2*domain->attributes->size()];
  radii = new double[domain->attributes->size()];

  double *bi, *radiii;
  TFloatList::const_iterator bxi(basesX->begin()), bxe(basesX->end());
  TFloatList::const_iterator byi(basesY->begin());
  for(radiii = radii, bi = bases; bxi != bxe; *radiii++ = sqrt(sqr(*bxi) + sqr(*byi)), *bi++ = *bxi++, *bi++ = *byi++);

  const TDomain &gendomain = egen->domain.getReference();
  vector<int> attrIdx;
  attrIdx.reserve(nAttrs);

  offsets = new TFloatList();
  normalizers = new TFloatList();
  averages = new TFloatList();
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -