⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pnn.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 2 页
字号:
  TDomainDistributions ddist(egen, 0, false, true); // skip continuous

  if (domain->hasContinuousAttributes()) {
    TDomainBasicAttrStat basstat(egen);
    
    const_PITERATE(TVarList, ai, domain->attributes) {
      const int aidx = gendomain.getVarNum(*ai);
      attrIdx.push_back(aidx);
      if ((*ai)->varType == TValue::INTVAR) {
        offsets->push_back(0);
        normalizers->push_back((*ai)->noOfValues() - 1);
        averages->push_back(float(ddist[aidx]->highestProbIntIndex()));
      }
      else if ((*ai)->varType == TValue::FLOATVAR) {
        if (aidx < 0)
          raiseError("P2NN does not accept continuous meta attributes");

        offsets->push_back(basstat[aidx]->min);
        normalizers->push_back(basstat[aidx]->max - basstat[aidx]->min);
        averages->push_back(basstat[aidx]->avg);;
      }
      else
        raiseError("P2NN can only handle discrete and continuous attributes");
    }
  }
  else {
    const_PITERATE(TVarList, ai, domain->attributes) 
      if ((*ai)->varType != TValue::INTVAR)
        raiseError("P2NN can only handle discrete and continuous attributes");
      else {
        const int aidx = gendomain.getVarNum(*ai);
        attrIdx.push_back(aidx);
        offsets->push_back(0.0);
        normalizers->push_back((*ai)->noOfValues()-1);
        averages->push_back(float(ddist[aidx]->highestProbIntIndex()));
      }
  }

  const int &classIdx = gendomain.getVarNum(domain->classVar);
  
  projections = new double[3*egen->numberOfExamples()];
  double *pi, *pe;
  for(pi = projections, pe = projections + 3*nExamples; pi != pe; *(pi++) = 0.0);

  const bool contClass = domain->classVar->varType == TValue::FLOATVAR;

  pi = projections;
  PEITERATE(ei,egen) {
    TValue &cval = (*ei)[classIdx];
    if (cval.isSpecial())
      continue;

    TFloatList::const_iterator offi(offsets->begin());
    TFloatList::const_iterator nori(normalizers->begin());
    TFloatList::const_iterator avgi(averages->begin());
    vector<int>::const_iterator ai(attrIdx.begin()), ae(attrIdx.end());
    double *base = bases;
    radiii = radii;
    double sumex = 0.0;
    for(; ai!=ae; ai++, offi++, nori++, avgi++) {
      const TValue &val = (*ei)[*ai];
      double av;
      if (val.isSpecial())
        av = *avgi;
      else
        av = val.varType == TValue::INTVAR ? float(val.intV) : val.floatV;

      const double aval = (av - *offi) / *nori;
      pi[0] += aval * *base++;
      pi[1] += aval * *base++;
      sumex += aval * *radiii++;
    }
    if (normalizeExamples && (sumex > 0.0)) {
      pi[0] /= sumex;
      pi[1] /= sumex;
    }

    pi[2] = cval.varType == TValue::INTVAR ? float(cval.intV) : cval.floatV;
    pi += 3;

    if (contClass) {
      if (pi == projections + 3)
        minClass = maxClass = cval.floatV;
      else {
        if (cval.floatV < minClass)
          minClass = cval.floatV;
        else if (cval.floatV > maxClass)
          maxClass = cval.floatV;
      }
    }
  }
}


/* This one projects the examples; removed in favour of the one that gets the computed projections 
TP2NN::TP2NN(PDomain, double *examples, const int &nEx, double *ba, PFloatList off, PFloatList norm, PFloatList avgs, const int &alaw, const bool normalize)
: TPNN(domain, alaw, normalize)
{
  dimensions = 2;
  offsets = off;
  normalizers = norm;
  averages = avgs;

  nExamples = nEx;

  const int nAttrs = domain->attributes->size();
  TFloatList::const_iterator offi, offb = offsets->begin(), offe = offsets->end();
  TFloatList::const_iterator nori, norb = normalizers->begin(), nore = normalizers->end();

  bases = (double *)memcpy(new double[domain->attributes->size()*2], ba, domain->attributes->size()*2*sizeof(double));

  double *radiii, *radiie, *bi;
  for(radiii = radii, radiie = radii + nAttrs, bi = bases; radiii != radiie; *radiii++ = sqrt(sqr(*bi++) + sqr(*bi++)));

  projections = new double[2*nEx];
  double *pi, *pe;
  for(pi = projections, pe = projections + 3*nEx; pi != pe; *(pi++) = 0.0);

  double *example, *examplee;
  for(example = examples, examplee = examples + nExamples*(nAttrs+1), pi = projections; example != examplee; pi += 3) {
    offi = offb;
    nori = norb;
    double *base = bases;
    radiii = radii;
    double sumex = 0.0;
    for(double *ee = example + nAttrs; example != ee; example) {
      double aval = (*(example++) - *(offi++)) / *(nori++);
      pi[0] += aval * *(base++);
      pi[1] += aval * *(base++);
      if (normalizeExamples)
        sumex += aval * *radiii++;
    }
    if (normalizeExamples && (sumex > 0.0)) {
      pi[0] /= sumex;
      pi[1] /= sumex;
    }
    pi[2] = *example++;
  }
}
*/


/* Wraps precomputed projections into a TP2NN classifier.

   aprojections holds nEx triplets (x, y, class); ba holds two base-vector
   components per attribute. Ownership of both buffers appears to be taken
   over by this object — NOTE(review): confirm against the destructor,
   which is outside this view.                                              */
TP2NN::TP2NN(PDomain dom, double *aprojections, const int &nEx, double *ba, PFloatList off, PFloatList norm, PFloatList avgs, const int &alaw, const bool normalize)
: TPNN(dom, alaw, normalize)
{
  dimensions = 2;
  offsets = off;
  normalizers = norm;
  averages = avgs;
  bases = ba;
  projections = aprojections;
  nExamples = nEx;

  if (bases) {
    // One radius (base-vector length) per attribute.
    // NOTE(review): allocation of 2*size kept from the original; only the
    // first 'size' entries are filled — confirm nothing reads beyond them.
    radii = mlnew double[2*domain->attributes->size()];
    double *bi = bases;
    for(double *radiii = radii, *radiie = radii + domain->attributes->size(); radiii != radiie; radiii++) {
      // Read the two components with a defined order: the original
      // 'sqrt(sqr(*bi++) + sqr(*bi++))' modified 'bi' twice within one
      // unsequenced expression (undefined/unspecified evaluation order).
      const double bx = *bi++;
      const double by = *bi++;
      *radiii = sqrt(sqr(bx) + sqr(by));
    }
  }
  else
    radii = NULL;

  // For a continuous class, scan the class column (every third element)
  // to establish the minimum and maximum class value. Guard against an
  // empty/missing projection table, which the original read out of bounds
  // (minClass/maxClass then stay unset, as before for a discrete class).
  if ((dom->classVar->varType == TValue::FLOATVAR) && projections && (nEx > 0)) {
    double *proj = projections+2, *proje = projections + 3*nEx + 2;
    minClass = maxClass = *proj;
    while( (proj+=3) != proje ) {
      if (*proj < minClass)
        minClass = *proj;
      else if (*proj > maxClass)
        maxClass = *proj;
    }
  }
}


// Constructs a two-dimensional P2NN for nAtt attributes and nEx examples;
// all initialization is delegated to the TPNN(dimensions, nAtt, nEx) base.
TP2NN::TP2NN(const int &nAtt, const int &nEx)
: TPNN(2, nAtt, nEx)
{}

/* Projects a single example onto the plane.

   Each attribute value is normalized with the stored offset/normalizer
   (unknown values fall back to the stored average), then accumulated
   along the attribute's two base-vector components into (x, y).
   With normalizeExamples set, the point is divided by the radius-weighted
   sum of the normalized values.                                           */
void TP2NN::project(const TExample &example, double &x, double &y)
{
  TFloatList::const_iterator offi = offsets->begin(), nori = normalizers->begin(), avgi = averages->begin();
  x = y = 0.0;
  double *base = bases, *radius = radii;
  double sumex = 0.0;

  TExample::const_iterator ei = example.begin();
  for(int attrs = example.domain->attributes->size(); attrs--; ei++, avgi++, offi++, nori++) {
    double av;
    if ((*ei).isSpecial())
      av = *avgi;               // unknown value: substitute the attribute's average
    else
      av = (*ei).varType == TValue::INTVAR ? float((*ei).intV) : (*ei).floatV;

    const double aval = (av - *offi) / *nori;
    x += aval * *(base++);
    y += aval * *(base++);
    if (normalizeExamples)
      sumex += aval * *radius++;
  }
  // Guard against division by zero, consistent with the other projection
  // loops in this file, which only normalize when sumex > 0.
  if (normalizeExamples && (sumex > 0.0)) {
    x /= sumex;
    y /= sumex;
  }
}


/* Classifies a single example.

   Discrete classes are delegated to the generic TClassifier::call;
   continuous classes are predicted as the distance-weighted average
   class value at the example's projection.                            */
TValue TP2NN::operator ()(const TExample &example)
{
  // All properties needed to project and weigh the example must be set.
  checkProperty(offsets);
  checkProperty(normalizers);
  checkProperty(averages);
  checkProperty(bases);
  if (normalizeExamples)
    checkProperty(radii);

  if (classVar->varType == TValue::INTVAR)
    return TClassifier::call(example);

  double px, py;
  getProjectionForClassification(example, px, py);
  const double predicted = averageClass(px, py);
  return TValue(float(predicted));
}
    


/* Returns the class distribution for an example.

   Continuous class: a single-point continuous distribution at the
   distance-weighted average class value. Discrete class: the normalized
   distance-weighted vote counts from classDistribution(x, y, ...).       */
PDistribution TP2NN::classDistribution(const TExample &example)
{
  checkProperty(offsets);
  checkProperty(normalizers);
  checkProperty(averages);
  checkProperty(bases);
  if (normalizeExamples)
    checkProperty(radii);


  double x, y;
  getProjectionForClassification(example, x, y);
  
  if (classVar->varType == TValue::FLOATVAR) {
    PContDistribution cont = mlnew TContDistribution(classVar);
    cont->addfloat(float(averageClass(x, y)));
    return cont;
  }

  else {
    const int nClasses = domain->classVar->noOfValues();
    float *cprob = mlnew float[nClasses];

    try {
      classDistribution(x, y, cprob, nClasses);
      // NOTE(review): if TDiscDistribution copies the buffer, cprob leaks
      // on this success path — confirm its ownership semantics.
      PDiscDistribution wdist = mlnew TDiscDistribution(cprob, nClasses);
      wdist->normalize();
      return wdist;
    }
    catch (...) {
      // cprob was allocated with new[]; plain 'delete' here was undefined
      // behavior — it must be the array form.
      delete[] cprob;
      throw;
    }
  }

  return PDistribution();
}


void TP2NN::classDistribution(const double &x, const double &y, float *distribution, const int &nClasses) const
{
  for(float *ci = distribution, *ce = distribution + nClasses; ci != ce; *ci++ = 0.0);
  double *proj = projections, *proje = projections + 3*nExamples;

  switch(law) {
    case InverseLinear:
    case Linear:
      for(; proj != proje; proj += 3) {
        const double dist = sqr(proj[0] - x) + sqr(proj[1] - y);
        distribution[int(proj[2])] += dist<1e-8 ? 1e4 : 1.0/sqrt(dist);
      }
      return;

    case InverseSquare:
      for(; proj != proje; proj += 3) {
        const double dist = sqr(proj[0] - x) + sqr(proj[1] - y);
        distribution[int(proj[2])] += dist<1e-8 ? 1e8 : 1.0/dist;
      }
      return;

    case InverseExponential:
    case KNN:
      for(; proj != proje; proj += 3) {
        const double dist = sqr(proj[0] - x) + sqr(proj[1] - y);
        distribution[int(proj[2])] += exp(-sqrt(dist));
      }
      return;
  }
}


double TP2NN::averageClass(const double &x, const double &y) const
{
  double sum = 0.0;
  double N = 0.0;
  double *proj = projections, *proje = projections + 3*nExamples;

  switch(law) {
    case InverseLinear:
    case Linear:
      for(; proj != proje; proj += 3) {
        const double dist = sqr(proj[0] - x) + sqr(proj[1] - y);
        const double w = dist<1e-8 ? 1e4 : 1.0/sqrt(dist);
        sum += w * proj[2]; 
        N += w;
      }
      break;

    case InverseSquare:
      for(; proj != proje; proj += 3) {
        const double dist = sqr(proj[0] - x) + sqr(proj[1] - y);
        const double w = dist<1e-8 ? 1e4 : 1.0/dist;
        sum += w * proj[2]; 
        N += w;
      }
      break;

    case InverseExponential:
    case KNN:
      for(; proj != proje; proj += 3) {
        const double dist = sqr(proj[0] - x) + sqr(proj[1] - y);
        const double w = dist<1e-8 ? 1e4 : exp(-sqrt(dist));
        sum += w * proj[2]; 
        N += w;
      }
      break;
  }

  return N > 1e-4 ? sum/N : 0.0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -