⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 decomposition.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 3 页
字号:
/*
    This file is part of Orange.

    Orange is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    Orange is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Orange; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Authors: Janez Demsar, Blaz Zupan, 1996--2002
    Contact: janez.demsar@fri.uni-lj.si
*/


#include "stladdon.hpp"

#include "vars.hpp"
#include "domain.hpp"
#include "examples.hpp"
#include "table.hpp"

// the following are here just for IMByRowsByRelief -- move it to a separate file?
#include "distance.hpp"
#include "random.hpp"
#include <set>

#include "decomposition.ppp"


TExample_nodeIndex::TExample_nodeIndex(PExample ex)
  : example(ex), nodeIndex(0)
  {}



TSortedExamples_nodeIndices::TSortedExamples_nodeIndices(PExampleGenerator eg, const vector<bool> &bound, const vector<bool> &free)
// We need to make a copy of examples since we shall store pointers to them.
: exampleTable(mlnew TExampleTable(eg)),
  maxIndex(-1)
{ 
  // Copy pointers to examples to a vector
  vector<TExample_nodeIndex> eni;
  { eni.reserve(exampleTable->numberOfExamples());
    PEITERATE(ei, exampleTable)
      if (!(*ei).getClass().isSpecial())
      // We wrap references to examples; exampleTable will live at least as long as those references need to
        eni.push_back(TExample_nodeIndex(PExample(*ei)));
  }

  // copy pointers to a table for sorting
  vector<TEnIIterator> *sorting = mlnew vector<TEnIIterator>();
  try {
    { sorting->reserve(eni.size());
      ITERATE(vector<TExample_nodeIndex>, enii, eni)
        sorting->push_back(enii);
    }

    /* Sorts the vector according to values of bound attributes,
       and assigns indices by multiplications.
       We use a combination of bucket and counting sort with linear time complexity. */
    int mult=1;
    { TVarList::iterator attr = eg->domain->attributes->begin();
      const_ITERATE(vector<bool>, bi, bound) {
        if (*bi) {
          if ((*attr)->varType!=TValue::INTVAR)
            raiseError("bound attribute '%s' is not discrete", (*attr)->name.c_str());

          int values=(*attr)->noOfValues();
          if (values<=0)
            raiseError("attribute '%s' has invalid number of values", (*attr)->name.c_str());

          sortByAttr_Mult(bi-bound.begin(), sorting, values);
          mult *= values;
        }
        attr++;
      }
    }

    // sort by class and free attributes
    { sortByAttr(eg->domain->attributes->size(), sorting, eg->domain->classVar->noOfValues());
      TVarList::iterator attr = eg->domain->attributes->begin();
      const_ITERATE(vector<bool>, fi, free) {
        if (*fi) {
          if ((*attr)->varType != TValue::INTVAR)
            raiseError("free attribute '%s' is not discrete", (*attr)->name.c_str());
          sortByAttr(fi-free.begin(), sorting, (*attr)->noOfValues());
        }
        attr++;
      }
    }

    // initialize the vector
    { reserve(sorting->size());
      ITERATE(vector<TEnIIterator>, enii, *sorting)
        push_back(**enii); }

    // compress the index table (remove the unoccupied numbers)
    { vector<int> indices(mult, 0);
      { this_ITERATE(ei)
          indices[(*ei).nodeIndex]++; }
      ITERATE(vector<int>, bi, indices)
        if (*bi>0)
          *bi = ++maxIndex;
      { this_ITERATE(ei)
          (*ei).nodeIndex=indices[(*ei).nodeIndex]; }
    }
  }
  catch (exception) {
    mldelete sorting;
    throw;
  }

  mldelete sorting;
}

        
void TSortedExamples_nodeIndices::sortByAttr_Mult(int attrNo, vector<TEnIIterator> *&sorting, int values)
{ vector<int> valf(values, 0);
  ITERATE(vector<TEnIIterator>, ii, *sorting) {
    TValue &val = (*ii)->example->operator[](attrNo);
    if (val.isSpecial())
      raiseError("attribute '%s' has undefined values", (*ii)->example->domain->getVar(attrNo)->name.c_str());
    valf[val.intV]++;
  }

  int id = 0;
  for(vector<int>::iterator ni(valf.begin()), ne(valf.end()); ni!=ne; *(ni++) = (id+=*ni)-*ni);

  vector<TEnIIterator> *newPtrs = mlnew vector<TEnIIterator>(sorting->size(), sorting->front());
  ITERATE(vector<TEnIIterator>, si, *sorting) {
    int valind=(*si)->example->operator[](attrNo).intV;
    (*newPtrs)[valf[valind]++] = *si;
    (**si).nodeIndex = (**si).nodeIndex * values + valind;
  }

  mldelete sorting;
  sorting = newPtrs;
}


void TSortedExamples_nodeIndices::sortByAttr(int attrNo, vector<TEnIIterator> *&sorting, int values)
{ vector<int> valf(values, 0);
  ITERATE(vector<TEnIIterator>, ii, *sorting) {
    TValue &val = (*ii)->example->operator[](attrNo);
    if (val.isSpecial())
      raiseError("attribute '%s' has undefined values", (*ii)->example->domain->getVar(attrNo)->name.c_str());
    valf[val.intV]++;
  }

  int id = 0;
  for(vector<int>::iterator ni=valf.begin(); ni!=valf.end(); *(ni++) = (id+=*ni)-*ni);

  vector<TEnIIterator> *newPtrs = mlnew vector<TEnIIterator>(sorting->size(), sorting->front());
  ITERATE(vector<TEnIIterator>, si, *sorting)
    (*newPtrs)[valf[(*si)->example->operator[](attrNo).intV]++] = *si;

  mldelete sorting;
  sorting=newPtrs;
}


TIMColumnNode::TIMColumnNode(const int &anind, TIMColumnNode *anext, float nerr)
: index(anind),
  next(anext),
  nodeQuality(nerr)
{}


TIMColumnNode::~TIMColumnNode()
{ while(next) {
    TIMColumnNode *nn=next->next;
    next->next=NULL; // prevents stack overflow when the list deletes itself
    mldelete next;
    next=nn;
  }
}


TDIMColumnNode::TDIMColumnNode(const int &anind, const int &noofval, float *adist, TIMColumnNode *anext)
: TIMColumnNode(anind, anext),
  noOfValues(noofval),
  distribution(adist ? adist : mlnew float[noofval])
{ if (adist)
    computeabs();
  else {
    float *di = distribution;
    for(int c = noOfValues; c--; *(di++) = 0.0); 
    abs = -1.0;
  }
}


TDIMColumnNode::~TDIMColumnNode()
{ mldelete distribution; }


TIMColumnNode &TDIMColumnNode::operator += (const TIMColumnNode &other)
{ float *di = distribution, *de = distribution+noOfValues;
  const float *ddi = dynamic_cast<const TDIMColumnNode &>(other).distribution;
  while (di!=de)
    *(di++) += *(ddi++);
  return *this;
}


TFIMColumnNode::TFIMColumnNode(int anind, TIMColumnNode *anext, const float asum, const float asum2, const float aN)
: TIMColumnNode(anind, anext),
  sum(asum),
  sum2(asum2),
  N(aN)
{}


void TFIMColumnNode::add(const float &value, const float weight)
{ sum  += weight*value;
  sum2 += weight*value*value;
  N += weight;
}

TIMColumnNode &TFIMColumnNode::operator += (const TIMColumnNode &other)
{ TFIMColumnNode const &nde = dynamic_cast<const TFIMColumnNode &>(other);
  sum += nde.sum;
  sum2+= nde.sum2;
  N += nde.N;
  return *this;
}


T_ExampleIMColumnNode::T_ExampleIMColumnNode(PExample anexample, TIMColumnNode *anode)
: example(anexample),
  column(anode)
{}


T_ExampleIMColumnNode::T_ExampleIMColumnNode(const T_ExampleIMColumnNode &other)
: example(other.example),
  column(other.column)
{ const_cast<T_ExampleIMColumnNode &>(other).column = NULL; }


T_ExampleIMColumnNode::~T_ExampleIMColumnNode()
{ mldelete column; }


T_ExampleIMColumnNode &T_ExampleIMColumnNode::operator =(const T_ExampleIMColumnNode &other)
{ example = other.example;
  column = other.column;
  const_cast<T_ExampleIMColumnNode &>(other).column=NULL;
  return *this;
}



TIM::TIM(const int &avarType)
: varType(avarType),
  columns()
{}


int TIM::traverse(visitproc visit, void *arg) const
{ TRAVERSE(TOrange::traverse);
  const_ITERATE(vector<T_ExampleIMColumnNode>, pi, columns)
    PVISIT((*pi).example);
  return 0;
}


int TIM::dropReferences()
{ DROPREFERENCES(TOrange::dropReferences);
  columns.clear();
  return 0;
}


bool TIM::fuzzy()
{ ITERATE(vector<T_ExampleIMColumnNode>, ci, columns)
    if (varType==TValue::INTVAR)
      for(TDIMColumnNode *dnode=dynamic_cast<TDIMColumnNode *>((*ci).column); dnode; dnode = (TDIMColumnNode *)(dnode->next)) {
        float *di=dnode->distribution;
        int c;
        for (c = dnode->noOfValues; c--; di++)
          if ((*di!=0.0) && (*di != dnode->abs))
            return true; // if !c, all is empty or only the last is non-empty
    }
    else
      for(TFIMColumnNode *fnode = dynamic_cast<TFIMColumnNode *>((*ci).column); fnode; fnode = (TFIMColumnNode *)(fnode->next))
        if (fnode->N*fnode->sum2 != sqr(fnode->sum))
          return true;

  return false;
}


TIMConstructor::TIMConstructor(const bool &anRE)
: recordRowExamples(anRE)
{}


PIM TIMConstructor::operator()(PExampleGenerator gen, const TVarList &aboundSet, const int &weightID)
{
  // Identify bound attributes
  vector<bool> bound = vector<bool>(gen->domain->attributes->size(), false);
  vector<bool> free = vector<bool>(gen->domain->attributes->size(), true);
  const_ITERATE(TVarList, evi, aboundSet) {
    int vn = gen->domain->getVarNum(*evi);
    bound[vn] = true;
    free[vn] = false;
  }

  return operator()(gen, bound, aboundSet, free, weightID);
}


PIM TIMConstructor::operator()(PExampleGenerator gen, const TVarList &aboundSet, const TVarList &afreeSet, const int &weightID)
{
  // Identify bound attributes
  vector<bool> bound=vector<bool>(gen->domain->attributes->size(), false);
  { const_ITERATE(TVarList, evi, aboundSet)
      bound[gen->domain->getVarNum(*evi)] = true;
  }

  vector<bool> free=vector<bool>(gen->domain->attributes->size(), false);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -