⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 decomposition.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 3 页
字号:
// One candidate neighbour: its distance from the reference example, a random
// tie-breaker and an iterator to the neighbour's entry in a class table.
// Records order by distance first; equal distances are ordered by the random
// offset, so that ties are broken randomly rather than by insertion order.
class TDistRec {
public:
  float dist;                          // distance from the reference example
  long randoff;                        // random number used to break distance ties
  vector<TCI_w>::iterator exPointer;   // the neighbour's entry in its class table

  // Note that the random offset is accepted as an int and widened into 'randoff'.
  TDistRec(vector<TCI_w>::iterator ep, const int &roff, const float &adist)
  : dist(adist), randoff(roff), exPointer(ep)
  {}

  // Ordering by (dist, randoff).
  bool operator <(const TDistRec &other) const
  {
    if (dist != other.dist)
      return dist < other.dist;
    return randoff < other.randoff;
  }

  // Two records differ when either the distance or the random offset differs;
  // exPointer does not take part in the comparison.
  bool operator !=(const TDistRec &other) const
  {
    return !((dist == other.dist) && (randoff == other.randoff));
  }
};


// Default settings: a neighbour weight budget of 10 (k), 50 reference
// examples (m), no column-proportional k, and all optional behaviours off.
TIMByRowsByRelief::TIMByRowsByRelief()
  : k(10), m(50), kFromColumns(0.0),
    ignoreSameExample(false), convertToBinary(false), correctClassFirst(false),
    allExamples(false), allSameNeighbours(false)
{
}


// Constructs an incompatibility matrix (by rows) from nearest neighbours of
// reference examples.
//
// Parameters:
//   gen        source examples; their domain must have a discrete class
//   (unnamed)  unused
//   aboundSet  bound attributes; each combination of their values is a column
//   free       flags, parallel to gen->domain->attributes, marking the free
//              attributes on which distances between examples are measured
//   weightID   id of the meta value with example weights (0 for unweighted)
//
// Returns a matrix with one row per reference example. For each class, the
// nearest examples of that class add refWeight*neighbourWeight*classWeight to
// the cell [neighbour's column][adjusted class index] of the row.
//
// Raises an error if the class is missing or not discrete, or if a bound
// attribute has an undefined value. Examples with undefined class are skipped.
PIMByRows TIMByRowsByRelief::operator()(PExampleGenerator gen, const vector<bool> &, const TVarList &aboundSet, const vector<bool> &free, const int &weightID)
{
  int classes =   (gen->domain->classVar && (gen->domain->classVar->varType==TValue::INTVAR))
                ? gen->domain->classVar->noOfValues()
                : -1;

  if (classes==-1)
    raiseError("these is no class or it is not discrete.");

  // the generator is seeded with the number of examples
  TRandomGenerator rgen(gen->numberOfExamples());

  // boundDomain has the bound attributes and no class;
  // freeDomain has the free attributes and the original class
  PDomain boundDomain=mlnew TDomain(PVariable(), aboundSet);
  PDomain freeDomain;
  {
    TVarList freeSet;
    { TVarList::const_iterator vi(gen->domain->attributes->begin());
      const_ITERATE(vector<bool>, fi, free) {
        if (*fi)
          freeSet.push_back(*vi);
        vi++;
      }
    }
    freeDomain=mlnew TDomain(gen->domain->classVar, freeSet);
  }

  // the number of columns is the product of the numbers of values of bound attributes
  vector<int> values;
  int columns = 1;
  const_ITERATE(TVarList, bvi, aboundSet) {
    int tvalues=(*bvi)->noOfValues();
    values.push_back(tvalues);
    columns*=tvalues;
  }

  PIMByRows im = mlnew TIMByRows(gen->domain->classVar->varType);
  im->columnExamples = vector<PExample>(columns, PExample());
  // the weight of neighbours to collect for each class
  float myK = (kFromColumns>0.0) ? kFromColumns*columns : k;

  // converts examples to freeDomain
  // and divides them into classTables
  TExampleTable freeExamples(freeDomain);
  vector<vector<TCI_w> > classTables;  // for each class, (column index, index in freeExamples) of its examples
  vector<float> gN;                    // for each class, the sum of weights of its examples
  for(int cl = classes; cl--; ) {
    classTables.push_back(vector<TCI_w>());
    gN.push_back(0.0);
  }

  {
    int freeIndex = 0;
    PEITERATE(ei, gen) {
      if ((*ei).getClass().isSpecial())
        continue;

      // the column index is the mixed-radix number composed of the values of bound attributes
      TExample boundExample(boundDomain, *ei);
      int ci = 0;
      for (int i = 0, vsize = values.size(); i<vsize; i++)
        if (boundExample[i].isSpecial())
          raiseError("attribute '%s' has undefined values", aboundSet[i]->name.c_str());
        else
          ci = ci*values[i]+boundExample[i].intV;

      TExample freeExample(freeDomain, *ei);
      if (weightID)
        freeExample.setMeta(weightID, TValue(WEIGHT(*ei)));
      // Store the converted example (which carries the weight set above),
      // not the original one: the original code built freeExample and then
      // discarded it by adding *ei instead.
      freeExamples.addExample(freeExample);
      int classIndex = (*ei).getClass().intV;
      classTables[classIndex].push_back(TCI_w(ci, freeIndex++));
      gN[classIndex] += WEIGHT(*ei);

      // remember the first example seen for each column
      if (!im->columnExamples[ci])
        im->columnExamples[ci]=mlnew TExample(boundDomain, boundExample);
    }
  }

  PExamplesDistance_Relief useDistance = TExamplesDistanceConstructor_Relief()(PExampleGenerator(freeExamples), weightID);

  float gNtot = 0.0;
  ITERATE(vector<float>, gi, gN)
    gNtot += *gi;

  // the total number of examples
  long N = freeExamples.numberOfExamples();

  long eNum = -1;

  // all examples are used as reference examples if so requested
  // or if there are fewer examples than the requested number of references
  bool myAllExamples = allExamples || (m>N);

  for(float referenceExamples = 0, refWeight; myAllExamples ? (eNum+1<N) : (referenceExamples<m); referenceExamples+=refWeight) {
    // Choose a random or the next consecutive example.
    // This is probably not correct: examples with lower weights should have
    // a lower chance of being chosen. Multiplying the contribution by a low
    // weight does not compensate for this, since the same example can be
    // chosen over and over again.
    eNum = myAllExamples ? eNum+1 : rgen.randlong(N);

    TExample &example = freeExamples[eNum];
    int eClass = example.getClass();
    refWeight = WEIGHT(example);

    im->rows.push_back(TDIMRow(mlnew TExample(example), columns, classes));

    // for each class
    for(int oClass = 0; oClass<classes; oClass++)
      if (classTables[oClass].size()>0) {

        // the class index under which the contributions are stored:
        //  - convertToBinary: 0 for the reference example's class, 1 for any other
        //  - correctClassFirst: the reference example's class is moved to index 0
        //    and the classes below it are shifted up by one
        //  - otherwise the class index is left as it is
        int adjustedClassIndex;
        if (convertToBinary)
          adjustedClassIndex = (oClass==eClass) ? 0 : 1;
        else if (correctClassFirst)
          adjustedClassIndex= (oClass==eClass) ? 0
                                               : ( (oClass>eClass) ? oClass : oClass+1);
        else
          adjustedClassIndex=oClass;

        // sort the examples by the distance (ties are broken by a random number)
        set<TDistRec> neighset;
        ITERATE(vector<TCI_w>, epi, classTables[oClass])
          neighset.insert(TDistRec(epi, rgen.randlong(), useDistance->operator()(example, freeExamples[(*epi).freeIndex])));

        float classWeight = adjustedClassIndex ? gN[oClass] / (gNtot-gN[eClass])  :  1.0;

        set<TDistRec>::iterator ni(neighset.begin()), ne(neighset.end());

        // Skip neighbours at distance zero. The end of the set must be
        // checked before dereferencing: all neighbours may be at distance zero.
        if (ignoreSameExample)
          while((ni!=ne) && ((*ni).dist<=0))
            ni++;

        // take neighbours until their weights sum to myK; if allSameNeighbours
        // is set, continue through all remaining neighbours at distance zero
        for(float needwei = myK, compWeight; (ni!=ne) && ((needwei>0) || (allSameNeighbours && ((*ni).dist<=0))); needwei-=compWeight, ni++) {
          // determine the weight for the current example;
          // it is truncated to the weight that is still needed
          TCI_w &CI_w =* (*ni).exPointer;
          compWeight = WEIGHT(freeExamples[CI_w.freeIndex]);
          if (compWeight>needwei)
            compWeight = needwei;
          float koe = refWeight*compWeight*classWeight;
          im->rows.back().nodes[CI_w.columnIndex][adjustedClassIndex] += koe;
        }
      }
  }

  return im;
}



// Constructs a blurer that uses the same weight, w, for all attributes.
//   ow  weight of the original values (origWeight)
//   aw  stored as adjustOrigWeight
//   oe  stored as onlyEmpty
TIMBlurer::TIMBlurer(const float &w, const float &ow, const bool &aw, const bool &oe)
  : weight(w), origWeight(ow),
    adjustOrigWeight(aw), onlyEmpty(oe)
{
}


// Constructs a blurer with a separate weight for each attribute (aaw).
// The common weight is set to -1, which makes operator() use attrWeights.
//   ow  weight of the original values (origWeight)
//   aw  stored as adjustOrigWeight
//   oe  stored as onlyEmpty
TIMBlurer::TIMBlurer(PFloatList aaw, const float &ow, const bool &aw, const bool &oe)
  : weight(-1), origWeight(ow), attrWeights(aaw),
    adjustOrigWeight(aw), onlyEmpty(oe)
{
}


bool TIMBlurer::operator()(PIMByRows im)
{ TVarList &attributes = im->rows.front().example->domain->attributes.getReference();
  int attrs = attributes.size();
  int columns = im->rows.front().nodes.size();
  int classes = im->rows.front().noOfValues;
  float actOrigWeight;

  PFloatList myAttrWeights;

  if (weight>0.0) {
    if (weight>1.0)
      raiseError("weight is %5.3f; it should be lower than 1.0", weight);
    myAttrWeights = mlnew TFloatList(attrs, weight);
    actOrigWeight = adjustOrigWeight ? 1.0-weight : origWeight;
  }
  else {
    if (attrWeights && int(attrWeights->size())!=attrs)
      raiseError("invalid 'attrWeights' (size does not match the number of attributes)");

    myAttrWeights = attrWeights;
    if (adjustOrigWeight) {
      float sum = 0.0;
      PITERATE(TFloatList, wi, attrWeights)
        sum += *wi;
      actOrigWeight = sum>1.0 ? origWeight : 1.0-sum;
    }

    else
      actOrigWeight=origWeight;
  }

  vector<vector<float *> > impose;
  impose.reserve(im->rows.size());

  vector<TDIMRow *> *sortedRows = mlnew vector<TDIMRow *>();
  sortedRows->reserve(im->rows.size());

  ITERATE(vector<TDIMRow>, ri, im->rows) {
    impose.push_back(vector<float *>(columns, (float *)NULL));
    for(int i = columns; i; ) {
      float *fc = mlnew float[classes];
      impose.back()[--i] = fc;
      for(float *fe = fc+classes; fc!=fe; *(fc++) = 0.0);
    }
    sortedRows->push_back(& *ri);
  }
  const TDIMRow *firstRow = &im->rows.front();

  for(int attr=0; attr<attrs; attr++) { // we trust it is already sorted

    float attrWeight = myAttrWeights->at(attr);
    if (attrWeight>=0.0) {

      bool ordered = attributes[attr]->ordered;

      // impose by attr
      for(vector<TDIMRow *>::const_iterator grp_begin(sortedRows->begin()), grp_in1, grp_in2, grp_end, grp_totend(sortedRows->end());
          grp_begin!=grp_totend;
          grp_begin=grp_end) {

        // find the end of the group
        grp_end=grp_begin;
        while(++grp_end!=grp_totend) {
          TExample::const_iterator ei1=(*grp_begin)->example->begin(), ei2=(*grp_end)->example->begin(), ei1e=(*grp_begin)->example->end();
          for(int attrNo=0; (ei1!=ei1e) && ((attrNo==attr) || (*ei1==*ei2)); ei1++, ei2++, attrNo++);
          if (ei1!=ei1e)
            break;
        }

        for(grp_in1=grp_begin; grp_in1!=grp_end-1; grp_in1++)
          for(grp_in2=grp_in1+1; grp_in2!=grp_end; grp_in2++) {
            if (ordered) {
              int dif=(*grp_in1)->example->operator[](attr).intV - (*grp_in2)->example->operator[](attr).intV;
              if ((dif!=1) && (dif!=-1))
                continue;
            }
            else
              if ((*grp_in1)->example->operator[](attr).intV == (*grp_in2)->example->operator[](attr).intV)
                continue;
           
            { vector<float *>::iterator odi((*grp_in1)->nodes.begin());
              vector<float *> &imp = impose[*grp_in2-firstRow];
              for(vector<float *>::iterator idi(imp.begin()), ide(imp.end()); idi!=ide; idi++, odi++)
                for (float *idii= *idi, *idie = idii+classes, *odii = *odi; idii!=idie; *(idii++) += *(odii++)*attrWeight);
            }

            { vector<float *>::iterator odi((*grp_in2)->nodes.begin());
              vector<float *> &imp = impose[*grp_in1-firstRow];
              for(vector<float *>::iterator idi(imp.begin()), ide(imp.end()); idi!=ide; idi++, odi++)
                for (float *idii= *idi, *idie = idii+classes, *odii= *odi; idii!=idie; *(idii++) += *(odii++)*attrWeight);
            }
          }
      }
    }

    // sort by attribute attr
    vector<int> valf(attributes[attr]->noOfValues(), 0);
    ITERATE(vector<TDIMRow *>, ii, *sortedRows) {
      TValue &val=(*ii)->example->operator[](attr);
      if (val.isSpecial())
        raiseError("attribute '%s' has undefined values", attributes[attr]->name.c_str());
      valf[val.intV]++;
    }

    int id=0;
    for(vector<int>::iterator ni=valf.begin(); ni!=valf.end(); *(ni++)=(id+=*ni)-*ni);

    vector<TDIMRow *> *newPtrs=mlnew vector<TDIMRow *>(sortedRows->size(), (TDIMRow *)NULL);
    ITERATE(vector<TDIMRow *>, si, *sortedRows)
      (*newPtrs)[valf[(*si)->example->operator[](attr).intV]++] = *si;

    mldelete sortedRows;
    sortedRows=newPtrs;

  }

  mldelete sortedRows;

  vector<TDIMRow>::iterator oi=im->rows.begin();

  ITERATE(vector<vector<float *> >, ii, impose) {
    for(vector<float *>::iterator odi((*oi).nodes.begin()), idi((*ii).begin()), ide((*ii).end()); idi!=ide; idi++, odi++) {
      float *di, *de;
      if (onlyEmpty) {
        for (di = *odi, de = di+classes; (di!=de) && !*di; di++);
        if (di==de)
          continue;
      }
      float *idii;
      for (di = *odi, de = di+classes, idii = *idi; di!=de; *(di++) += *(idii++));
    }
    oi++;
  }

  return true;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -