⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 svmfusvmlargeopt.cpp

📁 This is SvmFu, a package for training and testing support vector machines (SVMs). It is written in C++.
💻 CPP
📖 第 1 页 / 共 2 页
字号:
  int i;  int totFound;  int size = getSize();  int yPos = 0, yNeg = 0;  for (i = 0; i < size; i++) {    if (getY(i) == 1) { yPos++; }    else { yNeg++; }  }      int yNegDes = (yNeg*sS)/size;  int yPosDes = sS-yNegDes;    if (verbosity_ > 0) {    cout << "The training set contains " << yPos << " positive and "	 << yNeg << " negative examples." << endl;  }    // initInfo initInfoArray[2] = { { 0, sS/2+(sS%2), 0, 0, 1 },  // { 0, sS/2, 0, sS-1, -1 } };  initInfo initInfoArray[2] = { { 0, yPosDes, 0, 0, 1 },				{ 0, yNegDes, 0, sS-1, -1 } };      for (i = 0, totFound = 0; i < size; i++) {    initInfo* iip;    iip = (getY(i) == 1 ? &initInfoArray[0] : &initInfoArray[1]);          if (totFound < sS || iip->numGood < iip->maxGood) {      if (totFound >= sS) {	// We thought we might need this example in the initial	// working set, but we've now found enough examples from	// the other class, so push it onto the queue.	int elt = workingSet_[iip->startPos];	workingSetPos_[elt] = -1;	eltQueue_.push(QueueElt_(elt,0));	initInfo* iip2 = 	  (getY(i) == 1 ? &initInfoArray[1] : &initInfoArray[0]);	iip2->numFound--;      }      workingSet_[iip->startPos] = i;      workingSetPos_[i] = iip->startPos;      iip->numFound++;      iip->startPos += iip->posChange;      totFound++;    } else {      // We're already got our full complement of examples, push      // this one onto the queue.      eltQueue_.push(QueueElt_(i,0));    }          if (iip->numGood <= iip->maxGood) { iip->numGood++; }  }  // Attempt at consistency check.  if (initInfoArray[0].numGood == 0 || initInfoArray[1].numGood == 0) {    cerr << "Error: No " <<       (initInfoArray[0].numGood == 0 ? "positive" : "negative") <<       "examples in data set.  Aborting." << endl;    exit(1);  }    if (verbosity_ > 1) {    cout << "Initial chunk contains " << initInfoArray[0].numFound	 << " positive examples and " << initInfoArray[1].numFound	 << " negative examples." << endl << endl;  }}/*! 
Puts as many USV's as possible into the working set;
 * then pops enough elements off the queue to fill the working set. */
CallTemplate(void, createInitialWorkingSetWarm)() {
  int totSize = getSize();
  int numFound = 0;

  // First, put as many USVs as possible into the working set.
  for (int i = 0; i < totSize; i++) {
    if (unbndSupVecP(i) && numFound < chunkSize_) {
      workingSet_[numFound] = i;
      workingSetPos_[i] = numFound;
      numFound++;
    } else {
      // Not placed in the chunk: queue it keyed by the negated KKT
      // violation.  NOTE(review): createInitialWorkingSetHot() pushes
      // the violation with the OPPOSITE sign -- confirm which sign the
      // QueueElt_ priority ordering actually expects.
      eltQueue_.push(QueueElt_(i, -KKT_ViolationAmt(i)));
    }
  }

  // Warn if there's not enough space...
  if (getNumUnbndSupVecs() > numFound) {
    cerr << "WARNING:  Not enough room in chunk to hold all USVs.  Badbadbad."
	 << endl;
  }

  // add as many more points as necessary
  // set<QueueElt_>::iterator it = eltQueue_.end();
  while (numFound < chunkSize_) {
    QueueElt_ qe = eltQueue_.top();
    eltQueue_.pop();
    workingSet_[numFound] = qe.elt_;
    workingSetPos_[qe.elt_] = numFound;
    numFound++;
  }
}

/*! Rebuilds the working set from the positions recorded in the chunk
 *  kernel cache (hot start).  Falls back to a cold start if any cache
 *  column has no associated training example. */
CallTemplate(void, createInitialWorkingSetHot)() {
  int i;

  // Map cache columns back to training-set indices, and vice versa.
  for (i = 0; i < chunkSize_; i++) {
    workingSet_[i] = chunkKernCache_->getColToTrnSet(i);
  }
  for (i = 0; i < svmSize_; i++) {
    workingSetPos_[i] = chunkKernCache_->getTrnSetToCol(i);
  }

  // Check to make sure it's ok: a -1 entry means the cache does not
  // cover a full chunk, so the hot start is unusable.
  for (i = 0; i < chunkSize_; i++) {
    if (workingSet_[i] == -1) {
      createInitialWorkingSetCold();
      return;
    }
  }

  // Otherwise, push all the unused elements into the queue.
  for (i = 0; i < svmSize_; i++) {
    if (workingSetPos_[i] == -1) {
      eltQueue_.push(QueueElt_(i, KKT_ViolationAmt(i)));
    }
  }
}

/*! Builds an SvmSmallOpt over the current working set, seeds it with
 *  the current alphas/b, gives it per-point output offsets so its view
 *  of the outputs matches the full problem, and optimizes it. */
CallTemplate(void, buildAndSolveChunk)() {
  chunkSvm_ =
    new SvmSmallOpt<DataPt, KernVal>
    (chunkSize_, chunkY_, chunkTrnSetPtr_, kernProdFuncPtr_,
     10, tol_, getEpsilon(), chunkKernCache_);
  chunkSvm_->setCVec(chunkC_Vec_);
  chunkSvm_->setB(getB());
  chunkSvm_->setAllAlphas(chunkAlphas_);

  // At THIS point in the code, outputs_[ex] is correct
  // for all points IN THE WORKING SET, so we can use this to determine
  // the offsets.
  DoubleVec offsets = new double[chunkSize_];
  for (int i = 0; i < chunkSize_; i++) {
    offsets[i] = outputs_[workingSet_[i]] -
      chunkSvm_->outputAtTrainingExample(i);
  }
  chunkSvm_->setOffsetVec(offsets);
  chunkSvm_->optimize();

  // fixB() is verbose only when we are.
  verbosity_ > 0 ? chunkSvm_->fixB() : chunkSvm_->fixB(false);
  majorIterations_++;
  if (verbosity_ > 0) {
    cout << "CHUNK OPTIMIZATION " << majorIterations_ << " COMPLETE (#USVs="
	 << chunkSvm_->getNumUnbndSupVecs() << ", #SVs="
	 << chunkSvm_->getNumSupVecs() << ")" << endl << endl;
  }
  // NOTE(review): assumes setOffsetVec() copies (or chunkSvm_ no longer
  // reads) the offsets after optimize() -- confirm before reuse.
  delete[] offsets;
}

/*! Frees the chunk sub-problem created by buildAndSolveChunk(). */
CallTemplate(void, destroyChunk)() {
  delete chunkSvm_;
}

/*!
\fn void SvmLargeOpt<DataPt, KernVal>::updateAlphasAndB () * * (1) Clears removal queue<br> * (2) Copies alphas and B out of our SvmSmallOpt * (3) Sorts the working set into removalQueue_ based *     on KKT_ViolationAmt  */CallTemplate(void, updateAlphasAndB)() {  int i;  // Clear removalQueue_ of old values (if any)  while (removalQueue_.size()) { removalQueue_.pop(); }  // removalQueue_.clear();  AlphaDiffElt_ *alphaDiffs = new AlphaDiffElt_[chunkSize_];  DoubleVec chunkAlphas = chunkSvm_->getAllAlphas();    double phi = 0;  double bDiff = chunkSvm_->getB() - getB();  // Delete excess rows from kernelCache;  double eps = getEpsilon();  alphaDiffHistory_.push_back(alphaDiffs);    int cnt = 0;  for (i = 0; i < chunkSize_; i++) {    chunkAlphas_[i] = chunkAlphas[i]; // temporary to permanent    int ex = workingSet_[i];    double diff = chunkAlphas[i] - getAlpha(ex);    if (fabs(diff) > eps) {      alphaDiffs[cnt].elt_ = ex;      alphaDiffs[cnt].diff_ = diff;                  phi += getY(ex)*diff*(chunkSvm_->outputAtTrainingExample(i)-outputs_[ex]-bDiff);      cnt++;    }    setAlpha(ex, chunkAlphas[i]);    outputs_[ex] = chunkSvm_->outputAtTrainingExample(i);    lastCheckIter_[ex] = majorIterations_;    // Now place an element on the removal queue.    double KKT_Dist = KKT_ViolationAmt(ex);    // Add 10 to remove "zero" elements first.  Kludge.    removalQueue_.push(QueueElt_(ex, -KKT_Dist + (getAlpha(ex) < eps ? 10 : 0)));  }  alphaDiffLengths_.push_back(cnt);      delete[] chunkAlphas;      // If we're actually done, the b from the chunk will be the  // same as the b implied by an USV's not in the chunk (hopefully,  // there won't be any of those anyway...)  
bHistory_.push_back(getB()); // Put the OLD value of b in history  setB(chunkSvm_->getB());  // cout << "PHI = " << phi << endl;  phis_.push_back(phi);  if (usingLinear_) {    int i;    for (i = 0; i < linDims_; i++) {      w_[i] = 0;    }        int size = getSize();    double eps = getEpsilon();    for (i = 0; i < size; i++) {      if (getAlpha(i) > eps) {	addToWPtr_(w_, getTrainingExample(i), getY(i)*getAlpha(i));      }    }  }}CallTemplate(double, outputAtTrainingExample)(int ex) const {  updateNumber_++;  int i, e;  int size = getSize();      if (updateNumber_ >= SVM_RESET_UPDATES_HOW_OFTEN) {    updateNumber_ = 1;    for (i = 0; i < size; i++) {      updateHelper_[i] = 0;    }  }  // if we have to recalculate the output for this point  if (lastCheckIter_[ex] != majorIterations_) {     if (usingLinear_) {      outputs_[ex] = multWPtr_(w_, getTrainingExample(ex)) + getB();     } else {      int lC = lastCheckIter_[ex];            // for each iteration since we last updated      while (lC < majorIterations_) {	AlphaDiffElt_ *aD = alphaDiffHistory_[lC];		// for each diff in this iteration, update the output for this point	for (i = 0; i < alphaDiffLengths_[lC]; i++) {	  e = aD[i].elt_;	  	  if (updateHelper_[e] != updateNumber_) {	    tempKernProds_[e] = chunkKernCache_->trnSetKernProd(ex, e);	    updateHelper_[e] = updateNumber_;	  }	  	  outputs_[ex] += getY(e)*aD[i].diff_*tempKernProds_[e];	}		lC++;      }          outputs_[ex] -= bHistory_[lastCheckIter_[ex]];      outputs_[ex] += getB();    }        lastCheckIter_[ex] = majorIterations_;  }      return outputs_[ex];}/*! \fn SvmLargeOpt<DataPt, KernVal>::KKT_ViolationAmt (int) * * Returns KKT violation for the specified point in the training set */CallTemplate(double, KKT_ViolationAmt)(int ex) const {  // Added this bit so that if a point has a C of 0, it will always  // return a violation of zero and won't be added to a working set.  
// This makes sense because even if C=0 for a point, that point ALWAYS  // satisfies the optimality conditions, since its alpha can't move.  // We don't even need to do the computation here.  double C = getC(ex);  double eps = getEpsilon();  if (C < eps) { return 0; }  double yerr = getY(ex)*outputAtTrainingExample(ex);  double alpha = getAlpha(ex);  if (alpha < eps) {     return 1 - yerr;   } else if (alpha > C - eps) {    return yerr - 1;  } else {    return fabs(yerr-1);  }}/*! \fn SvmLargeOpt<DataPt, KernVal>::pivotWorkingSet (int, int) * * (1) Update chunk data structures to reflect the swap<br> * (2) Adds cells for the point to the kernel cache */CallTemplate(void, pivotWorkingSet)(int pointToAdd, 				    int pointToRemove) {  int wsPos = workingSetPos_[pointToRemove];  ///////////////////  // error checking  ///////////////////  if (wsPos < 0 || wsPos > chunkSize_) {    cerr << "ERROR!  wsPos out of range: " << wsPos << endl;    exit(1);  }  if (workingSetPos_[pointToAdd] != -1) {    cerr << pointToAdd << " is ALREADY in wset!" 
<< endl;    exit(1);  }  if (pointToAdd < 0 || pointToAdd > getSize()) {    cerr << "POINT TO ADD RANGE: " << pointToAdd << endl;    exit(1);  }  if (pointToRemove < 0 || pointToRemove > getSize()) {    cerr << "POINT TO Remove RANGE: " << pointToRemove << endl;    exit(1);  }    ///////////////////////////////////////  // update working set data structures  ///////////////////////////////////////  workingSetPos_[pointToAdd] = wsPos;  workingSetPos_[pointToRemove] = -1;  workingSet_[wsPos] = pointToAdd;      chunkY_[wsPos] = getY(pointToAdd);  chunkC_Vec_[wsPos] = getC(pointToAdd);  chunkTrnSetPtr_[wsPos] = getTrainingExample(pointToAdd);  chunkAlphas_[wsPos] = getAlpha(pointToAdd);}CallTemplate(double, dualObjFunc)() const {  double b = getB();  double result = 0.0;  int size = getSize();      for (int i = 0; i < size; i++) {    result += getAlpha(i)*(1-.5*getY(i)*(outputs_[i]-b));  }      return result;}CallTemplate(void, getCurrentOutputVals)(DoubleVec outputVals) {  int size = getSize();    for (int i = 0; i < size; i++) {    outputVals[i] = outputAtTrainingExample(i);  }}CallTemplate(int, computeLeaveOneOutErrors)(DoubleVec LOO_Vals) {  int size = getSize();  int i;    double eps = getEpsilon();  BoolVec LOO_CompNeededP_ = new bool[size];  if (!optimized_) {    cerr << "Error:  Cannot compute Leave-One-Out Errors on an unoptimized SVM." << endl;    return 0;  }  if (verbosity_ > 0) {    cout << "Computing Leave-One-Out Errors." << endl;  }    // Update all the outputs, just in case.  This can be necessary because  // of the phis.  A little more care could possibly save some time here,  // but this is simpler and correct.  
computeTrainingSetPerf(false);    int compsNeeded = 0;  for (i = 0; i < size; i++) {    if (getAlpha(i) < eps) {      LOO_CompNeededP_[i] = false;      LOO_Vals[i] = outputAtTrainingExample(i);    } else {      compsNeeded++;      LOO_CompNeededP_[i] = true;      LOO_Vals[i] = 0;    }  }  int numErrors = computeLeaveOneOutErrorsInternal(LOO_CompNeededP_, LOO_Vals);  delete[] LOO_CompNeededP_;  if (verbosity_ > 0) {    cout << "Done." << endl;  }    return numErrors;}CallTemplate(int, computeLeaveOneOutErrorsInternal)(BoolVec LOO_CompNeededP,						    DoubleVec LOO_Vals) {  int size = getSize();  int i, j, k;  int numErrors = 0;  double eps = getEpsilon();  int workSetPos;    int oldVerbosity = verbosity_;  verbosity_ = 0;  for (i = 0; i < size; i++) {    if (LOO_CompNeededP[i]) {      double oldC = getC(i);      double alphaRemains = getAlpha(i);      int yI = getY(i);      double aI = getAlpha(i);      // Delete the alpha from all the cached outputs      for (j = 0; j < size; j++) {	outputs_[j] -= yI*aI*chunkKernCache_->trnSetKernProd(i,j);      }      j = -1;      while (alphaRemains > eps) {	j++;	if (i == j) { continue; }	if (j >= size) {	  cerr << "Error: Couldn't adjust solution in LOO computation.  Aborting."	       << endl;	  exit(-1);	}	double cJ = getC(j);	double aJ = getAlpha(j);	int yJ = getY(j);	double diffAvail = 0;	if (yJ == yI && aJ < cJ-eps) { diffAvail = cJ - aJ; }	if (yJ != yI && aJ > eps) { diffAvail = aJ; }		diffAvail = diffAvail > alphaRemains ? 
alphaRemains : diffAvail;	if (diffAvail > eps) {	  for (k = 0; k < size; k++) {	    outputs_[k] += yJ*(yI*yJ*diffAvail)*chunkKernCache_->trnSetKernProd(j,k);	  }	  setAlpha(j, aJ + yI*yJ*diffAvail);	  workSetPos = chunkKernCache_->getTrnSetToCol(j);	  if (workSetPos != -1) {	    chunkAlphas_[workSetPos] += yI*yJ*diffAvail;	  }	  alphaRemains -= diffAvail;	}      }      setAlpha(i,0);      setC(i, 0);      workSetPos = chunkKernCache_->getTrnSetToCol(i);      if (workSetPos != -1) {	chunkC_Vec_[workSetPos] = 0;	chunkAlphas_[workSetPos] = 0;      }      optimized_ = false;      reoptimize();            double output = outputAtTrainingExample(i);      if (yI*output <= 0) { numErrors++; }      LOO_Vals[i] = output;            setC(i, oldC);      if (workSetPos != -1) {	chunkC_Vec_[workSetPos] = oldC;      }    }  }  // Get SVM back to "original" solved state.  optimized_ = false;  reoptimize();  verbosity_ = oldVerbosity;  return numErrors;}#include "SvmFuSvmDataPoint.h"#define IterateTypes(datatype, kerntype) \    template class SvmLargeOpt<DataPoint<datatype>, kerntype>;#include "SvmFuSvmTypes.h"//template class vector<QueueElt_>;//    extern template class SvmBase<DataPoint<datatype>, kerntype>; //    extern template class SvmSmallOpt<DataPoint<datatype>, kerntype>; 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -