⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 svmfusvmlargeopt.cpp

📁 This is SvmFu, a package for training and testing support vector machines (SVMs). It is written in C++.
💻 CPP
📖 第 1 页 / 共 2 页
字号:
//     This is a part of the SvmFu library, a library for training //     Support Vector Machines.//      //     Copyright (C) 2000  rif and MIT////     Contact: rif@mit.edu//     This program is free software; you can redistribute it and/or//     modify it under the terms of the GNU General Public License as//     published by the Free Software Foundation; either version 2 of//     the License, or (at your option) any later version.//     This program is distributed in the hope that it will be useful,//     but WITHOUT ANY WARRANTY; without even the implied warranty of//     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the//     GNU General Public License for more details.//     You should have received a copy of the GNU General Public//     License along with this program; if not, write to the Free//     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,//     MA 02111-1307 USA#include "SvmFuSvmLargeOpt.h"#ifdef CallTemplate#undef CallTemplate#endif#define CallTemplate(rettype, funcname) \template<class DataPt, class KernVal> \rettype SvmLargeOpt<DataPt, KernVal>::funcname#ifdef InlineTemplate#undef InlineTemplate#endif#define InlineTemplate(rettype, funcname) \template<class DataPt, class KernVal> \rettype SvmLargeOpt<DataPt, KernVal>::funcname//! The standard, cold-start constructor, no extraCacheRows argtemplate <class DataPt, class KernVal>SvmLargeOpt<DataPt, KernVal>::SvmLargeOpt(int svmSize, const IntVec y, DataPt *trnSetPtr, const KernVal (*kernProdFuncPtr)(const DataPt &pt1,				  const DataPt &pt2), int chunkSize, double C, double tol, double eps, int verbosity)  : SvmBase<DataPt, KernVal>(svmSize, y, trnSetPtr,			     kernProdFuncPtr, C, eps),  chunkSize_(chunkSize), tol_(tol), majorIterations_(0), updateNumber_(1),   optimized_(false), verbosity_(verbosity), startType_(coldStart),  extraCacheRows_(0){  initInternal();}//! 
The standard, cold-start constructor, with an extraCacheRows argtemplate <class DataPt, class KernVal>SvmLargeOpt<DataPt, KernVal>::SvmLargeOpt(int svmSize, const IntVec y, DataPt *trnSetPtr, int extraCacheRows, const KernVal (*kernProdFuncPtr)(const DataPt &pt1,				  const DataPt &pt2), int chunkSize, double C, double tol, double eps, int verbosity)  : SvmBase<DataPt, KernVal>(svmSize, y, trnSetPtr,			     kernProdFuncPtr, C, eps),  chunkSize_(chunkSize), tol_(tol), majorIterations_(0), updateNumber_(1),   optimized_(false), verbosity_(verbosity), startType_(coldStart),  extraCacheRows_(extraCacheRows){  initInternal();}//! The warm-start constructor: you have a guess for alpha and b,// and outputs???  I don't understand this.   If you need outputs,// I don't know what the use is.  This should be cut.template <class DataPt, class KernVal>SvmLargeOpt<DataPt, KernVal>::SvmLargeOpt(int svmSize, const IntVec y, DataPt *trnSetPtr, const KernVal (*kernProdFuncPtr)(const DataPt &pt1,				  const DataPt &pt2), DoubleVec cVec, DoubleVec alphaVec, DoubleVec outputVec, double b, int chunkSize, double tol, double eps, int verbosity)  : SvmBase<DataPt, KernVal>(svmSize, y, trnSetPtr,			     kernProdFuncPtr, 1, eps),    chunkSize_(chunkSize), tol_(tol), majorIterations_(0), updateNumber_(1),     optimized_(false), verbosity_(verbosity), outputs_(outputVec),     startType_(warmStart){  initInternal();  setCVec(cVec);  setAllAlphas(alphaVec);  setB(b);}//! 
The hot-start constructor, you're passing in a *chunkKernCachetemplate <class DataPt, class KernVal>SvmLargeOpt<DataPt, KernVal>::SvmLargeOpt(int svmSize, const IntVec y, DataPt *trnSetPtr, const KernVal (*kernProdFuncPtr)(const DataPt &pt1,				  const DataPt &pt2), DoubleVec cVec, DoubleVec alphaVec, DoubleVec outputVec, double b, int chunkSize,  SvmKernCache<DataPt, KernVal> *chunkKernCache, double tol, double eps, int verbosity)  : SvmBase<DataPt, KernVal>(svmSize, y, trnSetPtr,			     kernProdFuncPtr, 1, eps),    chunkSize_(chunkSize), tol_(tol), majorIterations_(0), updateNumber_(1),     optimized_(false), verbosity_(verbosity), chunkKernCache_(chunkKernCache),    outputs_(outputVec), startType_(hotStart){  initInternal();  setCVec(cVec);  setAllAlphas(alphaVec);  setB(b);}CallTemplate(void, initInternal)() {  int i;  int size = getSize();        if (size < SVM_LARGEOPT_MIN_TOTAL_SIZE) {    cerr << "ERROR: Total size for SvmLargeOpt must be at least " <<      SVM_LARGEOPT_MIN_TOTAL_SIZE << ".  Aborting." << endl;    exit(1);  }        // Check sizes  if (chunkSize_ > size) {    if (verbosity_ >= 2) {      cerr << "Warning: chunkSize_(" << chunkSize_ << 	") > svmSize_(" << size << ").  Adjusting " <<	"chunk size downward." 
<< endl;    }    chunkSize_ = size;  }        workingSetPos_ = new int[size];  lastCheckIter_ = new int[size];  updateHelper_ = new int[size];  tempKernProds_ = new KernVal[size];  if (startType_ == coldStart) { outputs_ = new double[size]; }  for (i = 0; i < size; i++) {     workingSetPos_[i] = -1;    if (startType_ == coldStart) { outputs_[i] = 0; }    lastCheckIter_[i] = 0;    updateHelper_[i] = 0;  }        workingSet_ = new int[chunkSize_];  chunkY_ = new int[chunkSize_];  chunkC_Vec_ = new double[chunkSize_];  chunkAlphas_ = new double[chunkSize_];  chunkTrnSetPtr_ = new DataPt[chunkSize_];  usingLinear_ = false;}CallTemplate(void, useLinearKernel)(int dims, const void(*afunc)(double *&w, const DataPt &p, double amt), const double(*mfunc)(double *w, const DataPt &p)) {  if (usingLinear_ == false) {    int i;    linDims_ = dims;    w_ = new double[dims];    for (i = 0; i < dims; i++) {      w_[i] = 0;    }        int size = getSize();    double eps = getEpsilon();        addToWPtr_ = afunc;    multWPtr_ = mfunc;    for (i = 0; i < size; i++) {      if (getAlpha(i) > eps) {	addToWPtr_(w_, getTrainingExample(i), getY(i)*getAlpha(i));      }    }        usingLinear_ = true;  }}template<class DataPt, class KernVal> SvmLargeOpt<DataPt, KernVal>::~SvmLargeOpt() {  int i;  for (i = 0; i < (int)alphaDiffHistory_.size(); i++) {    delete[] alphaDiffHistory_[i];  }  delete[] workingSet_;  delete[] workingSetPos_;  delete[] chunkY_;  delete[] chunkC_Vec_;  delete[] chunkAlphas_;  delete[] chunkTrnSetPtr_;  if (startType_ == coldStart) { delete[] outputs_; }  delete[] updateHelper_;  delete[] tempKernProds_;    if (chunkKernCacheAllocatedP_) {    delete chunkKernCache_;  }  delete[] selfProds_;  if (usingLinear_) {    delete[] w_;  }}/*! \fn void SvmLargeOpt<DataPt, KernVal>::optimize () * * Optimizes the svm. */CallTemplate(void, optimize)() {  setupAndSolveInitialChunk();  if (verbosity_ >= 2) cout << "Solved initial chunk." 
<< endl;  while (setupAndSolveNewChunk()) {    if (verbosity_ >= 2) cout << "Solved chunk." << endl;	  }      optimized_ = true;}CallTemplate(void, reoptimize)() {  if (verbosity_ >= 2) cout << "Forcing current chunk." << endl;  forceCurrentChunk();  while (setupAndSolveNewChunk()) {    if (verbosity_ >= 2) cout << "Solved chunk." << endl;	  }  optimized_ = true;}CallTemplate(void, setupAndSolveInitialChunk)() {  createInitialWorkingSet();  setupInitialChunkData();  if (verbosity_ >= 2) cout << "Setup initial chunk data. " << endl;  buildAndSolveChunk();  if (verbosity_ >= 2) cout << "Built and solved initial chunk. " << endl;  updateAlphasAndB();  if (verbosity_ >= 2) cout << "Updated alphas and B. " << endl;  destroyChunk();}/*! \fn void SvmLargeOpt<DataPt, KernVal>::setupInitialChunkData () * * Sets up the initial chunk using the indices from workingSet_ * and creates the kernel cache. */CallTemplate(void, setupInitialChunkData)() {  int i;    for (i = 0; i < chunkSize_; i++) {    int ex = workingSet_[i];    chunkY_[i] = getY(ex);    chunkC_Vec_[i] = getC(ex);    chunkTrnSetPtr_[i] = getTrainingExample(ex);    chunkAlphas_[i] = getAlpha(workingSet_[i]);  }  if (startType_ != hotStart) {    chunkKernCache_ = new SvmKernCache<DataPt, KernVal>      (chunkSize_, extraCacheRows_, svmSize_, trnSetPtr_, kernProdFuncPtr_);    chunkKernCacheAllocatedP_ = true;  } else {    chunkKernCacheAllocatedP_ = false;  }  chunkKernCache_->workingSetChanged(workingSet_, eltQueue_);  // This now goes here because we need the initialization of the  // data structure done by workingSetChanged to happen BEFORE we  // read the cachedKernProds.  Bad Hack.  
int size = getSize();  selfProds_ = new KernVal[size];  if (startType_ == hotStart && chunkKernCache_->readFromFile_) {    for (i = 0; i < size; i++) {      selfProds_[i] = chunkKernCache_->cachedKernProd(i,i);    }  } else {    for (i = 0; i < size; i++) {      selfProds_[i] = kernProdFuncPtr_(getTrainingExample(i),				       getTrainingExample(i));    }  }}CallTemplate(bool, setupAndSolveNewChunk)() {  // if we can improve the working set...  if (updateWorkingSetAndChunkData()) {    buildAndSolveChunk();    updateAlphasAndB();    destroyChunk();        return true;  } else {    return false;  }}CallTemplate(void, forceCurrentChunk)() {  buildAndSolveChunk();  updateAlphasAndB();  destroyChunk();}/*! \fn bool SvmLargeOpt<DataPt, KernVal>::updateWorkingSetAndChunkData () * * Prepares a new working set for optimization by an SvmSmallOpt. * * Returns false if we're done. * * \todo make sure iteration through the sets is in the proper direction */CallTemplate(bool, updateWorkingSetAndChunkData)() {    int numUSVs = getNumUnbndSupVecs();  int maxToAdd = chunkSize_-numUSVs-SVM_LARGEOPT_NON_USVS_TO_KEEP;  int origQueueSize = eltQueue_.size();  int size, i;  bool didAPivot = false;  ////////////////////  // bounds checking  ////////////////////    if (origQueueSize == 0) {    if (verbosity_ >= 2) {      cerr << "WARNING: All elements are in the working set --- we're done." 	   << endl;    }    return false;  }  if (maxToAdd < SVM_LARGEOPT_MIN_TO_ADD) {    maxToAdd = SVM_LARGEOPT_MIN_TO_ADD;    if (maxToAdd > chunkSize_) { maxToAdd = chunkSize_; }    if (verbosity_ >= 2) {       cerr << "WARNING: chunkSize = " << chunkSize_	   << ", numUSVs = " << numUSVs << "; things could get SLOW..."	   
<< endl;    }  }  ////////////////////////////////////////////////  // choose points to add to the working set and  // update the working set data structures  ////////////////////////////////////////////////  int numPointsFound = 0;  int numPointsChecked = 0;  int numPointsShrunk = 0;  int checksSinceLastFind = 0;  int firstFound = -1;  int lastFound = -1;  int maxGapSize = 0;  int curGapSize = 0;  list<QueueElt_> removedPoints;  list<QueueElt_> checkedNotAddedPoints;    double *phiDiffs = new double[majorIterations_];  double curPhi = 0;    for (i = majorIterations_ - 1; i >= 0; i--) {    curPhi += phis_[i];    phiDiffs[i] = curPhi;  }  int pointsRemoved, SVsRemoved, SVsAdded;  pointsRemoved = SVsRemoved = SVsAdded = 0;    while ((numPointsChecked < origQueueSize) &&	 ((numPointsFound == 0) ||	  ((numPointsFound < maxToAdd) && 	   (checksSinceLastFind <= firstFound+	    SVM_LARGEOPT_MAX_NONFIND_CHECKS)))) {    QueueElt_ qe = eltQueue_.top();    eltQueue_.pop();    int ex = qe.elt_;        double alpha = getAlpha(ex);    bool checkNeeded = false;    int lc = lastCheckIter_[ex];    double rho = sqrt(selfProds_[ex]*phiDiffs[lc]) + getB() - bHistory_[lc];    double oldOut = outputs_[ex];    int y = getY(ex);        double eps = getEpsilon();    if (alpha < eps) {      if (y*oldOut - rho < 1) { checkNeeded = true; }    } else if (alpha > getC(ex) - eps) {      if (y*oldOut + rho >= 1) { checkNeeded = true; }    } else { // USV not in the working set, check it.      
checkNeeded = true;    }        double KKT_Dist = 0;    if (checkNeeded) {       KKT_Dist = KKT_ViolationAmt(ex);    }          // if the point is a violator    if (KKT_Dist > tol_) {      // remove the least offending point from the removal queue       QueueElt_ pointToRemove = removalQueue_.top();      removalQueue_.pop();      pointsRemoved ++;      if (alpha > 0) { SVsAdded++; }      if (getAlpha(pointToRemove.elt_) > 0) { SVsRemoved++; }            // Store the removed point in a list, so we can put it on the       // to be checked queue AFTER the round.      removedPoints.push_back(pointToRemove);      // update the working set with the new point      pivotWorkingSet(ex, pointToRemove.elt_);      // update statistics      checksSinceLastFind = 0;	      if (numPointsFound == 0) { firstFound = numPointsChecked; }      else {		if (curGapSize > maxGapSize) {	  maxGapSize = curGapSize;	}      }            lastFound = numPointsChecked;      numPointsFound++;      curGapSize = 0;    } else {      checkedNotAddedPoints.push_back(QueueElt_(ex, KKT_Dist));      checksSinceLastFind++;      curGapSize++;    }    numPointsChecked++;  }  delete[] phiDiffs;  if (verbosity_ > 0) {    cout << "Checked " << numPointsChecked << " points, adding " 	 << numPointsFound << " to working set." << endl;    cout << "Adding " << SVsAdded << " SVs, removing " << SVsRemoved << " SVs." 
<< endl;  }  ////////////////////////////  // update the kernel cache  ////////////////////////////  // if we changed the working set  if (numPointsFound) {    // add points removed from the working set to the to-be-added set    for (list<QueueElt_>::iterator removedPoints_it = removedPoints.begin();	 removedPoints_it != removedPoints.end(); removedPoints_it++)    {      eltQueue_.push(QueueElt_(removedPoints_it->elt_,			       KKT_ViolationAmt(removedPoints_it->elt_)));    }    // add points we checked but didn't add back to the queue    for (list<QueueElt_>::iterator checkedNotAddedPoints_it =	   checkedNotAddedPoints.begin();	 checkedNotAddedPoints_it != checkedNotAddedPoints.end();	 checkedNotAddedPoints_it++)    {      eltQueue_.push(*checkedNotAddedPoints_it);    }        chunkKernCache_->workingSetChanged(workingSet_, eltQueue_);    return true;  }   else return false;}CallTemplate(void, createInitialWorkingSet)() {  if (startType_ == coldStart) {    createInitialWorkingSetCold();  } else if (startType_ == warmStart) {    createInitialWorkingSetWarm();  } else { // startType_ == hotStart    createInitialWorkingSetHot();  }}/*! \fn void SvmLargeOpt<DataPt, KernVal>::createInitialWorkingSetCold () * * Populates the working set with a balanced selection of points * if possible. * * This is a little complicated.  We make only a single pass * over the data, filling in the working set with positive examples * from the bottom, and negative from the top.  We allow ourselves * to write more than sS/2 of a given class into the array, but if * we later find enough examples of the other class, we overwrite. */CallTemplate(void, createInitialWorkingSetCold)() {  const int sS = chunkSize_; // for readability      // "clever" method for trying to get as many points from  // both classes as possible into the working set.  This avoids  // the need to prerandomize the order of presentation of the  // points, although that's still a good idea for other reasons.      
struct initInfo {    int numGood;    int maxGood;    int numFound;    int startPos;    int posChange;  };    

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -