ginisvm.cpp
/*****************************************************************************/
// NAME        : HMMSVM
//
// DESCRIPTION : Support vector machine block that implements the
//               support vector classifier. This implements the SMO
//               algorithm.
//
/*****************************************************************************/

#include <ginisvm.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <new>          // std::nothrow, so the allocation checks below are meaningful
#include <sys/time.h>

#define _HMMSVM_DEBUGON_

/*****************************************************************************/
// FUNCTION    : Constructor
//
// DESCRIPTION : Initializes the SVM machine. By default it starts in
//               non-training mode.
//
// INPUT       :
//
// OUTPUT      :
//
/*****************************************************************************/
GINI_SVMBlock::GINI_SVMBlock( GINI_SVMKernel *initkernel )
{
    // Set the kernel type to the current kernel.
    mode           = GINISVM_RAW;
    kernel         = initkernel;
    lagrange       = (GINI_double**) GINI_NULL;
    supportVectors = (GINI_double**) GINI_NULL;
    numberofSVs    = 0;
    ginierr        = GINI_NO_ERROR;

    timer1 = 0.0; timer2 = 0.0; timer3 = 0.0; timer4 = 0.0;
    timer5 = 0.0; timer6 = 0.0; timer7 = 0.0; timer8 = 0.0;
}

/*****************************************************************************/
// FUNCTION    : Destructor
//
// DESCRIPTION : Deletes the support vectors and the Lagrange multipliers.
//
// INPUT       :
//
// OUTPUT      :
//
/*****************************************************************************/
GINI_SVMBlock::~GINI_SVMBlock( )
{
    for ( GINI_u32 i = 0; i < numberofSVs; i++ )
    {
        delete [] supportVectors[i];
        delete [] lagrange[i];
    }
    delete [] supportVectors;
    delete [] lagrange;
    delete [] bias;
}

/*****************************************************************************/
// FUNCTION    : InitTraining
//
// DESCRIPTION : Initializes all the parameters for support vector training.
//
// INPUT       :
//
// OUTPUT      :
//
/*****************************************************************************/
GINI_bool GINI_SVMBlock::InitTraining( GINI_u32 numofdata, GINI_u32 numofdim,
                                       GINI_u32 numofclasses, GINI_double *inpC,
                                       GINI_double inprdist, GINI_double inpalphaeps,
                                       GINI_double inpkkteps, GINI_u32 inpsrch,
                                       GINI_double inpcosteps, GINI_u32 inpcostwindow,
                                       GINI_u32 hitthreshold, GINI_u32 inpsvsrch,
                                       GINI_u32 inpliter, GINI_u32 inpfpass )
{
    GINI_u32 i, j;

    // Allocate memory for training data and labels.
    traindata = new (std::nothrow) GINI_double*[numofdata];
    Y         = new (std::nothrow) GINI_double*[numofdata];
    svset     = new (std::nothrow) GINI_Set*[numofclasses];   // Set of unbounded vectors
                                                              // for each class.
    maxE      = new (std::nothrow) GINI_Set*[numofclasses];   // Holds the maximum error
                                                              // value for each class.
    minE      = new (std::nothrow) GINI_Set*[numofclasses];   // Holds the minimum error
                                                              // value for each class.
    bias      = new (std::nothrow) GINI_double[numofclasses];
    setsize   = new (std::nothrow) GINI_u32[numofclasses];

    // Total number of training data and total number of classes.
    maxtraindata = numofdata;
    classes      = numofclasses;

    phase1 = 0; phase2 = 0; phase3 = 0; phase4 = 0;

    // Dimensionality of the input feature vector.
    dimension  = numofdim;
    C          = inpC;          // Regularization constants (one per sample).
    alphaeps   = inpalphaeps;   // Convergence tolerance for the alpha condition.
    kkteps     = inpkkteps;     // Convergence tolerance for the KKT condition.
    srchwindow = inpsrch;       // Search window for random sampling.
    rdist      = inprdist;      // Rate-distortion factor.
    costeps    = inpcosteps;    // Minimum valid decrease in the cost function
                                // for the algorithm to continue.
    costwindow = inpcostwindow; // The cost is recomputed every costwindow
                                // iterations and the decrease is checked.
    numofhits  = hitthreshold;  // Minimum number of cache hits before a data
                                // point is inserted into the cache.
    maxsvsrch  = inpsvsrch;
    kktiter    = inpliter;      // Number of KKT iterations before increasing
                                // the KKT tolerance.
    fpass      = inpfpass;      // Total number of iterations before speeding
                                // things up.

    totaldata   = 0;            // Current number of data points stored.
    iterations  = 0;            // Current iteration.
    cachefull   = GINI_FALSE;   // Kernel cache still has space available.
    startupsize = kernel->GetCachesize() / numofclasses;

    // Sanity checks.
    if ( ( Y         == (GINI_double**) GINI_NULL ) ||
         ( traindata == (GINI_double**) GINI_NULL ) ||
         ( svset     == (GINI_Set**)    GINI_NULL ) ||
         ( maxE      == (GINI_Set**)    GINI_NULL ) ||
         ( minE      == (GINI_Set**)    GINI_NULL ) ||
         ( bias      == (GINI_double*)  GINI_NULL ) ||
         ( setsize   == (GINI_u32*)     GINI_NULL ) )
    {
        ginierr = GINI_OUTOF_MEMORY;
        return GINI_FALSE;
    }

    // Initialize the array of svsets.
    for ( i = 0; i < classes; i++ )
    {
        svset[i] = (GINI_Set*) GINI_NULL;
    }

    // Initialize the maxE and minE sets.
    for ( i = 0; i < classes; i++ )
    {
        minE[i]    = (GINI_Set*) GINI_NULL;
        maxE[i]    = (GINI_Set*) GINI_NULL;
        setsize[i] = 0;
        bias[i]    = 0.0;
    }

    // Initialize svmap to null pointers.
    svmap = new (std::nothrow) GINI_Set**[numofdata];
    if ( svmap == (GINI_Set***) GINI_NULL )
    {
        ginierr = GINI_OUTOF_MEMORY;
        return GINI_FALSE;
    }
    for ( i = 0; i < numofdata; i++ )
    {
        svmap[i] = new (std::nothrow) GINI_Set*[classes];
        if ( svmap[i] == (GINI_Set**) GINI_NULL )
        {
            ginierr = GINI_OUTOF_MEMORY;
            return GINI_FALSE;
        }
        for ( j = 0; j < classes; j++ )
        {
            svmap[i][j] = (GINI_Set*) GINI_NULL;
        }
    }

    return GINI_TRUE;
}
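// ---------------------------------------------------------------------------
// A minimal usage sketch (not part of the original source): how a caller
// might set up training. Every concrete value below is an illustrative
// assumption, as is the _HMMSVM_EXAMPLES_ guard that compiles it out by
// default; the real driver code is not shown in this file.
// ---------------------------------------------------------------------------
#ifdef _HMMSVM_EXAMPLES_
static void example_setup( GINI_SVMBlock &svm )
{
    const GINI_u32 N = 1000, D = 26, K = 10;   // samples, dims, classes (assumed)

    // Per-sample regularization constants. InitTraining stores this pointer
    // (C = inpC), so the buffer must outlive training.
    GINI_double *Cvec = new GINI_double[N];
    for ( GINI_u32 n = 0; n < N; n++ ) Cvec[n] = 10.0;

    svm.InitTraining( N, D, K, Cvec,
                      1.0,      // inprdist      : rate-distortion factor
                      1e-8,     // inpalphaeps   : alpha tolerance
                      1e-3,     // inpkkteps     : KKT tolerance
                      50,       // inpsrch       : random-sampling window
                      1e-4,     // inpcosteps    : minimum cost decrease
                      100,      // inpcostwindow : cost-check interval
                      4,        // hitthreshold  : cache-hit threshold
                      20,       // inpsvsrch     : SV search limit
                      10,       // inpliter      : KKT iterations per tolerance step
                      5 );      // inpfpass      : iterations before speedup
}
#endif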
/*****************************************************************************/
// FUNCTION    : InsertTrainingData
//
// DESCRIPTION : Fills the data structures with a training point and its
//               corresponding label vector.
//
// INPUT       :
//
// OUTPUT      :
//
/*****************************************************************************/
GINI_bool GINI_SVMBlock::InsertTrainingData( GINI_double* label, GINI_double* data )
{
    traindata[totaldata] = data;
    Y[totaldata]         = label;

    // For every class with a positive label, create an sv element and
    // initialize the min/max error sets for that class.
    for ( GINI_u32 i = 0; i < classes; i++ )
    {
        // if ( setsize[i] < startupsize )
        if ( Y[totaldata][i] > alphaeps )
        {
            svmap[totaldata][i] = _addelement( totaldata, i,
                                               -2 * rdist * Y[totaldata][i], 0 );
            // cachefull = kernel->InsertCache( totaldata );
            minE[i] = svmap[totaldata][i];
            maxE[i] = svmap[totaldata][i];
            setsize[i]++;
        }
    }
    totaldata++;
    return GINI_TRUE;
}

/*****************************************************************************/
// FUNCTION    : InsertTrainingData
//
// DESCRIPTION : Same as above, but additionally scales the per-sample
//               regularization constant C by a prior weight.
//
// INPUT       :
//
// OUTPUT      :
//
/*****************************************************************************/
GINI_bool GINI_SVMBlock::InsertTrainingData( GINI_double* label, GINI_double* data,
                                             GINI_double prior )
{
    traindata[totaldata] = data;
    Y[totaldata]         = label;
    C[totaldata]        *= prior;   // Weight this sample's C by its prior.

    // For every class with a positive label, create an sv element and
    // initialize the min/max error sets for that class.
    for ( GINI_u32 i = 0; i < classes; i++ )
    {
        // if ( setsize[i] < startupsize )
        if ( Y[totaldata][i] > alphaeps )
        {
            svmap[totaldata][i] = _addelement( totaldata, i,
                                               -2 * rdist * Y[totaldata][i], 0 );
            // cachefull = kernel->InsertCache( totaldata );
            minE[i] = svmap[totaldata][i];
            maxE[i] = svmap[totaldata][i];
            setsize[i]++;
        }
    }
    totaldata++;
    return GINI_TRUE;
}
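// ---------------------------------------------------------------------------
// A minimal sketch (not from the original source) of feeding one training
// point. The loop above treats the label as a per-class vector and only
// classes with Y[i] > alphaeps receive an sv element, so a hard label is
// naturally encoded one-hot. The feature values are made up, and the
// _HMMSVM_EXAMPLES_ guard is hypothetical.
// ---------------------------------------------------------------------------
#ifdef _HMMSVM_EXAMPLES_
static void example_insert( GINI_SVMBlock &svm, GINI_u32 dims, GINI_u32 numclasses )
{
    // The block stores these pointers rather than copying the data, so the
    // buffers must outlive training (intentionally not deleted here).
    GINI_double *x = new GINI_double[dims];
    for ( GINI_u32 d = 0; d < dims; d++ ) x[d] = 0.0;       // placeholder features

    GINI_double *y = new GINI_double[numclasses];
    for ( GINI_u32 k = 0; k < numclasses; k++ ) y[k] = 0.0;
    y[2] = 1.0;                                             // point belongs to class 2

    svm.InsertTrainingData( y, x );         // or the (y, x, prior) overload
}
#endif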
/*****************************************************************************/
// FUNCTION    : _biasestimate
//
// DESCRIPTION : Estimates the bias for each class based on the current
//               state of the alphas and labels.
//
// INPUT       :
//
// OUTPUT      :
//
/*****************************************************************************/
void GINI_SVMBlock::_biasestimate()
{
    GINI_u32  compute, equalflag, pickclass;
    GINI_Set *currptr;
    GINI_u32 *leftset = new GINI_u32[classes];
    GINI_u32 *doneset = new GINI_u32[classes];
    GINI_u32 *count   = new GINI_u32[classes];
    GINI_int  i;

    // For each class initialize the class indicators.
    for ( i = 0; i < (GINI_int)classes; i++ )
    {
        leftset[i] = 1;
        bias[i]    = 0;
        doneset[i] = 0;
    }

    // The bias of the last class is taken as the reference and fixed to 0.
    doneset[classes-1] = 1;

    compute   = 0;
    equalflag = 0;
    while ( compute == 0 )
    {
        // Check whether the left-out set is already empty; if so, the
        // biases of some classes cannot be computed. For this pathological
        // case those biases are set to 0.
        for ( i = 0; i < (GINI_int)classes; i++ )
        {
            if ( leftset[i] != 0 )
            {
                equalflag = 1;
                break;
            }
        }

        // If the left set is empty, there are classes for which the bias
        // cannot be computed; we entered this loop because the done set
        // was still incomplete.
        if ( equalflag == 0 )
        {
            for ( i = 0; i < (GINI_int)classes; i++ )
            {
                // Zero the bias of every class that was never reached.
                if ( doneset[i] == 0 )
                {
                    bias[i] = 0;
                }
            }
            break;
        }

        // Pick an element out of the left-out set.
        pickclass = 0;
        for ( i = classes-1; i >= 0; i-- )
        {
            if ( ( leftset[i] == 1 ) && ( doneset[i] == 1 ) )
            {
                pickclass  = i;
                leftset[i] = 0;
                break;
            }
        }

        // If no class was picked, there is no class with reference to
        // which further biases can be computed.
        if ( pickclass == 0 )
        {
            for ( i = 0; i < (GINI_int)classes; i++ )
            {
                if ( doneset[i] == 0 )
                {
                    bias[i] = 0;
                }
            }
            break;
        }

        // Reset the counts for all the biases.
        for ( i = 0; i < (GINI_int)classes-1; i++ )
        {
            count[i] = 0;
        }

        // The bias computation finds all possible paths starting from
        // class M down to class 0, computing each bias with reference to
        // an already-computed one. Iterate over the svset list.
        GINI_Set *startptr = svset[pickclass];
        while ( startptr != (GINI_Set*) GINI_NULL )
        {
            if ( startptr->alpha <
                 C[startptr->dataind] * Y[startptr->dataind][pickclass] - alphaeps )
            {
                for ( i = 0; i < (GINI_int)classes-1; i++ )
                {
                    if ( doneset[i] == 0 )
                    {
                        if ( ( currptr = svmap[startptr->dataind][i] ) !=
                             (GINI_Set*) GINI_NULL )
                        {
                            if ( currptr->alpha <
                                 C[currptr->dataind] * Y[currptr->dataind][i] - alphaeps )
                            {
                                // Accumulate the chained estimate; it is
                                // averaged over count[i] below. (The original
                                // assigned here instead of accumulating,
                                // which made the averaging incorrect.)
                                bias[i] += startptr->E - currptr->E - bias[pickclass];
                                count[i]++;
                            }
                        }
                    }
                }
            }
            startptr = startptr->next;
        }

        // Average the accumulated estimates to get each bias and insert
        // the class into the done set.
        for ( i = 0; i < (GINI_int)classes-1; i++ )
        {
            if ( count[i] != 0 )
            {
                bias[i] /= count[i];
                doneset[i] = 1;
            }
        }

        // Check whether all the biases have been computed.
        compute = 1;
        for ( i = 0; i < (GINI_int)classes; i++ )
        {
            if ( doneset[i] != 1 )
            {
                compute = 0;
                break;
            }
        }
    }

    // Free the temporary memory.
    delete [] leftset;
    delete [] doneset;
    delete [] count;
}
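// ---------------------------------------------------------------------------
// A toy illustration (not from the original source) of the chained bias
// relation used in the loop above: for a data point that is unbounded in
// both the reference class r and another class i, the error-cache entries
// satisfy b[i] = E_r - E_i - b[r]. The numeric values below are made up,
// and the _HMMSVM_EXAMPLES_ guard is hypothetical.
// ---------------------------------------------------------------------------
#ifdef _HMMSVM_EXAMPLES_
static void example_bias_chain()
{
    GINI_double b[3];
    b[2] = 0.0;                         // reference class, bias fixed to 0

    GINI_double E2 = 0.40, E1 = 0.15;   // error-cache values of one shared SV
    b[1] = E2 - E1 - b[2];              // = 0.25

    GINI_double E0 = -0.05;             // another SV shared with class 0
    b[0] = E2 - E0 - b[2];              // = 0.45

    printf( "b = { %g, %g, %g }\n", b[0], b[1], b[2] );
}
#endif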
/*****************************************************************************/
// FUNCTION    : _kktcondition
//
// DESCRIPTION : Computes the KKT condition for a data point and returns
//               true if it is satisfied, otherwise false.
//               We use the theorem that for GiniSVM there exists one
//               unbounded variable for each data point. One has to be
//               careful here: if there exists a class with no
//               corresponding labels, the algorithm might not progress,
//               or the progress becomes very slow. Therefore the point
//               and the class that violate the KKT condition the most
//               are chosen.
//
// INPUT       :
//
// OUTPUT      :
//
/*****************************************************************************/
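// A reference point (an assumption for orientation, not taken from this
// file, which operates per class with bound C*y and tolerance kkteps): in
// standard binary SMO the per-point KKT test with tolerance tol is
//
//     alpha == 0       =>  y*E >= -tol
//     0 < alpha < C    =>  |y*E| <= tol
//     alpha == C       =>  y*E <=  tol
//
// and the magnitude of the violated inequality measures how strongly a
// point violates the KKT condition, which is what the "violates the most"
// selection described above refers to.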