📄 clientincludes.h

📁 This is SvmFu, a package for training and testing support vector machines (SVMs). It is written in C
💻 H
字号:
// Copyright (C) 2000 Ryan M. Rifkin <rif@mit.edu>//  // This program is free software; you can redistribute it and/or// modify it under the terms of the GNU General Public License as// published by the Free Software Foundation; either version 2 of the// License, or (at your option) any later version.//  // This program is distributed in the hope that it will be useful, but// WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU// General Public License for more details.//  // You should have received a copy of the GNU General Public License// along with this program; if not, write to the Free Software// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA// 02111-1307, USA.// This is all the junk that gets included in both // svmfutrain and svmfutest.  Just saves us from having to have it// twice in the code.using namespace std;// libc#include <stdlib.h>// #include <unistd.h>#include <new>#include <iostream>#include <fstream>#include <getopt.h>// stl#include <vector>#include <queue>#include <algorithm>//////////////////////// The SvmFu library//////////////////////// Globals#include "SvmFuSvmConstants.h"#include "SvmFuSvmTypedefs.h"// The SVM Base Class#include "SvmFuSvmBase.h"// The Kernel Cache Object#include "SvmFuSvmKernCache.h"// The SVM for solving small/medium problems directly#include "SvmFuSvmSmallOpt.h"// The SVM for solving large problems#include "SvmFuSvmLargeOpt.h"// The SVM for testing purposes.#include "SvmFuSvmTest.h"// The definition of the DataPoint class#include "SvmFuSvmDataPoint.h"//////////////////////// SvmFu client code//////////////////////// enumsenum KernType { linear, polynomial, gaussian };KernType kernType = linear;enum MachType {dense, sparse01, sparseN};MachType machType = dense;// global optionsenum Type{  type_none,  type_char,  type_short,  type_int,  type_long,  type_float,  type_double};// definesconst char * SVM_HEADER = "SvmFuSaveFileV4";// global 
config optionsType g_eltType = type_none;Type g_kernType = type_none;// utility classesclass SplitsMatrix{public:  int numSplits;  int numClasses;  int **splits;};// kernel Functions (depend on DataPoint class)#include "kernelfuncs.h"#include "kernelfuncs.cpp"//! Container for a data settemplate<class DataElt> class DataSet{ public:  DataPoint<DataElt> &operator[] (int i) { return points[i]; }  int size; //!< Number of points  int dim;  //!< Max dimensionality of a point  int *y;   //!< Y vector  DataPoint<DataElt> *points;};//! Container for a trained svmtemplate<class DataElt> class TrainedSvm{ public:  int size;  double *alphas;  DataPoint<DataElt> *svs;  int *y;  double b;  KernType kernelType;};//! Wrapper for file i/o funcstemplate<class DataElt, class KernVal> class FileIO{ public:  //! Read a data set from a file  /*!   * \todo optionally remove integrity checks   */  static const DataSet<DataElt> readDataSet (const char *inputFile)  {    DataSet<DataElt> ds;    istream *ifsp;    bool useStdIn = false;        if (strcmp(inputFile, "-"))    {      // cout << "opening " << inputFile << endl;      ifsp = new ifstream(inputFile);    }    else    {      ifsp = &cin;      useStdIn = true;    }        if (!ifsp->good())    {      cerr << "Error: Cannot open file " << inputFile << " for reading." << endl;      exit(-1);    }        (*ifsp) >> ds.size;    if (machType == dense) {      (*ifsp) >> ds.dim;    } else {      ds.dim = 0;    }    if (!ifsp->good())    {      cerr << "FileIO Error 1: Data file is misformatted." 
<< endl;      exit(-1);    }    ds.points = new DataPoint<DataElt> [ds.size];    ds.y = new int[ds.size];        for (int i = 0; i < ds.size; i++)    {      int dim, j;      switch(machType) {      case dense:	// Read input as the actual data values	ds.points[i].value = new DataElt[ds.dim];	ds.points[i].dim = ds.dim;	for(j=0;j<ds.dim;j++)	  (*ifsp) >> ds.points[i].value[j];	break;      case sparse01:	// Read input as the indices	(*ifsp) >> dim;	ds.points[i].index = new int[dim];	ds.points[i].dim = dim;	for(j=0;j<dim;j++) {	  (*ifsp) >> ds.points[i].index[j];	  if (ds.points[i].index[j] >= ds.dim) {	    ds.dim = ds.points[i].index[j]+1;	  }	}	break;      case sparseN:	// Read indices _and_ data.	// Note that the "dim" is not the actual dimensionality	// but instead twice that, since it has indices and values 	// together.  Perhaps the file format should change?	(*ifsp) >> dim;	if(dim%2) {	  cerr << "Error on Point " << i << ": Number of elements must be even for sparseN, I'm getting: " << dim << endl;	  exit(-1);	}	dim/=2;	ds.points[i].index = new int[dim];	ds.points[i].value = new DataElt[dim];	ds.points[i].dim = dim;	for(j=0;j<dim;j++) {	  (*ifsp) >> ds.points[i].index[j];	  if (ds.points[i].index[j] >= ds.dim) {	    ds.dim = ds.points[i].index[j] + 1;	  }	  (*ifsp) >> ds.points[i].value[j];	}		break;      default:	cerr << "Error: input routines can't deal with this machType" << endl;	break;      } // end machtype switch            // Modification to handle y=1.0000 instead of y=1 in datafile      double tmp;      (*ifsp) >> tmp;      //malvira: Modification to allow for multiclass.       //         now it doesn't assume input is -1 or 1, but -1 or tmp.      ds.y[i] = (int)(tmp < 0 ? -1 : tmp);             if (!ifsp->good())      {	cerr << "FileIO Error 2: Data file is misformatted." << endl;	exit(-1);      }    } // end loop for each point        if (!useStdIn)    {      ((ifstream *)ifsp)->close();	      delete ifsp;    }        return ds;  }      //! 
Read a splits matrix from a file (for multiclass classification  /*!   * \todo optionally remove integrity checks   */  static const SplitsMatrix readSplitsMatrix (const char *inputFile)  {    SplitsMatrix sm;    ifstream ifsp(inputFile);        if (!ifsp.good())    {      cerr << "Error: Cannot open file " << inputFile << " for reading." 	   << endl;      exit(-1);    }        ifsp >> sm.numSplits >> sm.numClasses;    sm.splits = new (int *)[sm.numSplits];    for (int i = 0; i < sm.numSplits; i++) {      sm.splits[i] = new int[sm.numClasses];      for (int j = 0; j < sm.numClasses; j++) {	ifsp >> sm.splits[i][j];      }    }    if (!ifsp.good())    {      cerr << "FileIO Error 1: Data file is misformatted." << endl;      exit(-1);    }        ifsp.close();    return sm;  }  static void saveSvm (SvmBase<DataPoint<DataElt>, KernVal> *svm,		       const char *filename, 		       bool useAsciiP = false)  {    // I (jim) dislike ostreams, but I have to use them since this is    // templated and could be reading or writing any of a number of    // types, and operator<< and operator>> already deal with that..    ofstream os;    os.open(filename,ios::out | ios::binary);    if (!os) {      cerr << "Error: Cannot open " << filename << " for writing." 
<< endl;      exit(1);    }    // ==============================    // Header -- always ascii    //    os << SVM_HEADER << endl;        // Parameters    // Machine type    os << "machtype " <<       ((machType==sparseN)?"sparseN":      (machType==sparse01)?"sparse01":"dense") << endl;    // Data point and kernel element type#define IterateTypes(datatype,kerntype) \    if(g_eltType==type_##datatype && g_kernType==type_##kerntype) { \       os << "datatype " << #datatype << endl; \       os << "kerntype " << #kerntype << endl; \    }#include "SvmFuSvmTypes.h"    switch(kernType)      {      case linear:	os << "kernfunc linear" << endl;	break;      case polynomial:	os << "kernfunc polynomial" << endl;	os << "degree " << degree << endl;	os << "bias " << offset << endl;	break;      case gaussian:	os << "kernfunc gaussian" << endl;	os << "sigma " << sigma << endl;	break;      }    os << "normalizer " << normalizer << endl;        if(useAsciiP)       os << "format ascii" << endl;    else       {	os << "format binary" << endl;	int test=0x11223344;	if(*(char *)&test==0x11)	  os << "endian big" << endl;	else 	  os << "endian little" << endl;	os << "int " << sizeof(int) << endl;	os << "double " << sizeof(double) << endl;	os << "datasize " << sizeof(DataElt) << endl;	os << "kernsize " << sizeof(KernVal) << endl;      }    // ==============================    // Data -- ascii or binary depending on useAsciiP    os << "data" << endl;    int size=svm->getNumSupVecs();    double b = svm->getB();    if(useAsciiP)      os << size << ' ' << b << endl;    else {      os.write((char *)&size,sizeof(int));      os.write((char *)&b,sizeof(double));    }    const int *SVs = svm->getSupVecIDsPtr();        for(int i=0;i<size;i++) {      int ID = SVs[i];      int Y = svm->getY(ID);      double alpha = svm->getAlpha(ID);      DataPoint<DataElt> pt = svm->getTrainingExample(ID);            if(useAsciiP) {	os << ID << ' ' << Y << ' ' << alpha << ' ' << pt.dim;	switch(machType) {	case dense:	  
for (int j = 0; j < pt.dim; j++)	    os << ' ' << pt.value[j];	  break;	case sparse01:	  for (int j = 0; j < pt.dim; j++)	    os << ' ' << pt.index[j];	  break;	case sparseN:	  for (int j = 0; j < pt.dim; j++) {	    os << ' ' << pt.index[j];	    os << ' ' << pt.value[j];	  }	  break;	}	os << endl;      }  else {	int dim = pt.dim;	os.write((char *)&ID, sizeof(int));	os.write((char *)&Y, sizeof(int));	os.write((char *)&alpha, sizeof(double));	os.write((char *)&dim, sizeof(int));	if(machType==sparse01 || machType==sparseN)	  os.write((char *)pt.index, dim * sizeof(int));	if(machType==dense || machType==sparseN)	  os.write((char *)pt.value, dim * sizeof(DataElt));      }    }        os.close();  }    //! Read a trained svm from a file  static TrainedSvm<DataElt> loadSvm (const char *filename,				      bool useAsciiP = false)  {    TrainedSvm<DataElt> res;        ifstream in;    in.open(filename,ios::in | ios::binary);    if (!in) {      cerr << "Error: Cannot reopen file " << svmFile 	   << " for reading." << endl;      exit(1);    }        // We've already read and parsed this file; read it up    // until the 'data' line    char line[1024];    line[0]='\0';    while(strcmp(line,"data")!=0) {      in.getline(line,1024);      if(in.eof()) {	cout << "Error: couldn't find data in svm save file" << endl;	exit(1);      }    }        if (useAsciiP) {      in >> res.size >> res.b;    } else {      in.read((char *)&res.size, sizeof(int));      in.read((char *)&res.b, sizeof(double));    }        res.svs = new DataPoint<DataElt>[res.size];    res.alphas = new double[res.size];    res.y = new int[res.size];        // For each point, the ID, alpha, the length of the point (in bytes),     // then the point itself.    
for (int i = 0; i < res.size; i++) {      int ID, dim;            if (useAsciiP) {	in >> ID >> res.y[i] >> res.alphas[i] >> dim;	res.svs[i].dim = dim;	switch(machType) {	case dense:	  res.svs[i].value = new DataElt[dim];	  for (int j = 0; j < dim; j++)	    in >> res.svs[i].value[j];	  break;	case sparse01:	  res.svs[i].index = new int[dim];	  for (int j = 0; j < dim; j++)	    in >> res.svs[i].index[j];	  break;	case sparseN:	  res.svs[i].index = new int[dim];	  res.svs[i].value = new DataElt[dim];	  for (int j = 0; j < dim; j++) {	    in >> res.svs[i].index[j];	    in >> res.svs[i].value[j];	  }	  break;	}      } else {	// read in the sv metadata	in.read((char *)&ID, sizeof(int));	in.read((char *)&res.y[i], sizeof(int));	in.read((char *)&res.alphas[i], sizeof(double));	in.read((char *)&dim, sizeof(int));	res.svs[i].dim = dim;		if(machType==sparse01 || machType==sparseN) {	  res.svs[i].index = new int[dim];	  in.read((char *)res.svs[i].index, dim * sizeof(int));	}	if(machType==dense || machType==sparseN) {	  res.svs[i].value = new DataElt[dim];	  in.read((char *)res.svs[i].value, dim * sizeof(DataElt));	}      }    }        // cout << "Read SVM with " << res.size << " SVs, bias = "     //	 << res.b << "." << endl;        in.close();    return res;  }};
💿 文件大小 110 K
👤 上传用户 renyumeng
📂 所属分类其他
🏷️ 相关标签

#machines #training #package #testing
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -