📄 clientincludes.h
字号:
// Copyright (C) 2000 Ryan M. Rifkin <rif@mit.edu>// // This program is free software; you can redistribute it and/or// modify it under the terms of the GNU General Public License as// published by the Free Software Foundation; either version 2 of the// License, or (at your option) any later version.// // This program is distributed in the hope that it will be useful, but// WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU// General Public License for more details.// // You should have received a copy of the GNU General Public License// along with this program; if not, write to the Free Software// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA// 02111-1307, USA.// This is all the junk that gets included in both // svmfutrain and svmfutest. Just saves us from having to have it// twice in the code.using namespace std;// libc#include <stdlib.h>// #include <unistd.h>#include <new>#include <iostream>#include <fstream>#include <getopt.h>// stl#include <vector>#include <queue>#include <algorithm>//////////////////////// The SvmFu library//////////////////////// Globals#include "SvmFuSvmConstants.h"#include "SvmFuSvmTypedefs.h"// The SVM Base Class#include "SvmFuSvmBase.h"// The Kernel Cache Object#include "SvmFuSvmKernCache.h"// The SVM for solving small/medium problems directly#include "SvmFuSvmSmallOpt.h"// The SVM for solving large problems#include "SvmFuSvmLargeOpt.h"// The SVM for testing purposes.#include "SvmFuSvmTest.h"// The definition of the DataPoint class#include "SvmFuSvmDataPoint.h"//////////////////////// SvmFu client code//////////////////////// enumsenum KernType { linear, polynomial, gaussian };KernType kernType = linear;enum MachType {dense, sparse01, sparseN};MachType machType = dense;// global optionsenum Type{ type_none, type_char, type_short, type_int, type_long, type_float, type_double};// definesconst char * SVM_HEADER = "SvmFuSaveFileV4";// global config optionsType g_eltType = type_none;Type g_kernType = type_none;// utility classesclass SplitsMatrix{public: int numSplits; int numClasses; int **splits;};// kernel Functions (depend on DataPoint class)#include "kernelfuncs.h"#include "kernelfuncs.cpp"//! Container for a data settemplate<class DataElt> class DataSet{ public: DataPoint<DataElt> &operator[] (int i) { return points[i]; } int size; //!< Number of points int dim; //!< Max dimensionality of a point int *y; //!< Y vector DataPoint<DataElt> *points;};//! Container for a trained svmtemplate<class DataElt> class TrainedSvm{ public: int size; double *alphas; DataPoint<DataElt> *svs; int *y; double b; KernType kernelType;};//! Wrapper for file i/o funcstemplate<class DataElt, class KernVal> class FileIO{ public: //! Read a data set from a file /*! * \todo optionally remove integrity checks */ static const DataSet<DataElt> readDataSet (const char *inputFile) { DataSet<DataElt> ds; istream *ifsp; bool useStdIn = false; if (strcmp(inputFile, "-")) { // cout << "opening " << inputFile << endl; ifsp = new ifstream(inputFile); } else { ifsp = &cin; useStdIn = true; } if (!ifsp->good()) { cerr << "Error: Cannot open file " << inputFile << " for reading." << endl; exit(-1); } (*ifsp) >> ds.size; if (machType == dense) { (*ifsp) >> ds.dim; } else { ds.dim = 0; } if (!ifsp->good()) { cerr << "FileIO Error 1: Data file is misformatted." << endl; exit(-1); } ds.points = new DataPoint<DataElt> [ds.size]; ds.y = new int[ds.size]; for (int i = 0; i < ds.size; i++) { int dim, j; switch(machType) { case dense: // Read input as the actual data values ds.points[i].value = new DataElt[ds.dim]; ds.points[i].dim = ds.dim; for(j=0;j<ds.dim;j++) (*ifsp) >> ds.points[i].value[j]; break; case sparse01: // Read input as the indices (*ifsp) >> dim; ds.points[i].index = new int[dim]; ds.points[i].dim = dim; for(j=0;j<dim;j++) { (*ifsp) >> ds.points[i].index[j]; if (ds.points[i].index[j] >= ds.dim) { ds.dim = ds.points[i].index[j]+1; } } break; case sparseN: // Read indices _and_ data. // Note that the "dim" is not the actual dimensionality // but instead twice that, since it has indices and values // together. Perhaps the file format should change? (*ifsp) >> dim; if(dim%2) { cerr << "Error on Point " << i << ": Number of elements must be even for sparseN, I'm getting: " << dim << endl; exit(-1); } dim/=2; ds.points[i].index = new int[dim]; ds.points[i].value = new DataElt[dim]; ds.points[i].dim = dim; for(j=0;j<dim;j++) { (*ifsp) >> ds.points[i].index[j]; if (ds.points[i].index[j] >= ds.dim) { ds.dim = ds.points[i].index[j] + 1; } (*ifsp) >> ds.points[i].value[j]; } break; default: cerr << "Error: input routines can't deal with this machType" << endl; break; } // end machtype switch // Modification to handle y=1.0000 instead of y=1 in datafile double tmp; (*ifsp) >> tmp; //malvira: Modification to allow for multiclass. // now it doesn't assume input is -1 or 1, but -1 or tmp. ds.y[i] = (int)(tmp < 0 ? -1 : tmp); if (!ifsp->good()) { cerr << "FileIO Error 2: Data file is misformatted." << endl; exit(-1); } } // end loop for each point if (!useStdIn) { ((ifstream *)ifsp)->close(); delete ifsp; } return ds; } //! Read a splits matrix from a file (for multiclass classification /*! * \todo optionally remove integrity checks */ static const SplitsMatrix readSplitsMatrix (const char *inputFile) { SplitsMatrix sm; ifstream ifsp(inputFile); if (!ifsp.good()) { cerr << "Error: Cannot open file " << inputFile << " for reading." << endl; exit(-1); } ifsp >> sm.numSplits >> sm.numClasses; sm.splits = new (int *)[sm.numSplits]; for (int i = 0; i < sm.numSplits; i++) { sm.splits[i] = new int[sm.numClasses]; for (int j = 0; j < sm.numClasses; j++) { ifsp >> sm.splits[i][j]; } } if (!ifsp.good()) { cerr << "FileIO Error 1: Data file is misformatted." << endl; exit(-1); } ifsp.close(); return sm; } static void saveSvm (SvmBase<DataPoint<DataElt>, KernVal> *svm, const char *filename, bool useAsciiP = false) { // I (jim) dislike ostreams, but I have to use them since this is // templated and could be reading or writing any of a number of // types, and operator<< and operator>> already deal with that.. ofstream os; os.open(filename,ios::out | ios::binary); if (!os) { cerr << "Error: Cannot open " << filename << " for writing." << endl; exit(1); } // ============================== // Header -- always ascii // os << SVM_HEADER << endl; // Parameters // Machine type os << "machtype " << ((machType==sparseN)?"sparseN": (machType==sparse01)?"sparse01":"dense") << endl; // Data point and kernel element type#define IterateTypes(datatype,kerntype) \ if(g_eltType==type_##datatype && g_kernType==type_##kerntype) { \ os << "datatype " << #datatype << endl; \ os << "kerntype " << #kerntype << endl; \ }#include "SvmFuSvmTypes.h" switch(kernType) { case linear: os << "kernfunc linear" << endl; break; case polynomial: os << "kernfunc polynomial" << endl; os << "degree " << degree << endl; os << "bias " << offset << endl; break; case gaussian: os << "kernfunc gaussian" << endl; os << "sigma " << sigma << endl; break; } os << "normalizer " << normalizer << endl; if(useAsciiP) os << "format ascii" << endl; else { os << "format binary" << endl; int test=0x11223344; if(*(char *)&test==0x11) os << "endian big" << endl; else os << "endian little" << endl; os << "int " << sizeof(int) << endl; os << "double " << sizeof(double) << endl; os << "datasize " << sizeof(DataElt) << endl; os << "kernsize " << sizeof(KernVal) << endl; } // ============================== // Data -- ascii or binary depending on useAsciiP os << "data" << endl; int size=svm->getNumSupVecs(); double b = svm->getB(); if(useAsciiP) os << size << ' ' << b << endl; else { os.write((char *)&size,sizeof(int)); os.write((char *)&b,sizeof(double)); } const int *SVs = svm->getSupVecIDsPtr(); for(int i=0;i<size;i++) { int ID = SVs[i]; int Y = svm->getY(ID); double alpha = svm->getAlpha(ID); DataPoint<DataElt> pt = svm->getTrainingExample(ID); if(useAsciiP) { os << ID << ' ' << Y << ' ' << alpha << ' ' << pt.dim; switch(machType) { case dense: for (int j = 0; j < pt.dim; j++) os << ' ' << pt.value[j]; break; case sparse01: for (int j = 0; j < pt.dim; j++) os << ' ' << pt.index[j]; break; case sparseN: for (int j = 0; j < pt.dim; j++) { os << ' ' << pt.index[j]; os << ' ' << pt.value[j]; } break; } os << endl; } else { int dim = pt.dim; os.write((char *)&ID, sizeof(int)); os.write((char *)&Y, sizeof(int)); os.write((char *)&alpha, sizeof(double)); os.write((char *)&dim, sizeof(int)); if(machType==sparse01 || machType==sparseN) os.write((char *)pt.index, dim * sizeof(int)); if(machType==dense || machType==sparseN) os.write((char *)pt.value, dim * sizeof(DataElt)); } } os.close(); } //! Read a trained svm from a file static TrainedSvm<DataElt> loadSvm (const char *filename, bool useAsciiP = false) { TrainedSvm<DataElt> res; ifstream in; in.open(filename,ios::in | ios::binary); if (!in) { cerr << "Error: Cannot reopen file " << svmFile << " for reading." << endl; exit(1); } // We've already read and parsed this file; read it up // until the 'data' line char line[1024]; line[0]='\0'; while(strcmp(line,"data")!=0) { in.getline(line,1024); if(in.eof()) { cout << "Error: couldn't find data in svm save file" << endl; exit(1); } } if (useAsciiP) { in >> res.size >> res.b; } else { in.read((char *)&res.size, sizeof(int)); in.read((char *)&res.b, sizeof(double)); } res.svs = new DataPoint<DataElt>[res.size]; res.alphas = new double[res.size]; res.y = new int[res.size]; // For each point, the ID, alpha, the length of the point (in bytes), // then the point itself. for (int i = 0; i < res.size; i++) { int ID, dim; if (useAsciiP) { in >> ID >> res.y[i] >> res.alphas[i] >> dim; res.svs[i].dim = dim; switch(machType) { case dense: res.svs[i].value = new DataElt[dim]; for (int j = 0; j < dim; j++) in >> res.svs[i].value[j]; break; case sparse01: res.svs[i].index = new int[dim]; for (int j = 0; j < dim; j++) in >> res.svs[i].index[j]; break; case sparseN: res.svs[i].index = new int[dim]; res.svs[i].value = new DataElt[dim]; for (int j = 0; j < dim; j++) { in >> res.svs[i].index[j]; in >> res.svs[i].value[j]; } break; } } else { // read in the sv metadata in.read((char *)&ID, sizeof(int)); in.read((char *)&res.y[i], sizeof(int)); in.read((char *)&res.alphas[i], sizeof(double)); in.read((char *)&dim, sizeof(int)); res.svs[i].dim = dim; if(machType==sparse01 || machType==sparseN) { res.svs[i].index = new int[dim]; in.read((char *)res.svs[i].index, dim * sizeof(int)); } if(machType==dense || machType==sparseN) { res.svs[i].value = new DataElt[dim]; in.read((char *)res.svs[i].value, dim * sizeof(DataElt)); } } } // cout << "Read SVM with " << res.size << " SVs, bias = " // << res.b << "." << endl; in.close(); return res; }};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -