📄 la_svm.cpp
字号:
// -*- Mode: c++; c-file-style: "stroustrup"; -*-using namespace std;#include <stdio.h>#include <vector>#include <math.h>#include <ctime>#include <iostream>#include <fstream>#include "vector.h"#include "lasvm.h"#define LINEAR 0#define POLY 1#define RBF 2#define SIGMOID 3 #define ONLINE 0#define ONLINE_WITH_FINISHING 1#define RANDOM 0#define GRADIENT 1#define MARGIN 2#define ITERATIONS 0#define SVS 1#define TIME 2char *kernel_type_table[] = {"linear","polynomial","rbf","sigmoid"};class stopwatch{public: stopwatch() : start(std::clock()){} //start counting time ~stopwatch(); double get_time() { clock_t total = clock()-start;; return double(total)/CLOCKS_PER_SEC; };private: std::clock_t start;};stopwatch::~stopwatch(){ clock_t total = clock()-start; //get elapsed time cout<<"Time(secs): "<<double(total)/CLOCKS_PER_SEC<<endl;}class ID // class to hold split file indices and labels{public: int x; int y; ID() : x(0), y(0) {} ID(int x1,int y1) : x(x1), y(y1) {}};// IDs will be sorted by index, not by label.bool operator<(const ID& x, const ID& y){ return x.x < y.x;}/* Data and model */int m=0; // training set sizevector <lasvm_sparsevector_t*> X; // feature vectorsvector <int> Y; // labelsvector <double> kparam; // kernel parametersvector <double> alpha; // alpha_i, SV weightsdouble b0; // threshold/* Hyperparameters */int kernel_type=RBF; // LINEAR, POLY, RBF or SIGMOID kernelsdouble degree=3,kgamma=-1,coef0=0;// kernel paramsint use_b0=1; // use threshold via constraint \sum a_i y_i =0int selection_type=RANDOM; // RANDOM, GRADIENT or MARGIN selection strategiesint optimizer=ONLINE_WITH_FINISHING; // strategy of optimizationdouble C=1; // C, penalty on errorsdouble C_neg=1; // C-Weighting for negative examplesdouble C_pos=1; // C-Weighting for positive examplesint epochs=1; // epochs of online learningint candidates=50; // number of candidates for "active" selection processdouble deltamax=1000; // tolerance for performing reprocess step, 1000=1 reprocess onlyvector <double> select_size; // Max number of SVs to take with selection strategy (for early stopping) vector <double> x_square; // norms of input vectors, used for RBF/* Programm behaviour*/int verbosity=1; // verbosity level, 0=offint saves=1;char report_file_name[1024]; // filename for the training reportchar split_file_name[1024]="\0"; // filename for the splitsint cache_size=256; // 256Mb cache size as defaultdouble epsgr=1e-3; // tolerance on gradientslong long kcalcs=0; // number of kernel evaluationsint binary_files=0;vector <ID> splits; int max_index=0;vector <int> iold, inew; // sets of old (already seen) points + new (unseen) pointsint termination_type=0;void exit_with_help(){ fprintf(stdout, "Usage: la_svm [options] training_set_file [model_file]\n" "options:\n" "-B file format : files are stored in the following format:\n" " 0 -- libsvm ascii format (default)\n" " 1 -- binary format\n" " 2 -- split file format\n" "-o optimizer: set the type of optimization (default 1)\n" " 0 -- online \n" " 1 -- online with finishing step \n" "-t kernel_type : set type of kernel function (default 2)\n" " 0 -- linear: u'*v\n" " 1 -- polynomial: (gamma*u'*v + coef0)^degree\n" " 2 -- radial basis function: exp(-gamma*|u-v|^2)\n" " 3 -- sigmoid: tanh(gamma*u'*v + coef0)\n" "-s selection: set the type of selection strategy (default 0)\n" " 0 -- random \n" " 1 -- gradient-based \n" " 2 -- margin-based \n" "-T termination: set the type of early stopping strategy (default 0)\n" " 0 -- number of iterations \n" " 1 -- number of SVs \n" " 2 -- time-based \n" "-l sample: number of iterations/SVs/seconds to sample for early stopping (default all)\n" " if a list of numbers is given a model file is saved for each element of the set\n" "-C candidates : set number of candidates to search for selection strategy (default 50)\n" "-d degree : set degree in kernel function (default 3)\n" "-g gamma : set gamma in kernel function (default 1/k)\n" "-r coef0 : set coef0 in kernel function (default 0)\n" "-c cost : set the parameter C of C-SVC\n" "-m cachesize : set cache memory size in MB (default 256)\n" "-wi weight: set the parameter C of class i to weight*C (default 1)\n" "-b bias: use a bias or not i.e. no constraint sum alpha_i y_i =0 (default 1=on)\n" "-e epsilon : set tolerance of termination criterion (default 0.001)\n" "-p epochs : number of epochs to train in online setting (default 1)\n" "-D deltamax : set tolerance for reprocess step, 1000=1 call to reprocess >1000=no calls to reprocess (default 1000)\n" ); exit(1);}void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name){ int i; int clss; double weight; // parse options for(i=1;i<argc;i++) { if(argv[i][0] != '-') break; ++i; switch(argv[i-1][1]) { case 'o': optimizer = atoi(argv[i]); break; case 't': kernel_type = atoi(argv[i]); break; case 's': selection_type = atoi(argv[i]); break; case 'l': while(1) { select_size.push_back(atof(argv[i])); ++i; if((argv[i][0]<'0') || (argv[i][0]>'9')) break; } i--; break; case 'd': degree = atof(argv[i]); break; case 'g': kgamma = atof(argv[i]); break; case 'r': coef0 = atof(argv[i]); break; case 'm': cache_size = (int) atof(argv[i]); break; case 'c': C = atof(argv[i]); break; case 'w': clss= atoi(&argv[i-1][2]); weight = atof(argv[i]); if (clss>=1) C_pos=weight; else C_neg=weight; break; case 'b': use_b0=atoi(argv[i]); break; case 'B': binary_files=atoi(argv[i]); break; case 'e': epsgr = atof(argv[i]); break; case 'p': epochs = atoi(argv[i]); break; case 'D': deltamax = atoi(argv[i]); break; case 'C': candidates = atoi(argv[i]); break; case 'T': termination_type = atoi(argv[i]); break; default: fprintf(stderr,"unknown option\n"); exit_with_help(); } } saves=select_size.size(); if(saves==0) select_size.push_back(100000000); // determine filenames if(i>=argc) exit_with_help(); strcpy(input_file_name, argv[i]); if(i<argc-1) strcpy(model_file_name,argv[i+1]); else { char *p = strrchr(argv[i],'/'); if(p==NULL) p = argv[i]; else ++p; sprintf(model_file_name,"%s.model",p); }}int split_file_load(char *f){ int binary_file=0,labs=0,inds=0; FILE *fp; fp=fopen(f,"r"); if(fp==NULL) {printf("[couldn't load split file: %s]\n",f); exit(1);} char dummy[100],dummy2[100]; unsigned int i,j=0; for(i=0;i<strlen(f);i++) if(f[i]=='/') j=i+1; fscanf(fp,"%s %s",dummy,dummy2); strcpy(&(f[j]),dummy2); fscanf(fp,"%s %d",dummy,&binary_file); fscanf(fp,"%s %d",dummy,&inds); fscanf(fp,"%s %d",dummy,&labs); printf("[split file: load:%s binary:%d new_indices:%d new_labels:%d]\n",dummy2,binary_file,inds,labs); //printf("[split file:%s binary=%d]\n",dummy2,binary_file); if(!inds) return binary_file; while(1) { int i,j; int c=fscanf(fp,"%d",&i); if(labs) c=fscanf(fp,"%d",&j); if(c==-1) break; if (labs) splits.push_back(ID(i-1,j)); else splits.push_back(ID(i-1,0)); } sort(splits.begin(),splits.end()); return binary_file;}int libsvm_load_data(char *filename)// loads the same format as LIBSVM{ int index; double value; int elements, i; FILE *fp = fopen(filename,"r"); lasvm_sparsevector_t* v; if(fp == NULL) { fprintf(stderr,"Can't open input file \"%s\"\n",filename); exit(1); } else printf("loading \"%s\".. \n",filename); int splitpos=0; int msz = 0; elements = 0; while(1) { int c = fgetc(fp); switch(c) { case '\n': if(splits.size()>0) { if(splitpos<(int)splits.size() && splits[splitpos].x==msz) { v=lasvm_sparsevector_create(); X.push_back(v); splitpos++; } } else { v=lasvm_sparsevector_create(); X.push_back(v); } ++msz; //printf("%d\n",m); elements=0; break; case ':': ++elements; break; case EOF: goto out; default: ; } } out: rewind(fp); max_index = 0;splitpos=0; for(i=0;i<msz;i++) { int write=0; if(splits.size()>0) { if(splitpos<(int)splits.size() && splits[splitpos].x==i) { write=2;splitpos++; } } else write=1; int label; fscanf(fp,"%d",&label); // printf("%d %d\n",i,label); if(write) { if(splits.size()>0) { if(splits[splitpos-1].y!=0) Y.push_back(splits[splitpos-1].y); else Y.push_back(label); } else Y.push_back(label); } while(1) { int c; do { c = getc(fp); if(c=='\n') goto out2; } while(isspace(c)); ungetc(c,fp); fscanf(fp,"%d:%lf",&index,&value); if (write==1) lasvm_sparsevector_set(X[m+i],index,value); if (write==2) lasvm_sparsevector_set(X[splitpos-1],index,value); if (index>max_index) max_index=index; } out2: label=1; // dummy } fclose(fp); msz=X.size()-m; printf("examples: %d features: %d\n",msz,max_index); return msz;}int binary_load_data(char *filename){ int msz,i=0,j; lasvm_sparsevector_t* v; int nonsparse=0; ifstream f; f.open(filename,ios::in|ios::binary); // read number of examples and number of features int sz[2]; f.read((char*)sz,2*sizeof(int)); if (!f) { printf("File writing error in line %d.\n",i); exit(1);} msz=sz[0]; max_index=sz[1]; vector <float> val; vector <int> ind; val.resize(max_index); if(max_index>0) nonsparse=1; int splitpos=0; for(i=0;i<msz;i++) { int mwrite=0; if(splits.size()>0) { if(splitpos<(int)splits.size() && splits[splitpos].x==i) { mwrite=1;splitpos++; v=lasvm_sparsevector_create(); X.push_back(v); } } else { mwrite=1; v=lasvm_sparsevector_create(); X.push_back(v); } if(nonsparse) // non-sparse binary file { f.read((char*)sz,1*sizeof(int)); // get label if(mwrite) { if(splits.size()>0 && splits[splitpos-1].y!=0) Y.push_back(splits[splitpos-1].y); else Y.push_back(sz[0]); } f.read((char*)(&val[0]),max_index*sizeof(float));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -