📄 svm.cpp
字号:
/* YamCha -- Yet Another Multipurpose CHunk Annotator $Id: svm.cpp,v 1.11 2003/01/06 10:46:35 taku-ku Exp $; Copyright (C) 2001 Taku Kudoh <taku-ku.aist-nara.ac.jp> All rights reserved. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later verjsion. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.*/#pragma warning(disable: 4786)//CCR 2004.04.05#include "common.h"#include "mmap.h"#include <fstream>#include "darts.h"#include "param.h"#include "yamcha.h"#include <cmath>#include <limits>#define MODEL_VERSION 0.3 //CCR ADDnamespace YamCha {#define _YAMCHA_INIT_SVM param(0), mode(0), mmap(0), da(0), dot_buf(0), dot_cache(0), \ result_(0), result(0), version(0), kernel_type(0), \ param_degree(0), param_g(0), param_r(0), param_s(0), \ alpha(0), alpha_size(0), sv_size(0), table_size(0), \ dimension_size(0), nonzero_dimension_size(0), fi(0), \ table(0), da_size(0), model(0), model_size(0), \ class_size(0), sv_length(0),Ssize(0) static inline char *read_ptr (char **ptr, size_t size) { char *r = *ptr; *ptr += size; return r; } template <class T> static inline void read_static (char **ptr, T& value) { char *r = read_ptr (ptr, sizeof (T)); memcpy (&value, r, sizeof (T)); } SVM::SVM(): _YAMCHA_INIT_SVM {} SVM::SVM(const char* filename): _YAMCHA_INIT_SVM { if (! open (filename)) { throw std::runtime_error (_what); } } SVM::~SVM() { close (); } bool SVM::close () { delete [] alpha; delete [] model; delete [] result_; delete [] result; delete [] dot_buf; delete [] dot_cache; delete [] sv_length;//mtt delete [] Ssize;//mtt delete mmap; delete param; param = 0, mode = 0, mmap = 0, da = 0 , dot_buf = 0, dot_cache = 0, \ result_ = 0, result = 0, version = 0, kernel_type = 0, \ param_degree = 0, param_g = 0, param_r = 0, param_s = 0, \ alpha = 0, alpha_size = 0, sv_size = 0, table_size = 0, \ dimension_size = 0, nonzero_dimension_size = 0, fi = 0, \ table = 0, da_size = 0, model = 0, model_size = 0, \ class_size = 0, sv_length = 0, Ssize = 0; return true; } bool SVM::open (const char *filename) { try { param = new Param; mmap = new Mmap<char>; if (! mmap->open (filename)) { throw std::runtime_error (mmap->what()); } char *ptr = mmap->begin (); // kernel specfic param. version = read_ptr (&ptr, 32); // check version if (atof(version) != MODEL_VERSION) { throw std::runtime_error ("model version is different"); } kernel_type = read_ptr (&ptr, 32); read_static<unsigned int>(&ptr, param_degree); read_static<double> (&ptr, param_g); read_static<double> (&ptr, param_r); read_static<double> (&ptr, param_s); // model specfic read_static<unsigned int>(&ptr, model_size); read_static<unsigned int>(&ptr, class_size); read_static<unsigned int>(&ptr, alpha_size); read_static<unsigned int>(&ptr, sv_size); read_static<unsigned int>(&ptr, table_size); read_static<unsigned int>(&ptr, dimension_size); read_static<unsigned int>(&ptr, nonzero_dimension_size); // NOTE: no descrete answre (class_size - 1 == class_size (class_size -1) / 2) // so, we can distingwish the following two mode if (model_size == class_size-1 && model_size != 1) { mode = 1; } else if (model_size == 1 || model_size == class_size * (class_size-1)/2) { mode = 0; } else { throw std::runtime_error ("model/class size is invalid"); } // Double Array read_static<unsigned int>(&ptr, da_size); // read model prameters int param_size; read_static<int>(&ptr, param_size); char *param_str = read_ptr (&ptr, param_size); int pos = 0; while (pos < param_size) { char *key = (param_str + pos); while (param_str[++pos] != '\0') {}; pos++; char *value = param_str + pos; param->setProfile (key, value); while (param_str[++pos] != '\0') {}; pos++; } // class_, list of fixied record (32) result = new Result [class_size]; for (unsigned int i = 0; i < class_size; i++) { result[i].name = read_ptr(&ptr, 32); } if (mode == 1) { class_size--; } // model model = new model_t [model_size]; for (unsigned int i = 0; i < model_size; i++) { read_static<unsigned int>(&ptr, model[i].pos); read_static<unsigned int>(&ptr, model[i].neg); read_static<double>(&ptr, model[i].b); } // alpha, tricky, including dummy filelds alpha = new std::pair<int, double> [alpha_size + model_size]; for (unsigned int i = 0; i < alpha_size + model_size; i++) { read_static<int> (&ptr, alpha[i].first); read_static<double> (&ptr, alpha[i].second); } // feature index fi = (unsigned int *) read_ptr (&ptr, sizeof (unsigned int) * dimension_size); // table table = (int *)( read_ptr (&ptr, sizeof (int) * table_size) ); //存放每一类支持向量的总数 Ssize = new int [4]; for (int i=0; i<4; i++) Ssize[i]=0; //求每一类的支持向量的个数 for (int i=0; i < alpha_size + model_size; i++) { if (alpha[i].first==-1) { i++; while (alpha[i].first!=-1) { if (alpha[i].second>0) Ssize[0]++; else if (alpha[i].second<0) Ssize[1]++; i++; } i++; while (alpha[i].first!=-1) { if (alpha[i].second>0) Ssize[2]++; else if (alpha[i].second<0) Ssize[3]++; i++; } break; } } //mtt sv_length = new int [sv_size]; for (int m=0; m<sv_size; ++m) { sv_length[m]=0; } for (int i=0; i<dimension_size; i++) { for (int j=fi[i]; table[j]!=-1; j++) { sv_length[table[j]]++; } } //std::cout << sv_length[0] << endl; // Double Array da = (unit_t *) read_ptr (&ptr, da_size); // check size if ((unsigned int)(ptr - mmap->begin ()) != mmap->size ()) { throw std::runtime_error ("size of model file is invalid."); } // initilize dot_cache = new double [nonzero_dimension_size+1]; for (unsigned int i = 0; i <= nonzero_dimension_size; i++) { dot_cache[i] = pow (param_s * i + param_r, (int)param_degree); } dot_buf = new unsigned int [sv_size]; result_ = new double [model_size]; return true; } catch (std::exception &e) { _what = std::string ("SVM::open(): ") + e.what (); close (); throw std::runtime_error (_what); return false; } } Result *SVM::classify (unsigned int size, char **features) { std::ofstream os1;//mtt //os1.open("E:\\CcrWork\\TinySvmTest\\errordistance.txt",ios::app);//mtt for (unsigned int i = 0; i < sv_size; i++)// sv_size=支持向量的个数 { dot_buf[i] = 0;// dot_buf[i]=<xi,x> xi为sv x为未知的向量 } for (unsigned int i = 0; i < model_size; i++)//model_size=训练得到的模型 { result_[i] = -(model[i].b); // result_[i] = -b } for (unsigned int i = 0; i < class_size; i++) //class_size=分为3类 { result[i].dist = result[i].score = 0.0; // result[i].dist=和超平面的距离 result[i].flag = 0; } for (unsigned int k = 0;;) { next: if (k == size) { break; } char *key = features[k]; unsigned int len = strlen (key); int b = da[0].base; unsigned int p; for (unsigned int i = 0; i < len; i++) { p = b + (unsigned char)key[i] + 1; if ((unsigned int)b == da[p].check) { b = da[p].base; } else { k++; goto next; } } p = b; int n = da[p].base; if ((unsigned int)b == da[p].check && n < 0)//da: double array fi:feather index { for (int j = fi[-n-1]; table[j] != -1; j++) { dot_buf[table[j]]++; //table[j]中的17位中的一位和这个特征对应位相同 点乘积加一 } } k++; } unsigned int i = 0; for (unsigned int j = 0;;j++) { if (alpha[j].first == -1) { if (++i == model_size) { break; } } else { result_[i] += alpha[j].second * dot_cache[dot_buf[alpha[j].first]]; }//alpha*(1+dot_buf[alpha[j].first])的平方 } unsigned int j=0; double epsilon=0.7; switch (mode) { case 0: for (unsigned int i = 0; i < model_size; i++) { double abresult; abresult=fabs(result_[i]); if (i!=1) { result[result_[i] >= 0 ? model[i].pos : model[i].neg].score++; // score is votes result[model[i].pos].dist += result_[i]; result[model[i].neg].dist -= result_[i]; for ( ; ; j++) { if (alpha[j].first==-1) { j++; break; } } } //mtt else if ((result_[i]>-0.35)&&(result_[i]<0.5)) { if(result_[i] < 0) { result[1].flag = 1; } int pos_type=0; int neg_type=0; const double MIN=-1*numeric_limits<double>::max(); double min=-1*MIN; for (; alpha[j].first!=-1; j++) { double k=pow(18,2); double n=pow((1+sv_length[alpha[j].first]),2); double val=k+n-2*dot_cache[dot_buf[alpha[j].first]]; if (alpha[j].second>0) { val/=sqrt(4422); } else { val/=sqrt(1859); } if (val<min) { min=val; if (alpha[j].second > 0) { pos_type=1; neg_type=0; } else { neg_type=1; pos_type=0; } } else if (val==min) { if (alpha[j].second > 0) ++pos_type; else ++neg_type; } } j++; //os1<<k<<endl; if (pos_type > neg_type) { result[model[i].pos].score++; } else if (pos_type < neg_type) { result[model[i].neg].score++; } else { result[result_[i] >= 0 ? model[i].pos : model[i].neg].score++; } result[model[i].pos].dist += result_[i]; result[model[i].neg].dist -= result_[i]; } else { result[result_[i] >= 0 ? model[i].pos : model[i].neg].score++; // score is votes result[model[i].pos].dist += result_[i]; result[model[i].neg].dist -= result_[i]; for ( ; ; j++) { if (alpha[j].first==-1) { j++; break; } } //os1 << result_[i] << endl; } } break; case 1: for (unsigned int i = 0; i < model_size; i++) { result[model[i].pos].score = result_[i]; result[model[i].pos].dist = result_[i]; } break; } return result; } int SVM::getProfileInt (const char *key) { if (param) { return param->getProfileInt (key); } return 0; } const std::string SVM::getProfileString (const char *key) { if (param) { return param->getProfileString (key); } return std::string (""); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -