// svm_common.cpp -- shared SVM routines: kernel evaluation, sparse
// vector products, and model/training-data file I/O.
#include "stdafx.h"
#include "svm.h"
#include "svm_common.h"
#include "math.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
static char temstr[200];
/* Decision value for one document: sum of alpha-weighted kernel
   evaluations against every support vector, minus the threshold b. */
double classify_example(MODEL *model,DOC *ex)
{
    long sv;
    double sum = 0.0;
    for (sv = 1; sv < model->sv_num; sv++)
        sum += model->alpha[sv] * kernel(&model->kernel_parm, model->supvec[sv], ex);
    return sum - model->b;
}
/* Classifies an example with a linear kernel.
   important: the model must have the linear weight vector computed
   important: the feature numbers in the example to classify must
   not be larger than the weight vector! */
double classify_example_linear(MODEL *model,DOC *ex)
{
    double dist = sprod_ns(model->lin_weights, ex->words);
    return (double)(dist - model->b);
}
/* Evaluate the kernel selected by kernel_parm->kernel_type on documents
   a and b.  Also bumps the global kernel-cache statistic counter.
   Returns -1 (after logging) for an unknown kernel type. */
CFLOAT kernel(KERNEL_PARM *kernel_parm,DOC *a,DOC *b)
{
    com_result.kernel_cache_statistic++;
    switch (kernel_parm->kernel_type) {
    case 0:  /* linear: <a,b> */
        return (CFLOAT)sprod_ss(a->words, b->words);
    case 1:  /* polynomial: (s*<a,b> + c)^d */
        return (CFLOAT)pow(kernel_parm->coef_lin * sprod_ss(a->words, b->words)
                               + kernel_parm->coef_const,
                           (double)kernel_parm->poly_degree);
    case 2: { /* radial basis function: exp(-gamma * ||a-b||^2) */
        double sqdist = a->twonorm_sq
                        - 2 * sprod_ss(a->words, b->words)
                        + b->twonorm_sq;
        return (CFLOAT)exp(-kernel_parm->rbf_gamma * sqdist);
    }
    case 3:  /* sigmoid neural net: tanh(s*<a,b> + c) */
        return (CFLOAT)tanh(kernel_parm->coef_lin * sprod_ss(a->words, b->words)
                            + kernel_parm->coef_const);
    case 4:  /* custom kernel supplied in file kernel.h */
        return (CFLOAT)custom_kernel(kernel_parm, a, b);
    default:
        sprintf(temstr,"Error: Unknown kernel function");
        printm(temstr);
        return (-1);
    }
}
/* Inner product of two sparse vectors.  Both word lists are sorted by
   feature number and terminated by wnum==0; this is a linear merge. */
double sprod_ss(SVM_WORD *a,SVM_WORD *b)
{
    FVAL total = 0;
    SVM_WORD *pa = a;
    SVM_WORD *pb = b;
    while (pa->wnum && pb->wnum) {
        if (pa->wnum == pb->wnum) {          /* matching feature: accumulate */
            total += pa->weight * pb->weight;
            pa++;
            pb++;
        } else if (pa->wnum > pb->wnum) {    /* advance the vector that lags */
            pb++;
        } else {
            pa++;
        }
    }
    return (double)total;
}
/* Inner product of two sparse vectors where b's feature numbers are
   shifted right by `offset` before being matched against a's.
   A matched term only contributes when the shifted position stays
   inside a 16-wide feature group (0 < np < 17) -- presumably features
   are laid out in blocks of 16 and contributions must not cross a
   block boundary; TODO confirm against the feature encoding used here. */
double sprod_ss1(SVM_WORD *a,SVM_WORD*b,int offset)
{
register FVAL sum=0;
register SVM_WORD *ai,*bj;
ai=a;
bj=b;
while (ai->wnum && bj->wnum) {  /* both lists are sorted and 0-terminated */
if(ai->wnum > bj->wnum+offset) {
bj++;
}
else if (ai->wnum < bj->wnum+offset) {
ai++;
}
else
{
/* position of a's feature inside its 16-block, after applying the shift */
int np=(ai->wnum-1)%16+1+offset;
if (np>0 && np<17)
sum+=ai->weight * bj->weight;
ai++;
bj++;
}
}
return((double)sum);
}
/* Like sprod_ss1, but matched terms are kept only while the shifted
   feature number stays within the first 256 features (0 < np < 257) --
   presumably the global feature-window size; TODO confirm against the
   feature encoding used by the caller. */
double sprod_ss2(SVM_WORD *a,SVM_WORD*b,int offset)
{
register FVAL sum=0;
register SVM_WORD *ai,*bj;
ai=a;
bj=b;
while (ai->wnum && bj->wnum) {  /* both lists are sorted and 0-terminated */
if(ai->wnum > bj->wnum+offset)
{
bj++;
}
else if (ai->wnum < bj->wnum+offset) {
ai++;
}
else
{
/* shifted feature number; accumulate only inside the 256-feature range */
int np=ai->wnum+offset;
if (np>0 && np<257)
sum+=ai->weight * bj->weight;
ai++;
bj++;
}
}
return((double)sum);
}
/* Length ||w|| of the (implicit) weight vector:
   sqrt( sum_i sum_j alpha_i * alpha_j * K(sv_i, sv_j) ). */
double model_length_s(MODEL *model,KERNEL_PARM *kernel_parm)
{
    long i, j;
    double total = 0.0;
    for (i = 1; i < model->sv_num; i++) {
        double ai = model->alpha[i];
        DOC *svi = model->supvec[i];
        for (j = 1; j < model->sv_num; j++)
            total += ai * model->alpha[j] * kernel(kernel_parm, svi, model->supvec[j]);
    }
    return sqrt(total);
}
/* Zero entries vec[0..n] INCLUSIVE (n+1 elements) of a dense vector.
   Callers allocate totwords+1 doubles, so the inclusive bound is intended. */
void clear_vector_n(double *vec,long n)
{
    long idx;
    for (idx = n; idx >= 0; idx--)
        vec[idx] = 0;
}
void add_vector_ns(double *vec_n,SVM_WORD *vec_s,double faktor)
{
register SVM_WORD *ai;
ai=vec_s;
while (ai->wnum) {
vec_n[ai->wnum]+=(faktor*ai->weight);
ai++;
}
}
double sprod_ns(double *vec_n,SVM_WORD *vec_s)
{
register double sum=0;
register SVM_WORD *ai;
ai=vec_s;
while (ai->wnum) {
sum+=(vec_n[ai->wnum]*ai->weight);
ai++;
}
return(sum);
}
/* For a linear kernel, materialize the explicit weight vector
   w = sum_i alpha_i * sv_i and store it in model->lin_weights
   (heap-allocated here, indices 0..totwords). */
void add_weight_vector_to_linear_model(MODEL *model)
{
    long sv;
    model->lin_weights = (double *)my_malloc(sizeof(double) * (model->totwords + 1));
    clear_vector_n(model->lin_weights, model->totwords);
    for (sv = 1; sv < model->sv_num; sv++)
        add_vector_ns(model->lin_weights, model->supvec[sv]->words, model->alpha[sv]);
}
int read_model(char *modelfile,MODEL *model,long max_words,long ll)
{
FILE *modelfl;
long j,i;
char *line;
SVM_WORD *words;
register long wpos;
long wnum,pos;
double weight;
char version_buffer[100];
if (com_pro.show_action)
{
sprintf(temstr,"Reading model...");
printm(temstr);
}
words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*(max_words+10));
line = (char *)my_malloc(sizeof(char)*ll);
if ((modelfl = fopen (modelfile, "r")) == NULL)
{
printe (modelfile);
return -1;
}
fscanf(modelfl,"SVM-light Version %s\n",version_buffer);
if(strcmp(version_buffer,VERSION))
{
printe ("Version of model-file does not match version of svm_classify!");
return -1;
}
fscanf(modelfl,"%ld # kernel type\n",&(model->kernel_parm.kernel_type));
fscanf(modelfl,"%ld # kernel parameter -d \n",&(model->kernel_parm.poly_degree));
fscanf(modelfl,"%lf # kernel parameter -g \n",&(model->kernel_parm.rbf_gamma));
fscanf(modelfl,"%lf # kernel parameter -s \n",&(model->kernel_parm.coef_lin));
fscanf(modelfl,"%lf # kernel parameter -r \n",&(model->kernel_parm.coef_const));
fscanf(modelfl,"%s # kernel parameter -u \n",&(model->kernel_parm.custom));
fscanf(modelfl,"%ld # highest feature index \n",&(model->totwords));
fscanf(modelfl,"%ld # number of training documents \n",&(model->totdoc));
fscanf(modelfl,"%ld # number of support vectors plus 1 \n",&(model->sv_num));
fscanf(modelfl,"%lf # threshold b \n",&(model->b));
for(i=1;i<model->sv_num;i++)
{
fgets(line,(int)ll,modelfl);
pos=0;
wpos=0;
sscanf(line,"%lf",&model->alpha[i]);
while(line[++pos]>' ');
while((sscanf(line+pos,"%ld:%lf",&wnum,&weight) != EOF) && (wpos<max_words))
{
while(line[++pos]>' ');
words[wpos].wnum=wnum;
words[wpos].weight=weight;
wpos++;
}
model->supvec[i] = (DOC *)my_malloc(sizeof(DOC));
(model->supvec[i])->words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*(wpos+1));
for(j=0;j<wpos;j++)
{
(model->supvec[i])->words[j]=words[j];
}
((model->supvec[i])->words[wpos]).wnum=0;
(model->supvec[i])->twonorm_sq = sprod_ss((model->supvec[i])->words,(model->supvec[i])->words);
(model->supvec[i])->docnum = -1;
}
fclose(modelfl);
free(line);
free(words);
if (com_pro.show_readfile)
{
sprintf(temstr, "OK. (%d support vectors read)",(int)(model->sv_num-1));
printm(temstr);
}
}
/* Read training examples from `docfile`.  Parsed examples go into
   docs[] (sparse words heap-allocated per document), class labels into
   label[].  Lines starting with '#' are comments.  On return *totwords
   is the highest feature index seen and *totdoc the number of examples.
   Returns 0 on success, -1 if the file cannot be opened. */
int read_documents(char *docfile,
                   DOC *docs,
                   long *label,
                   long max_words_doc,
                   long ll,
                   long *totwords,
                   long *totdoc)
{
    char *line;
    DOC doc;
    long dnum=0, wpos, i, dpos=0, dneg=0, dunlab=0;
    long doc_label;
    FILE *docfl;
    line = (char *)my_malloc(sizeof(char)*ll);
    if ((docfl = fopen (docfile, "r")) == NULL)
    {
        printe (docfile);
        free(line);   /* fix: was leaked on this error path */
        return -1;
    }
    doc.words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*(max_words_doc+10));
    if (com_pro.show_readfile)
    {
        sprintf(temstr,"Reading examples into memory...");
        printm(temstr);
    }
    dnum=0;
    (*totwords)=0;
    while((!feof(docfl)) && fgets(line,(int)ll,docfl)) {
        if(line[0] == '#') continue; /* line contains comments */
        if(!parse_document(line,&doc,&doc_label,&wpos,max_words_doc))
        {
            /* NOTE(review): original behavior kept -- the malformed line is
               reported but still stored below with whatever parse_document
               left in doc; consider skipping it instead. */
            sprintf(temstr,"Parsing error in line %ld!",dnum);
            printm(temstr);
        }
        label[dnum]=doc_label;
        if(doc_label > 0) dpos++;
        if (doc_label < 0) dneg++;
        if (doc_label == 0) dunlab++;
        /* last real word sits at wpos-2 (wpos-1 is the 0-terminator) */
        if((wpos>1) && ((doc.words[wpos-2]).wnum>(*totwords)))
            (*totwords)=(doc.words[wpos-2]).wnum;
        docs[dnum].words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*wpos);
        docs[dnum].docnum=dnum;
        for(i=0;i<wpos;i++)
            docs[dnum].words[i]=doc.words[i];
        docs[dnum].twonorm_sq=doc.twonorm_sq;
        dnum++;
        if((dnum % 100) == 0&&com_pro.show_readfile)
        {
            sprintf(temstr,"read %ld..",dnum);
            printm(temstr);
        }
    }
    fclose(docfl);
    free(line);
    free(doc.words);
    if (com_pro.show_action)
    {
        sprintf(temstr, "OK. (%ld examples read)", dnum);
        printm(temstr);
        sprintf(temstr,"%ld positive, %ld negative, and %ld unlabeled examples.",dpos,dneg,dunlab);
        printm(temstr);
    }
    (*totdoc)=dnum;
    return 0;   /* fix: int function previously fell off the end (UB) */
}
/*Parse one line of data file */
int parse_document(char *line,DOC *doc,long *label,long*numwords,long max_words_doc)
// NOTE: the body of parse_document is missing from this extract; the
// trailing web-viewer UI text (keyboard shortcuts) has been removed.