/* svm_common.c */
/************************************************************************/
/* */
/* svm_common.c */
/* */
/* Definitions and functions used in both svm_learn and svm_classify. */
/* */
/* Author: Thorsten Joachims */
/* Date: 02.07.04 */
/* */
/* Copyright (c) 2004 Thorsten Joachims - All rights reserved */
/* */
/* This software is available for non-commercial use only. It must */
/* not be modified and distributed without prior permission of the */
/* author. The author is not responsible for implications from the */
/* use of this software. */
/* */
/************************************************************************/
# include "ctype.h"
# include "svm_common.h"
# include "kernel.h" /* this contains a user supplied kernel */
long verbosity; /* verbosity level (0-4) */
long kernel_cache_statistic;
double classify_example(MODEL *model, DOC *ex)
/* Compute the decision value of the model on one example. */
{
  double score;
  long sv;

  /* Fast path: a linear model with a precomputed weight vector. */
  if ((model->kernel_parm.kernel_type == LINEAR) && (model->lin_weights))
    return (classify_example_linear(model, ex));

  /* General case: weighted sum of kernel evaluations against all
     support vectors (index 0 is unused by convention). */
  score = 0;
  for (sv = 1; sv < model->sv_num; sv++) {
    score += kernel(&model->kernel_parm, model->supvec[sv], ex) * model->alpha[sv];
  }
  return (score - model->b);
}
double classify_example_linear(MODEL *model, DOC *ex)
/* Decision value for a linear-kernel model.
   Important: the model must already have its dense weight vector
   computed (use add_weight_vector_to_linear_model(&model)), and the
   feature numbers in the example must not exceed the weight vector
   length. */
{
  SVECTOR *part;
  double score = 0;

  for (part = ex->fvec; part != NULL; part = part->next) {
    score += part->factor * sprod_ns(model->lin_weights, part);
  }
  return (score - model->b);
}
CFLOAT kernel(KERNEL_PARM *kernel_parm, DOC *a, DOC *b)
/* Kernel value between two documents. A document may be represented
   as a sum of feature vectors (an SVECTOR list with per-element
   factors); the kernel is accumulated over all pairs of list elements
   whose kernel_id matches. */
{
  SVECTOR *va, *vb;
  double total = 0;

  for (va = a->fvec; va != NULL; va = va->next) {
    for (vb = b->fvec; vb != NULL; vb = vb->next) {
      if (va->kernel_id == vb->kernel_id)
        total += va->factor * vb->factor * single_kernel(kernel_parm, va, vb);
    }
  }
  return (total);
}
CFLOAT single_kernel(KERNEL_PARM *kernel_parm, SVECTOR *a, SVECTOR *b)
/* Kernel function between two sparse vectors.
   kernel_type selects: 0=linear, 1=polynomial, 2=RBF, 3=sigmoid,
   4=custom kernel supplied in kernel.h. Exits on unknown type. */
{
  kernel_cache_statistic++;
  switch (kernel_parm->kernel_type) {
    case 0: /* linear: a'b */
      return ((CFLOAT)sprod_ss(a, b));
    case 1: /* polynomial: (s*a'b + c)^d */
      return ((CFLOAT)pow(kernel_parm->coef_lin * sprod_ss(a, b)
                            + kernel_parm->coef_const,
                          (double)kernel_parm->poly_degree));
    case 2: /* radial basis function: exp(-gamma*||a-b||^2),
               using ||a-b||^2 = |a|^2 - 2 a'b + |b|^2 */
      return ((CFLOAT)exp(-kernel_parm->rbf_gamma
                          * (a->twonorm_sq - 2 * sprod_ss(a, b) + b->twonorm_sq)));
    case 3: /* sigmoid neural net: tanh(s*a'b + c) */
      return ((CFLOAT)tanh(kernel_parm->coef_lin * sprod_ss(a, b)
                           + kernel_parm->coef_const));
    case 4: /* custom kernel supplied in file kernel.h */
      return ((CFLOAT)custom_kernel(kernel_parm, a, b));
    default:
      /* Fix: diagnostics go to stderr, not stdout, so they are not
         lost when classification output is redirected. */
      fprintf(stderr, "Error: Unknown kernel function\n");
      exit(1);
  }
}
SVECTOR *create_svector(WORD *words, char *userdefined, double factor)
/* Allocate a new SVECTOR holding a copy of `words` (an array
   terminated by an entry with wnum==0) and a copy of the
   `userdefined` string. The new vector is a single list element
   (next==NULL) with kernel_id 0 and the given factor. */
{
  SVECTOR *vec;
  long nwords, nchars, k;

  /* Count word entries, including the wnum==0 terminator. */
  for (nwords = 0; words[nwords].wnum; nwords++)
    ;
  nwords++;

  vec = (SVECTOR *)my_malloc(sizeof(SVECTOR));
  vec->words = (WORD *)my_malloc(sizeof(WORD) * (nwords));
  for (k = 0; k < nwords; k++)
    vec->words[k] = words[k];
  /* Cache the squared two-norm for kernels that need it (e.g. RBF). */
  vec->twonorm_sq = sprod_ss(vec, vec);

  /* Copy the user-defined string, including its '\0' terminator. */
  for (nchars = 0; userdefined[nchars]; nchars++)
    ;
  nchars++;
  vec->userdefined = (char *)my_malloc(sizeof(char) * (nchars));
  for (k = 0; k < nchars; k++)
    vec->userdefined[k] = userdefined[k];

  vec->kernel_id = 0;
  vec->next = NULL;
  vec->factor = factor;
  return (vec);
}
SVECTOR *copy_svector(SVECTOR *vec)
/* Deep-copy a whole SVECTOR list; returns NULL for an empty list. */
{
  SVECTOR *head = NULL;
  SVECTOR **tail = &head;

  /* Iterative list copy, appending each cloned element at the tail. */
  while (vec) {
    *tail = create_svector(vec->words, vec->userdefined, vec->factor);
    tail = &((*tail)->next);
    vec = vec->next;
  }
  return (head);
}
void free_svector(SVECTOR *vec)
/* Free a whole SVECTOR list, including each element's words and
   userdefined buffers. Rewritten iteratively: the original recursed
   once per list element, which risks stack overflow on very long
   lists. The NULL guard on userdefined was dropped because
   free(NULL) is defined as a no-op. */
{
  SVECTOR *next;

  while (vec) {
    next = vec->next;
    free(vec->words);
    free(vec->userdefined);
    free(vec);
    vec = next;
  }
}
double sprod_ss(SVECTOR *a, SVECTOR *b)
/* Inner product of two sparse vectors. Assumes each vector's words
   are sorted by ascending wnum and terminated by wnum==0; the loop is
   a standard sorted-list merge. */
{
  CFLOAT acc = 0;
  WORD *pa = a->words;
  WORD *pb = b->words;

  while (pa->wnum && pb->wnum) {
    if (pa->wnum < pb->wnum) {
      pa++;
    } else if (pa->wnum > pb->wnum) {
      pb++;
    } else {
      acc += (CFLOAT)(pa->weight) * (CFLOAT)(pb->weight);
      pa++;
      pb++;
    }
  }
  return ((double)acc);
}
SVECTOR* sub_ss(SVECTOR *a, SVECTOR *b)
/* compute the difference a-b of two sparse vectors */
/* Note: SVECTOR lists are not followed, but only the first
   SVECTOR is used */
{
  SVECTOR *vec;
  register WORD *sum,*sumi;
  register WORD *ai,*bj;
  long veclength;

  /* Pass 1: merge-count an upper bound on the number of result
     entries. Entries whose weights cancel to exactly zero are skipped
     in pass 2, so the allocation may be slightly larger than what is
     finally used. */
  ai=a->words;
  bj=b->words;
  veclength=0;
  while (ai->wnum && bj->wnum) {
    if(ai->wnum > bj->wnum) {
      veclength++;
      bj++;
    }
    else if (ai->wnum < bj->wnum) {
      veclength++;
      ai++;
    }
    else {
      veclength++;
      ai++;
      bj++;
    }
  }
  /* Count whichever vector still has trailing entries. */
  while (bj->wnum) {
    veclength++;
    bj++;
  }
  while (ai->wnum) {
    veclength++;
    ai++;
  }
  veclength++;  /* one extra slot for the wnum==0 terminator */

  sum=(WORD *)my_malloc(sizeof(WORD)*veclength);
  sumi=sum;

  /* Pass 2: sorted-list merge computing a-b entry by entry. */
  ai=a->words;
  bj=b->words;
  while (ai->wnum && bj->wnum) {
    if(ai->wnum > bj->wnum) {
      /* feature only in b: contributes -weight */
      (*sumi)=(*bj);
      sumi->weight*=(-1);
      sumi++;
      bj++;
    }
    else if (ai->wnum < bj->wnum) {
      /* feature only in a: copied as-is */
      (*sumi)=(*ai);
      sumi++;
      ai++;
    }
    else {
      /* feature in both: subtract; drop the entry if it cancels to 0 */
      (*sumi)=(*ai);
      sumi->weight-=bj->weight;
      if(sumi->weight != 0)
        sumi++;
      ai++;
      bj++;
    }
  }
  /* Copy (negated) tail of b, then tail of a. */
  while (bj->wnum) {
    (*sumi)=(*bj);
    sumi->weight*=(-1);
    sumi++;
    bj++;
  }
  while (ai->wnum) {
    (*sumi)=(*ai);
    sumi++;
    ai++;
  }
  sumi->wnum=0;  /* terminate the word array */

  /* create_svector copies the words, so the temporary can be freed. */
  vec=create_svector(sum,"",1.0);
  free(sum);
  return(vec);
}
SVECTOR* add_ss(SVECTOR *a, SVECTOR *b)
/* compute the sum a+b of two sparse vectors */
/* Note: SVECTOR lists are not followed, but only the first
   SVECTOR is used */
{
  SVECTOR *vec;
  register WORD *sum,*sumi;
  register WORD *ai,*bj;
  long veclength;

  /* Pass 1: merge-count an upper bound on the number of result
     entries. Entries whose weights cancel to exactly zero are skipped
     in pass 2, so the allocation may be slightly larger than what is
     finally used. */
  ai=a->words;
  bj=b->words;
  veclength=0;
  while (ai->wnum && bj->wnum) {
    if(ai->wnum > bj->wnum) {
      veclength++;
      bj++;
    }
    else if (ai->wnum < bj->wnum) {
      veclength++;
      ai++;
    }
    else {
      veclength++;
      ai++;
      bj++;
    }
  }
  /* Count whichever vector still has trailing entries. */
  while (bj->wnum) {
    veclength++;
    bj++;
  }
  while (ai->wnum) {
    veclength++;
    ai++;
  }
  veclength++;  /* one extra slot for the wnum==0 terminator */
  /*** is veclength=lengSequence(a)+lengthSequence(b)? ***/

  sum=(WORD *)my_malloc(sizeof(WORD)*veclength);
  sumi=sum;

  /* Pass 2: sorted-list merge computing a+b entry by entry. */
  ai=a->words;
  bj=b->words;
  while (ai->wnum && bj->wnum) {
    if(ai->wnum > bj->wnum) {
      /* feature only in b: copied as-is */
      (*sumi)=(*bj);
      sumi++;
      bj++;
    }
    else if (ai->wnum < bj->wnum) {
      /* feature only in a: copied as-is */
      (*sumi)=(*ai);
      sumi++;
      ai++;
    }
    else {
      /* feature in both: add; drop the entry if it cancels to 0 */
      (*sumi)=(*ai);
      sumi->weight+=bj->weight;
      if(sumi->weight != 0)
        sumi++;
      ai++;
      bj++;
    }
  }
  /* Copy the tail of b, then the tail of a. */
  while (bj->wnum) {
    (*sumi)=(*bj);
    sumi++;
    bj++;
  }
  while (ai->wnum) {
    (*sumi)=(*ai);
    sumi++;
    ai++;
  }
  sumi->wnum=0;  /* terminate the word array */

  /* create_svector copies the words, so the temporary can be freed. */
  vec=create_svector(sum,"",1.0);
  free(sum);
  return(vec);
}
SVECTOR* add_list_ss(SVECTOR *a)
/* Compute the linear combination of an SVECTOR list, each element
   weighted by its own factor. The result is a single SVECTOR with
   factor 1.0; an empty (NULL) list yields an all-zero vector. */
{
  SVECTOR *total, *term, *prev, *p;
  WORD nullword[2];

  /* Guard clause: empty list -> zero vector. */
  if (!a) {
    nullword[0].wnum = 0;
    return (create_svector(nullword, "", 1.0));
  }

  total = smult_s(a, a->factor);
  for (p = a->next; p; p = p->next) {
    term = smult_s(p, p->factor);
    prev = total;
    total = add_ss(prev, term);
    /* Release the intermediates created on this iteration. */
    free_svector(prev);
    free_svector(term);
  }
  total->factor = 1.0;
  return (total);
}
void append_svector_list(SVECTOR *a, SVECTOR *b)
/* Link list b onto the end of list a (a must be non-NULL). */
{
  SVECTOR *tail = a;

  while (tail->next)      /* walk to the last element of a */
    tail = tail->next;
  tail->next = b;         /* splice b onto the end */
}
SVECTOR* smult_s(SVECTOR *a, double factor)
/* Return a new sparse vector equal to a scaled by factor. Entries
   whose scaled weight is exactly zero are dropped. The result keeps
   a's userdefined string and factor. */
{
  SVECTOR *result;
  WORD *scaled, *out, *in;
  long count;

  /* Count entries plus one slot for the wnum==0 terminator. */
  count = 1;
  for (in = a->words; in->wnum; in++)
    count++;

  scaled = (WORD *)my_malloc(sizeof(WORD) * count);
  out = scaled;
  for (in = a->words; in->wnum; in++) {
    *out = *in;
    out->weight *= factor;
    if (out->weight != 0)   /* drop entries that scaled to zero */
      out++;
  }
  out->wnum = 0;

  /* create_svector copies the words, so the temporary can be freed. */
  result = create_svector(scaled, a->userdefined, a->factor);
  free(scaled);
  return (result);
}
int featvec_eq(SVECTOR *a, SVECTOR *b)
/* Test two sparse vectors for equality of their feature values.
   Explicit zero-weight entries are treated as absent. Returns 1 if
   equal, 0 otherwise. */
{
  register WORD *ai,*bj;
  ai=a->words;
  bj=b->words;
  while (ai->wnum && bj->wnum) {
    if(ai->wnum > bj->wnum) {
      if((CFLOAT)(bj->weight) != 0)
        return(0);
      bj++;
    }
    else if (ai->wnum < bj->wnum) {
      if((CFLOAT)(ai->weight) != 0)
        return(0);
      ai++;
    }
    else {
      if((CFLOAT)(ai->weight) != (CFLOAT)(bj->weight))
        return(0);
      ai++;
      bj++;
    }
  }
  /* Fix: the original returned 1 here without examining trailing
     entries of the longer vector, so e.g. {1:1} compared equal to
     {1:1, 2:5}. Any remaining nonzero entry makes them unequal. */
  for(; ai->wnum; ai++)
    if((CFLOAT)(ai->weight) != 0)
      return(0);
  for(; bj->wnum; bj++)
    if((CFLOAT)(bj->weight) != 0)
      return(0);
  return(1);
}
double model_length_s(MODEL *model, KERNEL_PARM *kernel_parm)
/* Length (L2 norm) of the model's weight vector in feature space:
   sqrt( sum_ij alpha_i * alpha_j * K(x_i, x_j) ) over all support
   vectors (index 0 is unused by convention). */
{
  long i, j;
  double norm_sq = 0;

  for (i = 1; i < model->sv_num; i++) {
    for (j = 1; j < model->sv_num; j++) {
      norm_sq += model->alpha[i] * model->alpha[j]
                 * kernel(kernel_parm, model->supvec[i], model->supvec[j]);
    }
  }
  return (sqrt(norm_sq));
}
void clear_vector_n(double *vec, long int n)
/* Set vec[0..n] to zero — note the inclusive bound: n+1 entries. */
{
  long idx;

  for (idx = n; idx >= 0; idx--)
    vec[idx] = 0;
}
void add_vector_ns(double *vec_n, SVECTOR *vec_s, double faktor)
/* Add faktor * vec_s (sparse) into the dense vector vec_n, which is
   indexed directly by feature number (wnum). */
{
  WORD *w;

  for (w = vec_s->words; w->wnum; w++)
    vec_n[w->wnum] += (faktor * w->weight);
}
double sprod_ns(double *vec_n, SVECTOR *vec_s)
/* Inner product of a dense vector (indexed by feature number) with a
   sparse vector. */
{
  double acc = 0;
  WORD *w;

  for (w = vec_s->words; w->wnum; w++)
    acc += (vec_n[w->wnum] * w->weight);
  return (acc);
}
void add_weight_vector_to_linear_model(MODEL *model)
/* For the linear case, expand the support-vector representation into
   an explicit dense weight vector stored in model->lin_weights
   (totwords+1 entries; feature numbers index it directly). */
{
  long sv;
  SVECTOR *part;

  model->lin_weights = (double *)my_malloc(sizeof(double) * (model->totwords + 1));
  clear_vector_n(model->lin_weights, model->totwords);
  /* Accumulate alpha_i * factor * x_i over all support vectors
     (index 0 is unused by convention). */
  for (sv = 1; sv < model->sv_num; sv++) {
    for (part = (model->supvec[sv])->fvec; part; part = part->next)
      add_vector_ns(model->lin_weights, part, part->factor * model->alpha[sv]);
  }
}
/* end of svm_common.c */