📄 svm.cpp
int CSVM::read_model(char *modelfile,MODEL *model,long max_words,long ll)
{
FILE *modelfl;
long j,i;
char *line;
SVM_WORD *words;
register long wpos;
long wnum,pos;
double weight;
char version_buffer[100];
if (com_pro.show_action)
{
sprintf(temstr,"Reading model...");
printm(temstr);
}
words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*(max_words+10));
line = (char *)my_malloc(sizeof(char)*ll);
if ((modelfl = fopen (modelfile, "r")) == NULL)
{
printe (modelfile);
return -1;
}
fscanf(modelfl,"SVM-light Version %s\n",version_buffer);
if(strcmp(version_buffer,VERSION))
{
printe ("Version of model-file does not match version of svm_classify!");
return -1;
}
fscanf(modelfl,"%ld # kernel type\n",&(model->kernel_parm.kernel_type));
fscanf(modelfl,"%ld # kernel parameter -d \n",&(model->kernel_parm.poly_degree));
fscanf(modelfl,"%lf # kernel parameter -g \n",&(model->kernel_parm.rbf_gamma));
fscanf(modelfl,"%lf # kernel parameter -s \n",&(model->kernel_parm.coef_lin));
fscanf(modelfl,"%lf # kernel parameter -r \n",&(model->kernel_parm.coef_const));
fscanf(modelfl,"%s # kernel parameter -u \n",&(model->kernel_parm.custom));
fscanf(modelfl,"%ld # highest feature index \n",&(model->totwords));
fscanf(modelfl,"%ld # number of training documents \n",&(model->totdoc));
fscanf(modelfl,"%ld # number of support vectors plus 1 \n",&(model->sv_num));
fscanf(modelfl,"%lf # threshold b \n",&(model->b));
for(i=1;i<model->sv_num;i++)
{
fgets(line,(int)ll,modelfl);
pos=0;
wpos=0;
sscanf(line,"%lf",&model->alpha[i]);
while(line[++pos]>' ');
while((sscanf(line+pos,"%ld:%lf",&wnum,&weight) != EOF) && (wpos<max_words))
{
while(line[++pos]>' ');
words[wpos].wnum=wnum;
words[wpos].weight=weight;
wpos++;
}
model->supvec[i] = (DOC *)my_malloc(sizeof(DOC));
(model->supvec[i])->words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*(wpos+1));
for(j=0;j<wpos;j++)
{
(model->supvec[i])->words[j]=words[j];
}
((model->supvec[i])->words[wpos]).wnum=0;
(model->supvec[i])->twonorm_sq = sprod_ss((model->supvec[i])->words,(model->supvec[i])->words);
(model->supvec[i])->docnum = -1;
}
fclose(modelfl);
free(line);
free(words);
if (com_pro.show_readfile)
{
sprintf(temstr, "OK. (%d support vectors read)",(int)(model->sv_num-1));
printm(temstr);
}
return 0;
}
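/* For reference, read_model() above expects the model-file header to mirror its
   fscanf format strings.  The values below are invented purely for illustration
   (including the version string, which must match the VERSION macro):

       SVM-light Version V3.50
       0 # kernel type
       3 # kernel parameter -d
       1 # kernel parameter -g
       1 # kernel parameter -s
       1 # kernel parameter -r
       empty # kernel parameter -u
       9947 # highest feature index
       2000 # number of training documents
       879 # number of support vectors plus 1
       0.1045 # threshold b

   Each of the following sv_num-1 lines holds one support vector: its alpha value
   followed by the sparse "feature:weight" pairs read by the loop above. */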
/*read the data from text documents*/
int CSVM::read_documents(char *docfile,
DOC *docs,
long *label,
long max_words_doc,
long ll,
long *totwords,
long *totdoc,
int post_label)
{
char *line;
DOC doc;
long dnum=0,wpos,i,dpos=0,dneg=0,dunlab=0;
long doc_label;
FILE *docfl;
line = (char *)my_malloc(sizeof(char)*ll);
if ((docfl = fopen (docfile, "r")) == NULL)
{
printe (docfile);
return -1;
}
doc.words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*(max_words_doc+10));
if (com_pro.show_readfile)
{
sprintf(temstr,"Reading examples into memory...");
printm(temstr);
}
dnum=0;
(*totwords)=0;
while((!feof(docfl)) && fgets(line,(int)ll,docfl)) {
if(line[0] == '#') continue; /* line contains comments */
if(!parse_document(line,&doc,&doc_label,&wpos,max_words_doc))
{
sprintf(temstr,"Parsing error in line %ld!",dnum);
printm(temstr);
}
if(doc_label==0)
{
label[dnum]=0;
dunlab++;
}
else if(doc_label==post_label)
{
label[dnum]=1;
dpos++;
}
else
{
label[dnum]=-1;
dneg++;
}
if((wpos>1) && ((doc.words[wpos-2]).wnum>(*totwords)))
(*totwords)=(doc.words[wpos-2]).wnum;
docs[dnum].words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*wpos);
docs[dnum].docnum=dnum;
for(i=0;i<wpos;i++)
docs[dnum].words[i]=doc.words[i];
docs[dnum].twonorm_sq=doc.twonorm_sq;
dnum++;
if((dnum % 100) == 0&&com_pro.show_readfile)
{
sprintf(temstr,"read %ld..",dnum);
printm(temstr);
}
}
fclose(docfl);
free(line);
free(doc.words);
if (com_pro.show_action)
{
sprintf(temstr, "OK. (%ld examples read)", dnum);
printm(temstr);
sprintf(temstr,"%ld positive, %ld negative, and %ld unlabeled examples.",dpos,dneg,dunlab);
printm(temstr);
}
(*totdoc)=dnum;
return 0;
}
/*Parse one line of data file */
int CSVM::parse_document(char *line,DOC *doc,long *label,long*numwords,long max_words_doc)
{
register long wpos,pos;
long wnum;
double weight;
pos=0;
while(line[pos])
{ /* cut off comments */
if(line[pos] == '#')
{
line[pos]=0;
}
else
{
pos++;
}
}
wpos=0;
if((sscanf(line,"%ld",label)) == EOF) return(0);
pos=0;
while(line[pos]==' ') pos++;
while(line[pos]>' ') pos++;
while((sscanf(line+pos,"%ld:%lf",&wnum,&weight) != EOF) && (wpos<max_words_doc))
{
while(line[pos++]==' ');
while(line[++pos]>' ');
if(wnum<=0)
{
printe ("Feature numbers must be larger or equal to 1!!!");
sprintf(temstr,"LINE: %s",line);
printm(temstr);
return (0);
}
if((wpos>0) && ((doc->words[wpos-1]).wnum >= wnum))
{
printe ("Features must be in increasing order!!!");
sprintf(temstr,"LINE: %s",line);
printm(temstr);
return (0);
}
(doc->words[wpos]).wnum=wnum;
(doc->words[wpos]).weight=weight;
wpos++;
}
(doc->words[wpos]).wnum=0;
(*numwords)=wpos+1;
doc->docnum=-1;
doc->twonorm_sq=sprod_ss(doc->words,doc->words);
return(1);
}
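/* For reference, parse_document() reads one example per line in the usual
   SVM-light sparse format: a numeric label, then "feature:value" pairs with
   strictly increasing feature numbers (>= 1); a '#' starts a comment.  A
   hypothetical line:

       +1 4:0.25 37:1.0 1024:0.5 # optional comment

   In read_documents() a label of 0 marks an unlabeled example, a label equal to
   post_label is mapped to +1, and anything else to -1. */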
/* grep through file and count number of lines,
maximum number of spaces per line, and
longest line. */
int CSVM::nol_ll(char *file,long *nol,long *wol,long *ll)
{
FILE *fl;
int ic;
char c;
long current_length,current_wol;
if ((fl = fopen (file, "r")) == NULL)
{
printe (file);
return -1;
}
current_length=0;
current_wol=0;
(*ll)=0;
(*nol)=1;
(*wol)=0;
while((ic=getc(fl)) != EOF)
{
c=(char)ic;
current_length++;
if(c == ' ')
{
current_wol++;
}
if(c == '\n')
{
(*nol)++;
if(current_length>(*ll))
{
(*ll)=current_length;
}
if(current_wol>(*wol))
{
(*wol)=current_wol;
}
current_length=0;
current_wol=0;
}
}
fclose(fl);
return 0;
}
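/* Typical wiring (a sketch under assumptions, not taken from this file):
   nol_ll() is run first so its outputs can size the buffers handed to
   read_documents():

       long nol, wol, ll;
       if (nol_ll(docfile, &nol, &wol, &ll) != -1)
       {
           ll  += 2;   // room for the terminating newline/NUL
           wol += 2;   // spaces per line bound the number of words per line
           DOC  *docs  = (DOC *)my_malloc(sizeof(DOC)*nol);
           long *label = (long *)my_malloc(sizeof(long)*nol);
           long totwords, totdoc;
           read_documents(docfile, docs, label, wol, ll, &totwords, &totdoc, 1);
       }
*/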
long CSVM::minl(long a,long b)
{
if(a<b)
return(a);
else
return(b);
}
long CSVM::maxl(long a,long b)
{
if(a>b)
return(a);
else
return(b);
}
long CSVM::get_runtime()
{
clock_t start;
start = clock();
return((long)((double)start*100.0/(double)CLOCKS_PER_SEC));
}
int CSVM::isnan(double a)
{
return(_isnan(a));
}
void * CSVM::my_malloc(long size)
{
void *ptr;
ptr=(void *)malloc(size);
if(!ptr)
{
printe ("Out of memory!");
return (NULL);
}
return(ptr);
}
//print error on screen
void CSVM::printe(char* pInfo)
{
CMessage::PrintError(pInfo);
}
//print message on screen
void CSVM::printm(char* pInfo)
{
CMessage::PrintInfo(pInfo);
}
//custom kernel function
/////////////////////////////////////////////////////////////////
//chen 2001.09.14
/////////////////////////////////////////////////////////////////
double CSVM::ktt(int ta,int tb,double pa[],double pb[])
{
int ya,yb;
ya=((ta-1)%13)+1;
yb=((tb-1)%13)+1;
if (ya<13&&yb<13)
return pa[ta]*pa[ta+1]*pb[tb]*pb[tb+1];
else return 0.0;
}
double CSVM::kt(int t,double ta[],double tb[])
{
int x,y;
double sum=0.0;
x=((t-1)/16)+1;
y=((t-1)%16)+1;
if (x>1)
sum+=ta[t]*tb[t]*ta[t-16]*tb[t-16];
if (x>2)
sum+=ta[t]*tb[t]*ta[t-32]*tb[t-32];
if (x>3)
sum+=ta[t]*tb[t]*ta[t-48]*tb[t-48];
if (x<14)
sum+=ta[t]*tb[t]*ta[t-48]*tb[t-48];
if (x<15)
sum+=ta[t]*tb[t]*ta[t-32]*tb[t-32];
if (x<16)
sum+=ta[t]*tb[t]*ta[t+16]*tb[t+16];
if (y>3)
sum+=ta[t]*tb[t]*ta[t-3]*tb[t-3];
if (y>2)
sum+=ta[t]*tb[t]*ta[t-2]*tb[t-2];
if (y>1)
sum+=ta[t]*tb[t]*ta[t-1]*tb[t-1];
if (y<14)
sum+=ta[t]*tb[t]*ta[t+3]*tb[t+3];
if (y<15)
sum+=ta[t]*tb[t]*ta[t+2]*tb[t+2];
if (y<16)
sum+=ta[t]*tb[t]*ta[t+1]*tb[t+1];
return sum;
}
double CSVM::fi(double* tt)
{
int x,y;
double sum=0.0;
for (int t=1;t<=52;t++)
{
x=((t-1)/13)+1;
y=((t-1)%13)+1;
if (y<13)
sum+=tt[t]*tt[t+1];
}
return sum;
}
double CSVM::fs(double ta[])
{
double sum=0.0;
int x,y;
for (int i=1;i<256;i++)
{
x=((i-1)/16)+1;
y=((i-1)%16)+1;
if (x<16)
sum+=ta[i]*ta[i+16];
if (y<16)
sum+=ta[i]*ta[i+1];
}
return sum;
}
double CSVM::sumofword(DOC* a)
{
double sum=0.0;
SVM_WORD* pwa=a->words;
while (pwa->wnum)
{
sum+=pwa->weight;
pwa++;
}
return sum;
}
double CSVM::custom_kernel(KERNEL_PARM *kernel_parm,DOC *a,DOC*b)
{
double sum=0;
SVM_WORD *ai,*bj;
ai=a->words;
bj=b->words;
while (ai->wnum || bj->wnum)
{
if(ai->wnum == bj->wnum)
{
sum+=(fabs(ai->weight-bj->weight))*(fabs(ai->weight-bj->weight));
ai++;bj++;
}
else if ((ai->wnum) && (ai->wnum<bj->wnum || bj->wnum==0))
{
sum+=fabs(ai->weight)*fabs(ai->weight);
ai++;
}
else if ((bj->wnum) && (bj->wnum<ai->wnum || ai->wnum==0))
{
sum+=fabs(bj->weight)*fabs(bj->weight);
bj++;
}
}
// case 1: /* polynomial *///
//return((CFLOAT)pow(kernel_parm->coef_lin*sprod_ss(a->words,b->words)+kernel_parm->coef_const,(double)kernel_parm->poly_degree));
//case 2: /* radial basis function */
// return((CFLOAT)exp(-kernel_parm->rbf_gamma*(a->twonorm_sq-2*sprod_ss(a->words,b->words)+b->twonorm_sq)));
//case 3: /* sigmoid neural net */
// return((CFLOAT)tanh(kernel_parm->coef_lin*sprod_ss(a->words,b->words)+kernel_parm->coef_const));
//case 4: /* custom-kernel supplied in file kernel.h*/
// return((CFLOAT)custom_kernel(kernel_parm,a,b));
/*
SVM_WORD *ai,*bj;
ai=a->words;
bj=b->words;
double suma=0.0,sumb=0.0;
while (ai->wnum )
*/
return sum; /* assumed completion: return the accumulated squared distance */
}
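/* Illustrative note (an assumption, not from this listing): the merge loop in
   custom_kernel() accumulates the squared Euclidean distance ||a-b||^2 of the
   two sparse vectors.  A Gaussian-style custom kernel would typically wrap that
   distance the way the commented-out "case 2" line does, e.g.

       return exp(-kernel_parm->rbf_gamma*sum);
*/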