/* svm_struct_api.c */
}
void write_struct_model(char *file, STRUCTMODEL *sm,
STRUCT_LEARN_PARM *sparm)
{
/* Writes structural model sm to file file. For a sparse-kernel model
   the basis expansion is converted back into an svm-light MODEL
   before writing; otherwise the embedded svm_model is written
   directly. */
MODEL *svm_model;
long i,totwords;
WORD *w;
double *ortho_weights;
/* Store model in normal svm-light format */
if(sm->sparse_kernel_type > 0) {
/* materialize lin_weights inside the embedded model */
add_weight_vector_to_linear_model(sm->svm_model);
/* shallow copy so sm->svm_model itself stays untouched */
svm_model=(MODEL *)malloc(sizeof(MODEL));
(*svm_model)=(*sm->svm_model);
svm_model->sv_num=sm->expansion_size+1;
svm_model->kernel_parm.kernel_type=sm->sparse_kernel_type;
/* svm-light indexes support vectors from 1; offsetting the expansion
   array by -1 makes supvec[1] map to expansion[0] */
svm_model->supvec=sm->expansion-1;
svm_model->alpha=(double *)my_malloc(sizeof(double)*(svm_model->sv_num));
/* NOTE(review): invL appears to be the inverse of a (lower-
   triangular) factor used to orthogonalize the expansion — confirm
   against the learning code. This maps the linear weights back into
   expansion-basis coefficients. */
ortho_weights=prod_nvector_ltmatrix(sm->svm_model->lin_weights+1,sm->invL);
for(i=0;i<sm->expansion_size;i++)
svm_model->alpha[i+1]=ortho_weights[i];
free_nvector(ortho_weights);
totwords=0;
for(i=1;i<svm_model->sv_num;i++) /* find highest feature number */
for(w=svm_model->supvec[i]->fvec->words;w->wnum;w++)
if(totwords < w->wnum)
totwords=w->wnum;
svm_model->totwords=totwords;
write_model(file,svm_model);
/* only the copy and its alpha array were allocated here; supvec is
   borrowed from sm->expansion and must not be freed */
free(svm_model->alpha);
free(svm_model);
}
else {
write_model(file,sm->svm_model);
}
}
STRUCTMODEL read_struct_model(char *file, STRUCT_LEARN_PARM *sparm)
{
  /* Reads structural model sm from file file. This function is used
     only in the prediction module, not in the learning module. */
  STRUCTMODEL sm;

  sm.svm_model=read_model(file);
  /* the sparse-kernel helper structures are not stored in the model
     file and remain empty during classification */
  sm.invL=NULL;
  sm.expansion=NULL;
  /* classification-time settings for the learning parameters */
  sparm->loss_function=ERRORRATE;
  sparm->bias=0;
  sparm->bias_featurenum=0;
  sparm->num_features=sm.svm_model->totwords;
  return(sm);
}
void write_label(FILE *fp, LABEL y)
{
  /* Writes label y to file handle fp, one prediction per line. */
  int idx;
  for(idx=0; idx < y.totdoc; idx++)
    fprintf(fp,"%.8f\n",y.class[idx]);
}
void free_pattern(PATTERN x) {
  /* Frees the memory of x: every contained document, then the
     document array itself. */
  int idx;
  for(idx=0; idx < x.totdoc; idx++)
    free_example(x.doc[idx],1);
  free(x.doc);
}
void free_label(LABEL y) {
/* Frees the memory of y. The label owns only its class array. */
free(y.class);
}
void free_struct_model(STRUCTMODEL sm)
{
/* Frees the memory of model. */
int i;
/* if(sm.w) free(sm.w); */ /* this is free'd in free_model */
if(sm.svm_model) free_model(sm.svm_model,1);
/* add free calls for user defined data here */
/* sparse-kernel helper structures; NULL unless set during learning */
if(sm.invL) free_matrix(sm.invL);
if(sm.expansion) {
for(i=0;i<sm.expansion_size;i++)
free_example(sm.expansion[i],1);
free(sm.expansion);
}
}
void free_struct_sample(SAMPLE s)
{
  /* Frees the memory of sample s: each example's pattern and label,
     then the example array. */
  int idx;
  for(idx=0; idx < s.n; idx++) {
    free_pattern(s.examples[idx].x);
    free_label(s.examples[idx].y);
  }
  free(s.examples);
}
void print_struct_help()
{
/* Prints a help text that is appended to the common help text of
   svm_struct_learn. Covers the custom "--" options and the loss
   functions selectable with -l. */
printf("          --b float  -> value of L2-bias feature. A value of 0 implies not\n");
printf("                        having a bias feature. (default 1)\n");
printf("                        WARNING: This is implemented only for linear kernel!\n");
printf("          --p [0..]  -> fraction of positive examples to use as value of k for\n");
printf("                        Prec@k and Rec@k. 0 indicates to use (0.5 * #pos) for\n");
printf("                        Prec@k and (2 * #pos) for Rec@k. #pos is the number of\n");
printf("                        positive examples in the training set. (default 0)\n");
printf("          --t [0..]  -> Use sparse kernel expansion. Values like those for\n");
printf("                        normal kernel (i.e. option -t). (default 0)\n");
printf("          --k [0..]  -> Specifies the number of basis functions to use\n");
printf("                        for sparse kernel approximation. (default 500)\n");
printf("          --f string -> Specifies file that contains basis functions to use\n");
printf("                        for sparse kernel approximation. (default training\n");
printf("                        file)\n");
printf("\nThe following loss functions can be selected with the -l option:\n");
printf("    %2d  Zero/one loss: 1 if vector of predictions contains error, 0 otherwise.\n",ZEROONE);
printf("    %2d  F1: 100 minus the F1-score in percent.\n",FONE);
printf("    %2d  Errorrate: Percentage of errors in prediction vector.\n",ERRORRATE);
printf("    %2d  Prec/Rec Breakeven: 100 minus PRBEP in percent.\n",PRBEP);
printf("    %2d  Prec@k: 100 minus precision at k in percent.\n",PREC_K);
printf("    %2d  Rec@k: 100 minus recall at k in percent.\n",REC_K);
printf("    %2d  ROCArea: Percentage of swapped pos/neg pairs (i.e. 100 - ROCArea).\n\n",SWAPPEDPAIRS);
printf("NOTE: The '-c' parameters in SVM-light and SVM-perf are related as\n");
printf("      c_light = c_perf*100/n for the 'Errorrate' loss function, where n is the\n");
printf("      number of training examples.\n\n");
printf("The algorithms implemented in SVM-perf are described in:\n");
printf("- T. Joachims, A Support Vector Method for Multivariate Performance Measures,\n");
printf("  Proceedings of the International Conference on Machine Learning (ICML), 2005.\n");
printf("- T. Joachims, Training Linear SVMs in Linear Time, Proceedings of the \n");
printf("  ACM Conference on Knowledge Discovery and Data Mining (KDD), 2006.\n");
printf("  -> Papers are available at http://www.joachims.org/\n\n");
}
void parse_struct_parameters(STRUCT_LEARN_PARM *sparm)
{
  /* Parses the command line parameters that start with -- and fills in
     the corresponding fields of sparm (defaults otherwise). Exits on
     unrecognized options or invalid values. */
  int i;

  /* defaults */
  sparm->bias=1;
  sparm->prec_rec_k_frac=0.0;
  sparm->sparse_kernel_type=LINEAR;
  sparm->sparse_kernel_size=500;
  strcpy(sparm->sparse_kernel_file,"");
  /* set number of features to -1, indicating that it will be computed
     in init_struct_model() */
  sparm->num_features=-1;

  for(i=0;(i<sparm->custom_argc) && ((sparm->custom_argv[i])[0] == '-');i++) {
    /* BUGFIX: every recognized option consumes the next argument as its
       value; the original code read custom_argv[i+1] without checking
       that it exists (out-of-bounds read for a trailing option). */
    if(i+1 >= sparm->custom_argc) {
      printf("\nOption %s is missing a value!\n\n",sparm->custom_argv[i]);
      exit(0);
    }
    switch ((sparm->custom_argv[i])[2])
      {
      case 'b': i++; sparm->bias=atof(sparm->custom_argv[i]); break;
      case 'p': i++; sparm->prec_rec_k_frac=atof(sparm->custom_argv[i]); break;
      case 't': i++; sparm->sparse_kernel_type=atol(sparm->custom_argv[i]); break;
      case 'k': i++; sparm->sparse_kernel_size=atol(sparm->custom_argv[i]); break;
      case 'f': i++; strcpy(sparm->sparse_kernel_file,sparm->custom_argv[i]); break;
      default: printf("\nUnrecognized option %s!\n\n",sparm->custom_argv[i]);
               exit(0);
      }
  }
  /* Note that the validity of the value for sparm->prec_rec_k_frac in
     relation to #pos is checked in read_struct_examples() */
  if(sparm->prec_rec_k_frac < 0) {
    /* BUGFIX: message used to refer to option --k, but this value is
       set via option --p; also the check rejects negatives, not zero */
    printf("\nThe value of option --p must not be negative!\n\n");
    exit(0);
  }
}
void print_struct_help_classify()
{
/* Prints a help text that is appended to the common help text of
   svm_struct_classify. Only describes the generic "--" mechanism,
   since no custom classify options are defined. */
printf("         --* string -> custom parameters that can be adapted for struct\n");
printf("                       learning. The * can be replaced by any character\n");
printf("                       and there can be multiple options starting with --.\n");
}
void parse_struct_parameters_classify(char *attribute, char *value)
{
  /* Parses one command line parameter that starts with -- . The option
     name is given in attribute, its argument in value. No custom
     classify options are currently defined, so every option is
     rejected with an error and the program exits. */
  switch (attribute[2]) {
    /* case 'x': strcpy(xvalue,value); break; */
  default:
    printf("\nUnrecognized option %s!\n\n",attribute);
    exit(0);
  }
}
/*------- Performance measures --------*/
double zeroone(int a, int b, int c, int d)
{
  /* Zero/one measure: 0.0 if the contingency table contains no errors
     (everything lies on the diagonal, i.e. a+d accounts for all
     examples), 1.0 otherwise. */
  return ((a + d) == (a + b + c + d)) ? 0.0 : 1.0;
}
double fone(int a, int b, int c, int d)
{
  /* Returns the F1-score (harmonic mean of precision and recall) as a
     fractional value for the contingency table a,b,c,d; 0 when either
     precision or recall is undefined or zero. */
  if((a+b == 0) || (a+c == 0)) return(0.0);
  /* BUGFIX: with a==0 but b>0 and c>0, precision and recall are both
     zero and the division below computed 0/0 (NaN); F1 is 0 there. */
  if(a == 0) return(0.0);
  double precision=prec(a,b,c,d);
  double recall=rec(a,b,c,d);
  return(2.0*precision*recall/(precision+recall));
}
double prec(int a, int b, int c, int d)
{
  /* Returns precision a/(a+b) as a fractional value; 0 when nothing
     was predicted positive. */
  int predicted_pos = a + b;
  if(predicted_pos == 0)
    return 0.0;
  return (double)a / (double)predicted_pos;
}
double rec(int a, int b, int c, int d)
{
  /* Returns recall a/(a+c) as a fractional value; 0 when there are no
     actual positives. */
  int actual_pos = a + c;
  if(actual_pos == 0)
    return 0.0;
  return (double)a / (double)actual_pos;
}
double errorrate(int a, int b, int c, int d)
{
  /* Returns the fraction of errors, (b+c)/(a+b+c+d); 0 for an empty
     table. (The original comment said "number of errors", but the
     value returned is a fraction.) */
  int total = a + b + c + d;
  if(total == 0)
    return 0.0;
  return (double)(b + c) / (double)total;
}
double swappedpairs(LABEL y, LABEL ybar)
{
  /* Returns percentage of swapped pos/neg pairs (i.e. 100 - ROC Area)
     for prediction vectors that encode the number of misranked
     examples for each particular example. */
  /* WARNING: Works only for labels in the compressed representation */
  int idx;
  double total = 0;
  for(idx=0; idx < y.totdoc; idx++)
    total += fabs(y.class[idx] - ybar.class[idx]);
  /* each swapped pair is counted once from each side */
  return total / 2.0;
}
double rocarea(LABEL y, LABEL ybar)
{
/* Returns ROC Area for ybar containing scores that define a ranking
   of examples. Breaks ties in ranking pessimistically. */
long i,nump,numn;
double swappedpairs;
STRUCT_ID_SCORE *predset;
predset=(STRUCT_ID_SCORE *)my_malloc(sizeof(STRUCT_ID_SCORE)*(ybar.totdoc+1));
for(i=0;i<ybar.totdoc;i++) {
predset[i].score=ybar.class[i];
/* negated true label as tiebreak: among equal scores, negatives sort
   first, which is the pessimistic choice */
predset[i].tiebreak=-y.class[i];
predset[i].id=i;
}
qsort(predset,ybar.totdoc,sizeof(STRUCT_ID_SCORE),comparedown);
numn=0;
nump=0;
swappedpairs=0;
/* walk the ranking top-down: each positive is "swapped" with every
   negative ranked above it */
for(i=0;i<ybar.totdoc;i++) {
if(y.class[predset[i].id] > 0) {
swappedpairs+=numn;
nump++;
}
else {
numn++;
}
}
free(predset);
/* NOTE(review): divides by both nump and numn -- division by zero if
   the data contains only one class; confirm callers guarantee both
   classes are present */
return(100.0-100.0*swappedpairs/((double)numn)/((double)nump));
}
double prbep(LABEL y, LABEL ybar)
{
/* Returns PRBEP for ybar containing scores that define a ranking
   of examples. Breaks ties in ranking pessimistically. At the
   break-even point the top nump predictions are treated as positive,
   which makes precision and recall equal by construction. */
long i,nump,a;
STRUCT_ID_SCORE *predset;
predset=(STRUCT_ID_SCORE *)my_malloc(sizeof(STRUCT_ID_SCORE)*(ybar.totdoc+1));
nump=0;
for(i=0;i<ybar.totdoc;i++) {
predset[i].score=ybar.class[i];
/* negatives sort before positives on score ties (pessimistic) */
predset[i].tiebreak=-y.class[i];
predset[i].id=i;
if(y.class[i] > 0)
nump++;
}
qsort(predset,ybar.totdoc,sizeof(STRUCT_ID_SCORE),comparedown);
/* count true positives among the top nump ranked examples */
a=0;
for(i=0;i<nump;i++) {
if(y.class[predset[i].id] > 0) {
a++;
}
}
free(predset);
return(100.0*prec(a,nump-a,0,0));
}
double avgprec_compressed(LABEL y, LABEL ybar)
{
/* Returns Average Precision for y and ybar in compressed
   representation (also see avgprec()). Breaks ties in ranking
   pessimistically. */
int i,ii,nump,numn,a,b;
double apr;
STRUCT_ID_SCORE *predset;
nump=0;
numn=0;
for(i=0;i<ybar.totdoc;i++) {
if(y.class[i] > 0)
nump++;
else
numn++;
}
/* printf("nump=%d, numn=%d\n", nump, numn); */
/* collect only the positive examples; their compressed scores encode
   their rank position relative to the negatives */
ii=0;
predset=(STRUCT_ID_SCORE *)my_malloc(sizeof(STRUCT_ID_SCORE)*(nump+1));
for(i=0;i<ybar.totdoc;i++) {
if(y.class[i] > 0) {
predset[ii].score=ybar.class[i];
predset[ii].tiebreak=-y.class[i];
predset[ii].id=i;
ii++;
}
}
qsort(predset,nump,sizeof(STRUCT_ID_SCORE),comparedown);
apr=0;
for(a=1;a<=nump;a++) {
/* NOTE(review): (numn-score)/2 appears to recover the number of
   negatives ranked above the a-th positive from the compressed
   score encoding -- confirm against the caller that produces ybar */
b=(numn-predset[a-1].score)/2;
/* printf("negabove[%d]=%d,",a,b); */
apr+=prec(a,b,0,0);
}
free(predset);
return(100.0*(apr/(double)(nump)));
}
double avgprec(LABEL y, LABEL ybar)
{
/* Returns Average Precision for ybar containing scores that define a ranking
   of examples. Breaks ties in ranking pessimistically. */
long i,nump,numn;
double apr;
STRUCT_ID_SCORE *predset;
predset=(STRUCT_ID_SCORE *)my_malloc(sizeof(STRUCT_ID_SCORE)*(ybar.totdoc+1));
for(i=0;i<ybar.totdoc;i++) {
predset[i].score=ybar.class[i];
/* negatives sort before positives on score ties (pessimistic) */
predset[i].tiebreak=-y.class[i];
predset[i].id=i;
}
qsort(predset,ybar.totdoc,sizeof(STRUCT_ID_SCORE),comparedown);
numn=0;
nump=0;
apr=0;
/* walk the ranking top-down, accumulating precision at each positive */
for(i=0;i<ybar.totdoc;i++) {
if(y.class[predset[i].id] > 0) {
nump++;
apr+=prec(nump,numn,0,0);
}
else {
numn++;
}
}
free(predset);
/* NOTE(review): division by zero if the data contains no positive
   example (nump==0) -- confirm callers guarantee nump > 0 */
return(100.0*(apr/(double)(nump)));
}
/*------- Loss functions based on performance measures --------*/
double zeroone_loss(int a, int b, int c, int d)
{
  /* Zero/one loss: identical to the zero/one performance measure. */
  return zeroone(a, b, c, d);
}
double fone_loss(int a, int b, int c, int d)
{
  /* F1 loss: 100 minus the F1-score in percent. */
  double f = fone(a, b, c, d);
  return 100.0 * (1.0 - f);
}
double errorrate_loss(int a, int b, int c, int d)
{
  /* Errorrate loss: percentage of errors in the prediction vector. */
  return 100.0 * errorrate(a, b, c, d);
}
double prbep_loss(int a, int b, int c, int d)
{
  /* Precision/recall break-even loss in percent: 100*(1 - min(prec,rec)).
     WARNING: Returns lower bound on PRBEP, if b!=c. */
  double p = prec(a, b, c, d);
  double r = rec(a, b, c, d);
  double worse = (p < r) ? p : r;
  return 100.0 * (1.0 - worse);
}
double prec_k_loss(int a, int b, int c, int d)
{
  /* Prec@k loss: 100 minus precision at k in percent.
     WARNING: Only valid if called with a+c==k. */
  return 100.0 * (1.0 - prec(a, b, c, d));
}
double rec_k_loss(int a, int b, int c, int d)
{
  /* Rec@k loss: 100 minus recall at k in percent.
     WARNING: Only valid if called with a+c==k. */
  return 100.0 * (1.0 - rec(a, b, c, d));
}
double swappedpairs_loss(LABEL y, LABEL ybar)
{
  /* Loss for the ROCArea measure: the absolute number of swapped
     pos/neg pairs. A normalized variant,
     100.0*swappedpairs(y,ybar)/(nump*numn) with nump/numn the counts
     of positive/negative labels in y, was disabled in the original
     code; the loop that computed those (then unused) counts has been
     removed as dead code. */
  return(swappedpairs(y,ybar));
}
double avgprec_loss(LABEL y, LABEL ybar)
{
  /* Average-precision loss: 100 minus AvgPrec (compressed form). */
  return 100.0 - avgprec_compressed(y, ybar);
}