/***********************************************************************/
/*                                                                     */
/*   svm_struct_api.c  (instantiated for SVM-perform)                  */
/*                                                                     */
/*   Definition of the API for implementing SVM learning of            */
/*   structures (e.g. parsing, multi-label classification, HMM)        */
/*                                                                     */
/*   Author: Thorsten Joachims                                         */
/*   Date: 31.10.05                                                    */
/*                                                                     */
/*   Copyright (c) 2005  Thorsten Joachims - All rights reserved       */
/*                                                                     */
/*   This software is available for non-commercial use only. It must   */
/*   not be modified and distributed without prior permission of the   */
/*   author. The author is not responsible for implications from the   */
/*   use of this software.                                             */
/*                                                                     */
/***********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "svm_struct_api.h"
#include "svm_light/svm_common.h"
#include "svm_struct/svm_struct_common.h"
#include "svm_struct/svm_struct_learn.h"
#define MAX(x,y) ((x) < (y) ? (y) : (x))
#define MIN(x,y) ((x) > (y) ? (y) : (x))
#define SIGN(x) ((x) > (0) ? (1) : (((x) < (0) ? (-1) : (0))))
int compareup(const void *a, const void *b)
{
  double va,vb;
  va=((STRUCT_ID_SCORE *)a)->score;
  vb=((STRUCT_ID_SCORE *)b)->score;
  if(va == vb) {
    va=((STRUCT_ID_SCORE *)a)->tiebreak;
    vb=((STRUCT_ID_SCORE *)b)->tiebreak;
  }
  return((va > vb) - (va < vb));
}

int comparedown(const void *a, const void *b)
{
  return(-compareup(a,b));
}
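
/* Illustrative sketch (added, not part of the original file): the two
   comparators above are qsort(3) callbacks over STRUCT_ID_SCORE
   records. Assuming an array of scores filled in elsewhere, a
   hypothetical helper for a descending sort by score (ties broken by
   the tiebreak field) would look like this: */
#if 0
static void sort_scores_descending(STRUCT_ID_SCORE *scores, long n)
{
  /* comparedown puts larger scores first; compareup would sort ascending */
  qsort(scores, (size_t)n, sizeof(STRUCT_ID_SCORE), comparedown);
}
#endif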
LABEL find_most_violated_constraint_errorrate(PATTERN x, LABEL y,
                                              STRUCTMODEL *sm,
                                              STRUCT_LEARN_PARM *sparm,
                                              int loss_type);
LABEL find_most_violated_constraint_thresholdmetric(PATTERN x, LABEL y,
                                                    STRUCTMODEL *sm,
                                                    STRUCT_LEARN_PARM *sparm,
                                                    int loss_type);
LABEL find_most_violated_constraint_rankmetric(PATTERN x, LABEL y,
                                               STRUCTMODEL *sm,
                                               STRUCT_LEARN_PARM *sparm,
                                               int loss_type);
LABEL find_most_violated_constraint_avgprec(PATTERN x, LABEL y,
                                            STRUCTMODEL *sm,
                                            STRUCT_LEARN_PARM *sparm,
                                            int loss_type);
double zeroone(int a, int b, int c, int d);
double fone(int a, int b, int c, int d);
double errorrate(int a, int b, int c, int d);
double prec(int a, int b, int c, int d);
double rec(int a, int b, int c, int d);
double swappedpairs(LABEL y, LABEL ybar);
double rocarea(LABEL y, LABEL ybar);
double prbep(LABEL y, LABEL ybar);
double avgprec(LABEL y, LABEL ybar);
double zeroone_loss(int a, int b, int c, int d);
double fone_loss(int a, int b, int c, int d);
double errorrate_loss(int a, int b, int c, int d);
double prbep_loss(int a, int b, int c, int d);
double prec_k_loss(int a, int b, int c, int d);
double rec_k_loss(int a, int b, int c, int d);
double swappedpairs_loss(LABEL y, LABEL ybar);
double avgprec_loss(LABEL y, LABEL ybar);
void svm_struct_learn_api_init(int argc, char* argv[])
{
  /* Called in learning part before anything else is done to allow
     any initializations that might be necessary. */
}

void svm_struct_learn_api_exit()
{
  /* Called in learning part at the very end to allow any clean-up
     that might be necessary. */
}

void svm_struct_classify_api_init(int argc, char* argv[])
{
  /* Called in prediction part before anything else is done to allow
     any initializations that might be necessary. */
}

void svm_struct_classify_api_exit()
{
  /* Called in prediction part at the very end to allow any clean-up
     that might be necessary. */
}
SAMPLE read_struct_examples(char *file, STRUCT_LEARN_PARM *sparm)
{
  /* Reads struct examples and returns them in sample. The number of
     examples must be written into sample.n */
  SAMPLE   sample;  /* sample */
  EXAMPLE  *examples;
  long     n;       /* number of examples */
  long     totwords,maxlength=0,length,i,j,nump=0,numn=0;
  WORD     *words,*w;

  /* testing the matrix code
  MATRIX *G,*L,*LT,*GG,*LI,*I;
  G=create_matrix(3,3);
  G->element[0][0]=5;
  G->element[0][1]=2;
  G->element[0][2]=3;
  G->element[1][1]=5;
  G->element[1][2]=1;
  G->element[2][2]=5;
  print_matrix(G);
  L=cholesky_matrix(G);
  print_matrix(L);
  LT=transpose_matrix(L);
  print_matrix(LT);
  GG=prod_matrix_matrix(L,LT);
  print_matrix(GG);
  LI=invert_ltriangle_matrix(L);
  print_matrix(LI);
  I=prod_matrix_matrix(L,LI);
  print_matrix(I);
  */

  /* we have only one big example */
  examples=(EXAMPLE *)my_malloc(sizeof(EXAMPLE));
  /* Using the read_documents function from SVM-light */
  read_documents(file,&examples[0].x.doc,&examples[0].y.class,&totwords,&n);
  examples[0].x.totdoc=n;
  examples[0].y.totdoc=n;
  sample.n=1;
  sample.examples=examples;

  for(i=0;i<sample.examples[0].x.totdoc;i++) {
    length=1;
    if(sample.examples[0].y.class[i]>0)
      nump++;
    else
      numn++;
    for(w=sample.examples[0].x.doc[i]->fvec->words;w->wnum;w++) {
      length++;
      if(length > maxlength) /* find vector with max elements */
        maxlength=length;
    }
  }

  /* add feature for bias if necessary */
  /* WARNING: Currently this works correctly only for linear kernel! */
  sparm->bias_featurenum=0;
  if(sparm->bias != 0) {
    words=(WORD *)my_malloc(sizeof(WORD)*(maxlength+1));
    sparm->bias_featurenum=totwords+1;
    totwords++;
    for(i=0;i<sample.examples[0].x.totdoc;i++) {
      for(j=0;sample.examples[0].x.doc[i]->fvec->words[j].wnum;j++)
        words[j]=sample.examples[0].x.doc[i]->fvec->words[j];
      words[j].wnum=sparm->bias_featurenum; /* bias */
      words[j].weight=sparm->bias;
      j++;
      words[j].wnum=0;                      /* end marker */
      words[j].weight=0;
      free_svector(sample.examples[0].x.doc[i]->fvec);
      sample.examples[0].x.doc[i]->fvec=create_svector(words,"",1.0);
    }
    free(words);
  }
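
  /* Note (added): e.g. with --b 1, a document "1:0.5 3:2.0" is rewritten
     as "1:0.5 3:2.0 <totwords+1>:1". The learned weight on this extra,
     always-on feature plays the role of the threshold b in w*x+b, which
     the otherwise unbiased linear rule sign(w*x) would lack. */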
  /* Remove all features with numbers larger than num_features, if
     num_features is set to a positive value. This is important for
     svm_struct_classify. */
  if(sparm->num_features > 0)
    for(i=0;i<sample.examples[0].x.totdoc;i++)
      for(j=0;sample.examples[0].x.doc[i]->fvec->words[j].wnum;j++)
        if(sample.examples[0].x.doc[i]->fvec->words[j].wnum>sparm->num_features) {
          sample.examples[0].x.doc[i]->fvec->words[j].wnum=0;
          sample.examples[0].x.doc[i]->fvec->words[j].weight=0;
        }
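
  /* Note (added): e.g. a model trained with num_features=1000 must
     ignore a test-time feature 1005. Since SVM-light feature numbers
     are in ascending order and a wnum of 0 terminates the word list,
     zeroing the first out-of-range feature effectively truncates the
     sparse vector there. */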
  /* change label value for better scaling when using threshold metrics */
  if((sparm->loss_function == ZEROONE)
     || (sparm->loss_function == FONE)
     || (sparm->loss_function == ERRORRATE)
     || (sparm->loss_function == PRBEP)
     || (sparm->loss_function == PREC_K)
     || (sparm->loss_function == REC_K)) {
    for(i=0;i<sample.examples[0].x.totdoc;i++) {
      if(sample.examples[0].y.class[i]>0)
        sample.examples[0].y.class[i]=0.5*100.0/(numn+nump);
      else
        sample.examples[0].y.class[i]=-0.5*100.0/(numn+nump);
    }
  }
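
  /* Note (added): after this rescaling every label has magnitude
     0.5*100/(numn+nump), e.g. +-0.25 for a 200-example training set.
     The factor 100 puts the induced margins on the percentage scale
     on which these threshold metrics are reported. */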
  /* change label value for easy computation of rank metrics (e.g. ROC-area) */
  if(sparm->loss_function == SWAPPEDPAIRS) {
    for(i=0;i<sample.examples[0].x.totdoc;i++) {
      /* if(sample.examples[0].y.class[i]>0)
           sample.examples[0].y.class[i]=numn*0.5;
         else
           sample.examples[0].y.class[i]=-nump*0.5; */
      if(sample.examples[0].y.class[i]>0)
        sample.examples[0].y.class[i]=0.5*100.0/nump;
      else
        sample.examples[0].y.class[i]=-0.5*100.0/numn;
    }
  }
  if(sparm->loss_function == AVGPREC) {
    for(i=0;i<sample.examples[0].x.totdoc;i++) {
      if(sample.examples[0].y.class[i]>0)
        sample.examples[0].y.class[i]=numn;
      else
        sample.examples[0].y.class[i]=-nump;
    }
  }
  return(sample);
}
void init_struct_model(SAMPLE sample, STRUCTMODEL *sm,
                       STRUCT_LEARN_PARM *sparm, LEARN_PARM *lparm,
                       KERNEL_PARM *kparm)
{
  /* Initialize structmodel sm. The weight vector w does not need to be
     initialized, but you need to provide the maximum size of the
     feature space in sizePsi. This is the maximum number of different
     weights that can be learned. Later, the weight vector w will
     contain the learned weights for the model. */
  long     i,j,k,totwords=0,totdoc=0,totexp=0,nump=0,numn=0,new_size;
  WORD     *words,*w;
  DOC      **orgdoc,**basis;
  double   *dummy;
  MATRIX   *G,*L;
  double   *indep,ii,weight;

  totdoc=sample.examples[0].x.totdoc;
  sm->sparse_kernel_type=sparm->sparse_kernel_type;
  sm->invL=NULL;
  sm->expansion=NULL;

  /* When using the sparse kernel approximation, this replaces each
     original feature vector with the vector of kernel values between
     the original feature vector and the expansion. */
  if(sm->sparse_kernel_type > 0) {
    kparm->kernel_type=sm->sparse_kernel_type;
    if(strcmp(sparm->sparse_kernel_file,"")) {
      if(struct_verbosity > 0) {
        printf("Reading basis functions for sparse kernel expansion from '%s'...",
               sparm->sparse_kernel_file);
        fflush(stdout);
      }
      read_documents(sparm->sparse_kernel_file,&basis,&dummy,&totwords,&totexp);
      free(dummy);
      if(struct_verbosity > 0)
        printf("done.\n");
    }
    else {
      basis=(DOC **)malloc(sizeof(DOC *)*totdoc);
      for(i=0;i<totdoc;i++) {
        basis[i]=(DOC *)malloc(sizeof(DOC));
        (*(basis[i]))=(*(sample.examples[0].x.doc[i]));
        basis[i]->fvec=copy_svector(sample.examples[0].x.doc[i]->fvec);
      }
      totexp=totdoc;
    }
    /* determine examples to use in expansion: B */
    if(struct_verbosity > 0) {
      printf("Selecting basis functions for sparse kernel expansion...");
      fflush(stdout);
    }
    if(sparm->sparse_kernel_size > totexp)
      sparm->sparse_kernel_size=totexp;
    sm->expansion=(DOC **)malloc(sizeof(DOC *)*totexp);
    sm->expansion_size=0;
    for(ii=0.5;ii<totexp;ii+=((double)totexp/sparm->sparse_kernel_size)) {
      sm->expansion[sm->expansion_size]=(DOC *)malloc(sizeof(DOC));
      (*(sm->expansion[sm->expansion_size]))=(*(basis[(long)ii]));
      sm->expansion[sm->expansion_size]->fvec=copy_svector(basis[(long)ii]->fvec);
      sm->expansion_size++;
    }
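
    /* Note (added): the loop above picks sparse_kernel_size basis
       vectors evenly spaced over the totexp candidates. E.g. with
       totexp=10 and sparse_kernel_size=4, ii takes the values
       0.5, 3.0, 5.5, 8.0, so documents 0, 3, 5, and 8 are copied
       into the expansion. */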
    for(i=0;i<totexp;i++)
      free_example(basis[i],1);
    free(basis);
    if(struct_verbosity > 0)
      printf("done.\n");

    /* Make sure they are all independent. If not, select an independent
       subset. */
    if(struct_verbosity > 0) {
      printf("Finding independent subset of vectors...");
      fflush(stdout);
    }
    G=create_matrix(sm->expansion_size,sm->expansion_size);
    for(i=0;i<sm->expansion_size;i++) {   /* need only upper triangle */
      for(j=i;j<sm->expansion_size;j++) {
        G->element[i][j]=kernel(kparm,sm->expansion[i],sm->expansion[j]);
      }
    }
    indep=find_indep_subset_of_matrix(G,0.000001);
    free_matrix(G);
    new_size=0;
    for(i=0;i<sm->expansion_size;i++) {
      if(indep[i] != 0) {
        sm->expansion[new_size]=sm->expansion[i];
        new_size++;
      }
      else {
        free_example(sm->expansion[i],1);
      }
    }
    free_nvector(indep);
    if(struct_verbosity>0)
      printf("found %ld of %ld...",new_size,sm->expansion_size);
    sm->expansion_size=new_size;
    if(struct_verbosity > 0)
      printf("done.\n");
    /* compute matrix B^T*B */
    if(struct_verbosity > 0) {
      printf("Computing Gram matrix for kernel expansion...");
      fflush(stdout);
    }
    G=create_matrix(sm->expansion_size,sm->expansion_size);
    for(i=0;i<sm->expansion_size;i++) {  /* need upper triangle for cholesky */
      for(j=i;j<sm->expansion_size;j++) {
        G->element[i][j]=kernel(kparm,sm->expansion[i],sm->expansion[j]);
      }
    }
    if(struct_verbosity > 0)
      printf("done.\n");
    if(struct_verbosity > 0) {
      printf("Computing Cholesky decomposition and inverting...");
      fflush(stdout);
    }
    L=cholesky_matrix(G);
    sm->invL=invert_ltriangle_matrix(L);
    free_matrix(L);
    free_matrix(G);
    if(struct_verbosity > 0)
      printf("done.\n");

    /* compute new features for each example x: B^T*x */
    if(struct_verbosity > 0) {
      printf("Replacing feature vectors...");
      fflush(stdout);
    }
    orgdoc=sample.examples[0].x.doc;
    sample.examples[0].x.doc=(DOC **)malloc(sizeof(DOC *)*totdoc);
    words=(WORD *)malloc(sizeof(WORD)*(sm->expansion_size+1));
    for(i=0;i<totdoc;i++) {
      k=0;
      for(j=0;j<sm->expansion_size;j++) {
        weight=kernel(kparm,orgdoc[i],sm->expansion[j]);
        if(weight != 0) {
          words[k].wnum=j+1;
          words[k].weight=weight;
          k++;
        }
      }
      words[k].wnum=0; /* end marker */
      sample.examples[0].x.doc[i]=create_example(orgdoc[i]->docnum,
                                                 orgdoc[i]->queryid,
                                                 orgdoc[i]->slackid,
                                                 orgdoc[i]->costfactor,
                                                 create_svector(words,
                                                   orgdoc[i]->fvec->userdefined,1.0));
    }
    free(words);
    for(i=0;i<totdoc;i++)
      free_example(orgdoc[i],1);
    free(orgdoc);
    kparm->kernel_type=LINEAR;
    if(struct_verbosity > 0)
      printf("done.\n");
  }
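
  /* Note (added): the block above implements a Nystrom-style kernel
     approximation. Each document x is replaced by its vector of kernel
     values against the basis B, k_B(x) = (k(x,b_1),...,k(x,b_m)). Since
     the basis Gram matrix factors as G = L*L^T, whitening with the
     stored invL = L^-1 yields
       <L^-1 k_B(x), L^-1 k_B(x')> = k_B(x)^T G^-1 k_B(x'),
     the standard Nystrom approximation of k(x,x'). This is why
     kernel_type is reset to LINEAR above: a linear model over the
     transformed vectors stands in for the original non-linear kernel. */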
  /* count number of positive and negative examples */
  for(i=0;i<sample.examples[0].x.totdoc;i++) {
    if(sample.examples[0].y.class[i]>0)
      nump++;
    else
      numn++;
  }

  totwords=0;
  for(i=0;i<totdoc;i++)  /* find highest feature number */
    for(w=sample.examples[0].x.doc[i]->fvec->words;w->wnum;w++)
      if(totwords < w->wnum)
        totwords=w->wnum;
  sparm->num_features=totwords;
  if(struct_verbosity>=0)
    printf("Training set properties: %ld features, %ld examples (%ld pos / %ld neg)\n",
           sparm->num_features,totdoc,nump,numn);
  sm->sizePsi=sparm->num_features;
  if(struct_verbosity>=2)
    printf("Size of Psi: %ld\n",sm->sizePsi);

  /* fill in default value for k when using Prec@k or Rec@k */
  if(sparm->loss_function == PREC_K) {
    if(sparm->prec_rec_k_frac == 0) {
      sparm->prec_rec_k_frac=0.5;
    }
    else if(sparm->prec_rec_k_frac > 1) {
      printf("\nERROR: The value of option --k for Prec@k must not be larger than 1.0!\n\n");
      exit(0);
    }
  }
  if(sparm->loss_function == REC_K) {
    if(sparm->prec_rec_k_frac == 0) {
      sparm->prec_rec_k_frac=2;
    }
    else if(sparm->prec_rec_k_frac < 1) {
      printf("\nERROR: The value of option --k for Rec@k must not be smaller than 1.0!\n\n");
      exit(0);
    }
  }

  /* make sure that the bias feature is not used with kernels */
  if((kparm->kernel_type != LINEAR) && (sparm->bias != 0)) {
    printf("\nERROR: The value of option --b must be zero when kernels are used.\n");
    printf("The option to use a bias for non-linear kernels is not implemented yet!\n\n");
    exit(0);
  }
}
CONSTSET init_struct_constraints(SAMPLE sample, STRUCTMODEL *sm,
                                 STRUCT_LEARN_PARM *sparm)
{
  /* Initializes the optimization problem. Typically, you do not need
     to change this function, since you want to start with an empty
     set of constraints. However, if for example you have constraints
     that certain weights need to be positive, you might put that in
     here. The constraints are represented as lhs[i]*w >= rhs[i]. lhs
     is an array of feature vectors, rhs is an array of doubles. m is
     the number of constraints. The function returns the initial
     set of constraints. */
  CONSTSET c;
  long     sizePsi=sm->sizePsi;
  long     i;
  WORD     words[2];
  if(1) { /* normal case: start with empty set of constraints */
    c.lhs=NULL;
    c.rhs=NULL;
    c.m=0;
  }
  /* An else-branch that adds positivity constraints on individual
     weights (as sketched in the comment above) would go here; sizePsi,
     i, and words are declared for that purpose. */
  return(c);
}