📄 svm_learn.c
printf("Constructing %ld rank constraints...",totpair); fflush(stdout);
docdiff=(DOC **)my_malloc(sizeof(DOC)*totpair);
target=(double *)my_malloc(sizeof(double)*totpair);
greater=(long *)my_malloc(sizeof(long)*totpair);
lesser=(long *)my_malloc(sizeof(long)*totpair);
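  /* Build one classification example per pair of documents (i,j) that
     belongs to the same query but has different rank values. For the
     linear kernel the difference vector x_i-x_j is formed explicitly;
     for nonlinear kernels it is kept implicit as a two-element SVECTOR
     list with factors +1 and -1, which the kernel routines expand to
     K(x_i,.)-K(x_j,.). */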
  k=0;
  for(i=0;i<totdoc;i++) {
    for(j=i+1;j<totdoc;j++) {
      if(docs[i]->queryid == docs[j]->queryid) {
        cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0;
        if(rankvalue[i] > rankvalue[j]) {
          if(kernel_parm->kernel_type == LINEAR)
            docdiff[k]=create_example(k,0,0,cost,
                                      sub_ss(docs[i]->fvec,docs[j]->fvec));
          else {
            flow=copy_svector(docs[j]->fvec);
            flow->factor=-1.0;
            flow->next=NULL;
            fhigh=copy_svector(docs[i]->fvec);
            fhigh->factor=1.0;
            fhigh->next=flow;
            docdiff[k]=create_example(k,0,0,cost,fhigh);
          }
          target[k]=1;
          greater[k]=i;
          lesser[k]=j;
          k++;
        }
        else if(rankvalue[i] < rankvalue[j]) {
          if(kernel_parm->kernel_type == LINEAR)
            docdiff[k]=create_example(k,0,0,cost,
                                      sub_ss(docs[i]->fvec,docs[j]->fvec));
          else {
            flow=copy_svector(docs[j]->fvec);
            flow->factor=-1.0;
            flow->next=NULL;
            fhigh=copy_svector(docs[i]->fvec);
            fhigh->factor=1.0;
            fhigh->next=flow;
            docdiff[k]=create_example(k,0,0,cost,fhigh);
          }
          target[k]=-1;
          greater[k]=i;
          lesser[k]=j;
          k++;
        }
      }
    }
  }
printf("done.\n"); fflush(stdout);
/* need to get a bigger kernel cache */
if(*kernel_cache) {
kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
kernel_cache_cleanup(*kernel_cache);
(*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size);
}
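  /* Note: the rebuilt cache must index totpair difference vectors rather
     than the totdoc original documents; the memory budget stays the same
     (buffsize counts CFLOAT entries and is converted back to MB above). */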
  /* must use unbiased hyperplane on difference vectors */
  learn_parm->biased_hyperplane=0;

  pairmodel=(MODEL *)my_malloc(sizeof(MODEL));
  svm_learn_classification(docdiff,target,totpair,totwords,learn_parm,
                           kernel_parm,(*kernel_cache),pairmodel,NULL);

  /* Transfer the result into a more compact model. If you would like
     to output the original model on pairs of documents, see below. */
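  /* Each support vector of pairmodel is a difference vector
     x_greater-x_lesser, so the weight vector is
         w = sum_k alpha_k (x_greater[k] - x_lesser[k]),
     where pairmodel->alpha[k] already carries the label sign (SVM-light
     stores alpha_i*y_i). Collecting the coefficients per original
     document below yields an equivalent, more compact SV expansion over
     the documents themselves. */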
  alpha=(double *)my_malloc(sizeof(double)*totdoc);
  for(i=0;i<totdoc;i++) {
    alpha[i]=0;
  }
  for(i=1;i<pairmodel->sv_num;i++) {
    alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i];
    alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i];
  }
  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
  model->supvec[0]=0;  /* element 0 reserved and empty for now */
  model->alpha[0]=0;
  model->sv_num=1;
  for(i=0;i<totdoc;i++) {
    if(alpha[i]) {
      model->supvec[model->sv_num]=docs[i];
      model->alpha[model->sv_num]=alpha[i];
      model->index[i]=model->sv_num;
      model->sv_num++;
    }
    else {
      model->index[i]=-1;
    }
  }
  model->at_upper_bound=0;
  model->b=0;
  model->lin_weights=NULL;
  model->totwords=totwords;
  model->totdoc=totdoc;
  model->kernel_parm=(*kernel_parm);
  model->loo_error=-1;
  model->loo_recall=-1;
  model->loo_precision=-1;
  model->xa_error=-1;
  model->xa_recall=-1;
  model->xa_precision=-1;

  free(alpha);
  free(greater);
  free(lesser);
  free(target);

  /* If you would like to output the original model on pairs of
     documents, replace the following lines with '(*model)=(*pairmodel);' */
  for(i=0;i<totpair;i++)
    free_example(docdiff[i],1);
  free(docdiff);
  free_model(pairmodel,0);
}
/* The following solves a freely defined and given set of
   inequalities. The optimization problem is of the following form:

       min  0.5 w*w + C sum_i C_i \xi_i
       s.t. x_i * w > rhs_i - \xi_i

   This corresponds to the '-z o' option. */
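/* A minimal calling sketch (illustration only, not part of the original
   file). It assumes docs[] and rhs[] were already built, e.g. with
   create_example(), and that the remaining LEARN_PARM/KERNEL_PARM fields
   are filled the way svm_learn_main.c fills them. With a linear kernel
   the cache argument may be NULL, and svm_c=0.0 selects the default C
   computed below:

     LEARN_PARM learn_parm;
     KERNEL_PARM kernel_parm;
     MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));
     ...
     kernel_parm.kernel_type=LINEAR;
     learn_parm.svm_c=0.0;
     svm_learn_optimization(docs,rhs,totdoc,totwords,&learn_parm,
                            &kernel_parm,NULL,model,NULL);
*/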
void svm_learn_optimization(DOC **docs, double *rhs,
                            long int totdoc, long int totwords,
                            LEARN_PARM *learn_parm,
                            KERNEL_PARM *kernel_parm,
                            KERNEL_CACHE *kernel_cache, MODEL *model,
                            double *alpha)
     /* docs:        Left-hand side of inequalities (x-part) */
     /* rhs:         Right-hand side of inequalities */
     /* totdoc:      Number of inequalities in docs/rhs */
     /* totwords:    Number of features (i.e. highest feature index) */
     /* learn_parm:  Learning parameters */
     /* kernel_parm: Kernel parameters */
     /* kernel_cache:Initialized cache of size 1*totdoc, if using a kernel.
                     NULL if linear. */
     /* model:       Returns solution as SV expansion (assumed empty before called) */
     /* alpha:       Start values for the alpha variables or NULL
                     pointer. The new alpha values are returned after
                     optimization if not NULL. Array must be of size totdoc. */
{
  long i,*label;
  long misclassified,upsupvecnum;
  double loss,model_length,example_length;
  double maxdiff,*lin,*a,*c;
  long runtime_start,runtime_end;
  long iterations,maxslackid,svsetnum;
  long *unlabeled,*inconsistent;
  double r_delta_sq=0,r_delta,r_delta_avg;
  long *index,*index2dnum;
  double *weights,*slack,*alphaslack;
  CFLOAT *aicache;  /* buffer to keep one row of hessian */
  TIMING timing_profile;
  SHRINK_STATE shrink_state;

  runtime_start=get_runtime();
  timing_profile.time_kernel=0;
  timing_profile.time_opti=0;
  timing_profile.time_shrink=0;
  timing_profile.time_update=0;
  timing_profile.time_model=0;
  timing_profile.time_check=0;
  timing_profile.time_select=0;
  kernel_cache_statistic=0;

  learn_parm->totwords=totwords;

  /* make sure -n value is reasonable */
  if((learn_parm->svm_newvarsinqp < 2)
     || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
  }
  init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);

  label = (long *)my_malloc(sizeof(long)*totdoc);
  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
  c = (double *)my_malloc(sizeof(double)*totdoc);
  a = (double *)my_malloc(sizeof(double)*totdoc);
  lin = (double *)my_malloc(sizeof(double)*totdoc);
  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));

  model->at_upper_bound=0;
  model->b=0;
  model->supvec[0]=0;  /* element 0 reserved and empty for now */
  model->alpha[0]=0;
  model->lin_weights=NULL;
  model->totwords=totwords;
  model->totdoc=totdoc;
  model->kernel_parm=(*kernel_parm);
  model->sv_num=1;
  model->loo_error=-1;
  model->loo_recall=-1;
  model->loo_precision=-1;
  model->xa_error=-1;
  model->xa_recall=-1;
  model->xa_precision=-1;

  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
  r_delta_sq=r_delta*r_delta;

  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
  if(learn_parm->svm_c == 0.0) {  /* default value for C */
    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
    if(verbosity>=1)
      printf("Setting default regularization parameter C=%.4f\n",
             learn_parm->svm_c);
  }
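  /* The default C = 1/r_delta_avg^2 matches SVM-light's documented '-c'
     default of [avg. x*x]^-1: r_delta_avg estimates the average length
     of the examples in feature space, so the default scales with the
     data rather than being a fixed constant. */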
  learn_parm->biased_hyperplane=0;  /* learn an unbiased hyperplane */
  learn_parm->eps=0.0;              /* no margin, unless explicitly handcoded
                                       in the right-hand side of the
                                       training set */

  for(i=0;i<totdoc;i++) {  /* various inits */
    docs[i]->docnum=i;
    a[i]=0;
    lin[i]=0;
    c[i]=rhs[i];           /* set right-hand side */
    unlabeled[i]=0;
    inconsistent[i]=0;
    learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
                            docs[i]->costfactor;
    label[i]=1;
  }
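  /* A slackid of 0 means 'no shared slack assigned'; when shared slacks
     are requested, every example therefore has to carry a positive
     slackid, or the constraint groups would be ill-defined. */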
  if(learn_parm->sharedslack)  /* if shared slacks are used, they must */
    for(i=0;i<totdoc;i++)      /* be used on every constraint */
      if(!docs[i]->slackid) {
        fprintf(stderr,"Error: Missing shared slack definitions in some of the examples.\n");
        exit(1);
      }
  /* compute starting state for initial alpha values */
  if(alpha) {
    if(verbosity>=1) {
      printf("Computing starting state..."); fflush(stdout);
    }
    index = (long *)my_malloc(sizeof(long)*totdoc);
    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
    weights = (double *)my_malloc(sizeof(double)*(totwords+1));
    aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
    for(i=0;i<totdoc;i++) {  /* create full index and clip alphas */
      index[i]=1;
      alpha[i]=fabs(alpha[i]);  /* fabs() already guarantees alpha[i]>=0 */
      if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
    }
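    /* Warm-start the cache with the rows of unbounded SVs first, since
       the optimizer accesses these most often; bounded SVs (alpha at the
       cost bound) only fill whatever cache space remains. */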
    if(kernel_parm->kernel_type != LINEAR) {
      for(i=0;i<totdoc;i++)  /* fill kernel cache with unbounded SV */
        if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
           && (kernel_cache_space_available(kernel_cache)))
          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
      for(i=0;i<totdoc;i++)  /* fill rest of kernel cache with bounded SV */
        if((alpha[i]==learn_parm->svm_cost[i])
           && (kernel_cache_space_available(kernel_cache)))
          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
    }
    (void)compute_index(index,totdoc,index2dnum);
    update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
                            totwords,kernel_parm,kernel_cache,lin,aicache,
                            weights);
    (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
                              learn_parm,index2dnum,index2dnum,model);
    for(i=0;i<totdoc;i++) {  /* copy initial alphas */
      a[i]=alpha[i];
    }
    free(index);
    free(index2dnum);
    free(weights);
    free(aicache);
    if(verbosity>=1) {
      printf("done.\n"); fflush(stdout);
    }
  }
  /* removing inconsistent examples does not work for the general
     optimization problem */
  if(learn_parm->remove_inconsistent) {
    learn_parm->remove_inconsistent = 0;
    printf("'remove inconsistent' not available in this mode. Switching option off!\n");
    fflush(stdout);
  }

  /* caching makes no sense for the linear kernel */
  if(kernel_parm->kernel_type == LINEAR) {
    kernel_cache = NULL;
  }

  if(verbosity==1) {
    printf("Optimizing"); fflush(stdout);
  }
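  /* With shared slacks, all constraints that carry the same slackid share
     a single slack variable, so only the largest violation within each
     group is penalized; otherwise every inequality gets its own \xi_i. */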
  /* train the svm */
  if(learn_parm->sharedslack)
    iterations=optimize_to_convergence_sharedslack(docs,label,totdoc,
                                totwords,learn_parm,kernel_parm,
                                kernel_cache,&shrink_state,model,
                                a,lin,c,&timing_profile,
                                &maxdiff);
  else
    iterations=optimize_to_convergence(docs,label,totdoc,
                                totwords,learn_parm,kernel_parm,
                                kernel_cache,&shrink_state,model,
                                inconsistent,unlabeled,
                                a,lin,c,&timing_profile,
                                &maxdiff,(long)-1,(long)1);
  if(verbosity>=1) {
    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);

    misclassified=0;
    for(i=0;(i<totdoc);i++) {  /* get final statistic */
      if((lin[i]-model->b)*(double)label[i] <= 0.0)
        misclassified++;
    }

    printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);

    runtime_end=get_runtime();
    if(verbosity>=2) {
      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for shrinking/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
        ((float)runtime_end-(float)runtime_start)/100.0,
        (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
    }
    else {
      printf("Runtime in cpu-seconds: %.2f\n",
             (runtime_end-runtime_start)/100.0);
    }
  }
  if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
    loss=0;
    model_length=0;
    for(i=0;i<totdoc;i++) {
      if((lin[i]-model->b)*(double)label[i] < c[i]-learn_parm->epsilon_crit)
        loss+=c[i]-(lin[i]-model->b)*(double)label[i];
      model_length+=a[i]*label[i]*lin[i];
    }
    model_length=sqrt(model_length);
    fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
  }
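  /* The loop above accumulates sum_i a_i y_i lin[i]
     = sum_i a_i y_i (w*x_i) = w*w, since w = sum_j a_j y_j x_j and
     lin[i] = w*x_i; its square root is |w|. */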
  if(learn_parm->sharedslack) {
    index = (long *)my_malloc(sizeof(long)*totdoc);
    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
    maxslackid=0;
    for(i=0;i<totdoc;i++) {  /* create full index */
      index[i]=1;
      if(maxslackid<docs[i]->slackid)
        maxslackid=docs[i]->slackid;
    }