📄 lr.c
字号:
double lr_deviance_from_cg( lr_train *lrt, conjgrad *cg){ int numrows; double cgb0, likelihood, dev; dyv *cgb, *cgn, *cgu; /* Get beta. */ cgb = mk_copy_dyv( conjgrad_x_ref( cg)); /* good params */ cgb0 = dyv_ref( cgb, 0); dyv_remove( cgb, 0); numrows = lrt->numrows; cgn = mk_dyv( numrows); cgu = mk_dyv( numrows); /* Compute u and n. */ if (lrt->X != NULL) lr_compute_n_from_spardat( lrt->X, cgb0, cgb, cgn); else lr_compute_n_from_dym( lrt->M, cgb0, cgb, cgn); free_dyv( cgb); lr_compute_u_from_n( cgn, cgu); free_dyv( cgn); /* Compute likelihood and deviance. */ likelihood = lr_log_likelihood_basic( lrt->y, cgu); free_dyv( cgu); dev = lr_deviance_from_log_likelihood( likelihood, lrt->likesat); return dev;}dyv *mk_lr_cgresult_cgeps( lr_train *lrt, double unscaled_cgeps, int maxiters, conjgrad *cg){ int iters, bestiter, window; double rsqr, bestrsqr, decay, cgeps, decthresh; dyv *x, *result, *rsqrhist; dyv_array *paramhist; /* Initialize paramters. */ rsqrhist = mk_constant_dyv( maxiters, FLT_MAX); paramhist = mk_array_of_null_dyvs( maxiters); bestrsqr = FLT_MAX; window = lrt->opts->cgwindow; decay = lrt->opts->cgdecay; decthresh = FLT_MAX; /* Scale cgeps. */ rsqr = sqrt(dyv_scalar_product( cg->cgs->r, cg->cgs->r)); if (Verbosity >= 2) printf( " CGINITIAL RSQR: %g\n", rsqr); cgeps = unscaled_cgeps * rsqr; /* Store initial position in history. */ iters = 0; dyv_set( rsqrhist, iters, rsqr); dyv_array_set( paramhist, iters, conjgrad_x_ref( cg)); iters += 1; /* Abort iterations if rsqr gets too small for calcs to proceed. */ while (rsqr >= cgeps) { if (Verbosity > 3) { fprintf_oneline_dyv( stdout, " CG POS:", cg->cgs->x, "\n"); } /* Non-epsilon termination conditions. */ if (iters >= maxiters) break; if (window <= 0) break; if (rsqr > decthresh) break; /* Iterate. */ cgiter( cg); /* CG resisdual Euclidean norm. */ rsqr = dyv_magnitude( conjgrad_r_ref( cg)); /* Store history. */ dyv_set( rsqrhist, iters, rsqr); dyv_array_set( paramhist, iters, conjgrad_x_ref( cg)); if (Verbosity >= 2) printf( " CGEPS RSQR: %g\n", rsqr); /* Update records. */ if (rsqr <= bestrsqr) { bestrsqr = rsqr; window = lrt->opts->cgwindow; decthresh = decay * bestrsqr; } else window -= 1; /* Count number of iters. */ iters += 1; } /* Select parameters. */ /* CG residual: use last iteration's parameter vector. */ /* x = conjgrad_x_ref( cg); */ /* Get best params from paramhist. */ bestiter = dyv_argmin( rsqrhist); x = dyv_array_ref( paramhist, bestiter); if (x == NULL) { my_errorf( "mk_lr_cgresult_cgeps: NULL param vec %d", bestiter); } if (Verbosity >= 2) { rsqr = sqrt(dyv_scalar_product( cg->cgs->r, cg->cgs->r)); printf( " CGFINAL RSQR: %g\n", rsqr); } result = mk_copy_dyv( x); free_dyv_array( paramhist); free_dyv( rsqrhist); return result;}dyv *mk_lr_cgresult_cgdeveps( lr_train *lrt, double cgdeveps, int maxiters, conjgrad *cg){ int iters, bestiter, window; double dev, olddev, bestdev, rsqr, decay, decthresh; dyv *devhist, *x, *result; dyv_array *paramhist; /* Run conjugate gradient. */ devhist = mk_constant_dyv( maxiters, FLT_MAX); paramhist = mk_array_of_null_dyvs( maxiters); dev = -FLT_MAX; bestdev = FLT_MAX; window = lrt->opts->cgwindow; decay = lrt->opts->cgdecay; decthresh = FLT_MAX; /* Scale cgeps. */ rsqr = sqrt(dyv_scalar_product( cg->cgs->r, cg->cgs->r)); /* Store initial position in history. */ iters = 0; dev = lr_deviance_from_cg( lrt, cg); dyv_set( devhist, iters, dev); dyv_array_set( paramhist, iters, conjgrad_x_ref( cg)); iters += 1; /* Abort the iters if rsqr gets too small for calcs to proceed. */ while (rsqr > 1e-300) { if (Verbosity > 3) { fprintf_oneline_dyv( stdout, " CG POS:", cg->cgs->x, "\n"); } /* Non-deviance termination criteria. */ if (iters > maxiters) break; /* Strict, since we start with iters=1. */ if (window <= 0) break; if (dev > decthresh) break; /* Iterate. */ olddev = dev; cgiter( cg); /* Relative difference of deviance. */ dev = lr_deviance_from_cg( lrt, cg); if (dev <= bestdev) { bestdev = dev; window = lrt->opts->cgwindow; decthresh = decay * bestdev; } else window -= 1; /* Store history. */ dyv_set( devhist, iters, dev); dyv_array_set( paramhist, iters, conjgrad_x_ref( cg) /* good params */); if (Verbosity >= 2) printf( "CG DEVIANCE: %g\n", dev); /* Terminate on rel diff of deviance. */ if (fabs(olddev-dev) < dev*cgdeveps) break; /* Count number of iters. */ iters += 1; /* We must calculate rsqr for the while-loop condition. */ rsqr = dyv_magnitude( conjgrad_r_ref( cg)); } /* Select parameters. */ /* Get best params from paramhist. */ bestiter = dyv_argmin( devhist); x = dyv_array_ref( paramhist, bestiter); if (x == NULL) { my_errorf( "mk_lr_cgresult_cgdeveps: NULL param vec %d", bestiter); } result = mk_copy_dyv( x); free_dyv_array( paramhist); free_dyv( devhist); return result;}/* Run conjugate gradient. */dyv *mk_lr_cgresult( lr_train *lrt, double unscaled_cgeps, double cgdeveps, int maxiters, conjgrad *cg){ dyv *result; if (cgdeveps > 0.0) { result = mk_lr_cgresult_cgdeveps( lrt, cgdeveps, maxiters, cg); } else { result = mk_lr_cgresult_cgeps( lrt, unscaled_cgeps, maxiters, cg); } if (Verbosity > 3) { fprintf_oneline_dyv( stdout, " CG POS:", result, "\n"); } return result;}void diag_precond( const dyv *v, dyv *result, void *userdata){ /* Get diagonal ( [1t] ) diag( [--] W [1|X] ) = [ m_ii = Sum(x_ki^2 * w_k over k) ] ( [Xt] ) In the sparse case, X is binary and x_ki^2 == x_ki, and the diagonal is [ m_ii = Sum(w_k over posrows_i) ]. Preconditioning matrix is the diagonal matrix. Multiply inverse of this matrix time v, which is an element-wise product. */ int colidx; double divisor, val; ivec *posrows; dyv *w; lr_train *lrt; lrt = (lr_train *) userdata; if (lrt->X == NULL) { my_error( "diag_precond: dense problems not yet supported."); } w = lrt_w_ref( lrt); val = dyv_ref( v, 0); dyv_set( result, 0, val / dyv_sum( w)); for (colidx=1; colidx < lrt->numatts; ++colidx) { posrows = spardat_attnum_to_posrows( lrt->X, colidx-1); divisor = dyv_partial_sum( w, posrows); val = dyv_ref( v, colidx); dyv_set( result, colidx, val / divisor); } return;}dyv *mk_lr_update_b_conjugate_gradient_helper( lr_train *lrt, double cgeps, double cgdeveps, int maxiters, int *iters, dyv *initx){ int numatts; dyv *B, *x; cgopts *cgo; conjgrad *cg; numatts = lrt->numatts; B = mk_lr_XtWz_dyv( lrt); cgo = mk_cgopts_qspd( numatts, maxiters, -1.0 /* eps for runcg */, B, initx, (void *) lrt, lr_cg_mk_copy_userdata, lr_cg_free_userdata, lr_cg_multA); free_dyv( B); /* Set up preconditioning. */ /* set_cgopts_multMinv( cgo, diag_precond); */ /* Remainder of setup work. */ cg = mk_conjgrad_from_cgopts( cgo); free_cgopts( cgo); /* Run conjugate gradient. */ /* Course-grained method: runcg( cg); x = conjgrad_x_ref( cg); */ /* Fine-grained method: */ x = mk_lr_cgresult( lrt, cgeps, cgdeveps, maxiters, cg); /* Print iteration information. */ *iters = cg->cgs->iterations; if (Verbosity >= 3) printf( "CG iterations=%d\n", cg->cgs->iterations); /* Done. */ free_conjgrad( cg); return x;}/***********************************************************************//* LOGISTIC START *//***********************************************************************/int lr_deviance_test( lr_train *lrt, double epsilon, double olddev, double *dev){ /* Compute deviance */ *dev = lr_train_deviance( lrt); /* Relative difference is small: |a-b|/a < epsilon */ if (fabs(*dev-olddev) < epsilon * *dev) return 1; /* If we reach this point, then we have not yet converged. */ return 0;}/* Exactly one of X and ds should be NULL. */lr_train *mk_lr_train( spardat *X, dym *factors, dyv *outputs, dyv *initb, lr_options *opts){ /* initb is copied into lr->b. */ int converge, rc; int numiters, bestiter; double dev, olddev; dyv *devhist; lr_train *lrt; lr_state *bestlrs; lr_statearr *lrsarr; /* Create lr_train struct. */ if (X != NULL) lrt = mk_lr_train_from_spardat( X, opts); else lrt = mk_lr_train_from_dym( factors, outputs, opts); /* Set initial value of model parameters, if desired. */ if (initb != NULL) lr_train_overwrite_b( lrt, initb); /* Initialize our loop state */ dev = -1000.0; lrsarr = mk_array_of_null_lr_states( opts->lrmax); devhist = mk_constant_dyv( opts->lrmax, FLT_MAX); /* START OF IRLS ITERATIONS */ /* Iterate until the change in deviance is relatively small. */ for (numiters=0; numiters < opts->lrmax; ++numiters) { /* Update olddev and iterate. */ olddev = dev; rc = lr_train_iterate(lrt); /* Test for convergence. */ lr_statearr_set( lrsarr, numiters, lrt->lrs); converge = lr_deviance_test( lrt, opts->lreps, olddev, &dev); dyv_set( devhist, numiters, dev); /* Print stuff. */ if (Verbosity >= 1) printf( "."); if (Verbosity >= 3) { printf( "LR ITER %d: likesat: %g, likelihood: %g, deviance: %g\n", numiters, lrt->likesat, lr_log_likelihood_from_deviance( dev, lrt->likesat), dev); } if (Verbosity >= 5) { /* Print all or most extreme attributes. */ printf( " Params, b0: %g\n", lrt->lrs->b0); fprintf_oneline_dyv( stdout, " Params, b:", lrt->lrs->b, "\n"); } if (converge) break; else if (rc == -2) break; /* Exceeded cgmax. */ else if (am_isnan(dev)) break; } /* END OF ITERATIONS */ /* Check state history for best holdout performance. */ bestiter = dyv_argmin( devhist); bestlrs = lr_statearr_ref( lrsarr, bestiter); free_lr_state( lrt->lrs); lrt->lrs = mk_copy_lr_state( bestlrs); if (Verbosity == 1) printf( "\n"); if (Verbosity >= 2) { printf( "CHOOSING ITERATION %d WITH DEVIANCE %g\n", bestiter, dyv_ref( devhist, bestiter)); } if (Verbosity >= 2) { fprintf_oneline_dyv( stdout, " devhist:", devhist, "\n"); } /* Free state history. */ free_lr_statearr( lrsarr); free_dyv( devhist); /* Done. */ return lrt;}/***********************************************************************//* INPUT/OUTPUT *//***********************************************************************/void out_lr_predict( PFILE *f, lr_predict *lrp){ int nump, i; double val; dyv *dv; nump = dyv_size( lrp->b) + 1; /* Copy b0, b into a single dyv. */ dv = mk_dyv( nump); dyv_set( dv, 0, lrp->b0); for (i=1; i<nump; ++i) { val = dyv_ref( lrp->b, i-1); dyv_set( dv, i, val); } dyv_write( f, dv); free_dyv( dv); return;}lr_predict *mk_in_lr_predict( PFILE *f){ int i, size; double val; dyv *dv, *b; lr_predict *lrp; lrp = AM_MALLOC( lr_predict); dv = mk_dyv_read( f); size = dyv_size( dv); lrp->b0 = dyv_ref( dv, 0); b = mk_dyv( size-1); for (i=1; i<size; ++i) { val = dyv_ref( dv, i); dyv_set( b, i-1, val); } lrp->b = b; free_dyv( dv); return lrp;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -