/* lr.c — logistic regression training: IRLS coefficient updates,
   log-likelihood/deviance metrics, prediction, and conjugate-gradient
   helper routines. */
/***********************************************************************/

/* X,b -> n, or M,b -> n: recompute the linear predictor vector n
   (n_i = b0 + <b, row_i>) from the current coefficients, using the
   sparse path (X) when available, else the dense path (M). */
void lr_train_update_n( lr_train *lrt)
{
  /* assumes that X is binary, but M can be any matrix. */
  if ( lrt->X != NULL)
    lr_compute_n_from_spardat( lrt->X, lrt_b0_ref(lrt), lrt_b_ref(lrt),
                               lrt_n_ref(lrt));
  else
    lr_compute_n_from_dym( lrt->M, lrt_b0_ref(lrt), lrt_b_ref(lrt),
                           lrt_n_ref(lrt));
  return;
}

/* n -> u: recompute the fitted probabilities u = logistic(n). */
void lr_train_update_u( lr_train *lrt)
{
  lr_compute_u_from_n( lrt_n_ref(lrt), lrt_u_ref(lrt));
  return;
}

/* u -> w: recompute the per-row weights w_i = u_i * (1 - u_i)
   (the IRLS weights; this is the Bernoulli variance at u_i). */
void lr_train_update_w( lr_train *lrt)
{
  int i;
  double ui, val;

  for (i=0; i < lrt->numrows; ++i) {
    ui = dyv_ref( lrt_u_ref(lrt), i);
    val = ui * (1-ui);
    dyv_set( lrt_w_ref(lrt), i, val);
  }
  return;
}

/* y,n,u,w -> z: recompute the adjusted dependent variable
   z_i = n_i + (y_i - u_i) / w_i used by the weighted least-squares
   step.  Note z_i blows up as w_i -> 0 (u_i near 0 or 1); the
   non-AMFAST build traps NaN/Inf here. */
void lr_train_update_z( lr_train *lrt)
{
  int i;
  double yi, ni, ui, wi, val;

  for (i=0; i < lrt->numrows; ++i) {
    yi = dyv_ref( lrt->y, i);
    ni = dyv_ref( lrt_n_ref(lrt), i);
    ui = dyv_ref( lrt_u_ref(lrt), i);
    wi = dyv_ref( lrt_w_ref(lrt), i);
    val = ni + (yi-ui) / wi;

#ifndef AMFAST
    if (!am_isnum( val)) {
      my_errorf( "lr_train_update_z: NaN or Inf problem: val is %f.\n"
                 "Inputs: i=%d, yi=%f, ni=%f, ui=%f, wi=%f\n",
                 val, i, yi, ni, ui, wi);
    }
#endif

    dyv_set( lrt_z_ref(lrt), i, val);
  }
  return;
}

/* X,w,z -> b: solve the weighted least-squares system for the new
   coefficients by conjugate gradient:

              [1t]                [1t]
   Compute b = (( [--] W [1|X])^-1) * [--] W z,   where W = diag(w),
              [Xt]                [Xt]

   i.e. b = (At W A)^-1 At W z with the augmented matrix A = [1|X].
   The solution vector B packs the intercept in slot 0 and the
   attribute coefficients in slots 1..numatts-1.
   Returns 1 on success, -2 if CG used more than cgmax iterations
   (treated as failure). */
int lr_train_update_b( lr_train *lrt)
{
  int numatts, i, iters;
  double cgeps, cgdeveps, val;
  dyv *B, *initb;

  numatts = lrt->numatts;

  /* We are now using the initial CG residual for scaling cgeps.  This
     is best done inside mk_lr_cgresult(). */
  /* cgeps = lrt->numatts * lrt->opts->cgeps; */
  cgeps = lrt->opts->cgeps;
  cgdeveps = lrt->opts->cgdeveps;

  /* Create initb: optionally warm-start CG from the current (b0, b). */
  initb = NULL;
  if (lrt->opts->cgbinit) {
    initb = mk_dyv( numatts);
    dyv_set( initb, 0, lrt_b0_ref(lrt));
    for (i=1; i<numatts; ++i) {
      val = dyv_ref( lrt_b_ref(lrt), i-1);
      dyv_set( initb, i, val);
    }
  }

  B = mk_lr_update_b_conjugate_gradient_helper( lrt, cgeps, cgdeveps,
                                                lrt->opts->cgmax, &iters,
                                                initb);
  if (initb != NULL) free_dyv( initb);

  /* Break newb into ( b0, b ). */
  lrt_b0_set(lrt, dyv_ref( B, 0));
  for (i=1; i<numatts; ++i) {
    val = dyv_ref( B, i);
    dyv_set( lrt_b_ref(lrt), i-1, val);
  }
  free_dyv( B);

  /* Hitting cgmax is considered a failure. */
  if ( iters > lrt->opts->cgmax) return -2;
  return 1;
}

/* Run one IRLS iteration: refresh w and z, solve for b, then (unless
   the b-update signalled a fatal error) refresh n and u to match the
   new coefficients.  Returns the b-update's status code.
   NOTE(review): lr_train_update_b currently returns only 1 or -2, so
   the rval != 0 guard presumably reserves 0 for a fatal error from a
   variant implementation — confirm against other update_b providers. */
int lr_train_iterate( lr_train *lrt)
{
  int rval;

  lr_train_update_w(lrt);
  lr_train_update_z(lrt);
  rval = lr_train_update_b(lrt);
  if (rval != 0) {
    /* So long as we don't have a fatal error, update n and u. */
    lr_train_update_n(lrt);
    lr_train_update_u(lrt);
  }
  return rval;
}

/***********************************************************************/
/* METRICS                                                             */
/***********************************************************************/

/* Compute log likelihood L(b) = Sum( yi*ln(ui) + (1-yi)*ln(1-ui) ). */
/* Note that this falls apart if u == 0.0 or u == 1.0, which it should
   never be for the logit. */
double lr_log_likelihood_basic( dyv *y, dyv *u)
{
  int numrows, row;
  double sum, val, ui, yi;

  numrows = dyv_size( y);

  /* Compute log likelihood. */
  sum = 0.0;
  for (row=0; row<numrows; ++row) {
    ui = dyv_ref( u, row);
    yi = dyv_ref( y, row);
    val = yi*log(ui) + (1.0 - yi)*log(1.0 - ui);
    sum += val;
  }

  /* Done. */
  return sum;
}

/* Invert the deviance relation: L = Lsat - deviance/2. */
double lr_log_likelihood_from_deviance( double deviance, double likesat)
{
  return likesat - (0.5 * deviance);
}

/* deviance = -2 * (L - Lsat). */
double lr_deviance_from_log_likelihood( double likelihood, double likesat)
{
  return -2.0 * ( likelihood - likesat);
}

/* Deviance computed directly from y and u, assuming binary y (so the
   saturated log-likelihood is 0 and deviance = -2 * log-likelihood). */
double lr_deviance_basic( dyv *y, dyv *u)
{
  int i;
  double yi, ui, sum, deviance;

  /* deviance = -2*sum(yi*ln(yi/ui) + (1-yi)*ln((1-yi)/(1-ui))), but
     with binary yi we have to compute the terms conditionally. */
  sum = 0.0;
  for (i=0; i < dyv_size(y); ++i) {
    yi = dyv_ref( y, i);
    ui = dyv_ref( u, i);
    if (yi != 0) sum += log(ui);
    else sum += log(1-ui);

    /* Stop summing after overflow. */
    if (sum < -FLT_MAX) return FLT_MAX;
  }
  deviance = -2*sum;

  return deviance;
}

/* Deviance of coefficients (b0, b) against labels y, with the linear
   predictor built from the sparse matrix X.  Allocates and frees its
   own scratch n and u vectors. */
double lr_deviance_from_spardat_b( const spardat *X, dyv *y, double b0,
                                   dyv *b)
{
  int numrows;
  double dev;
  dyv *n, *u;

  numrows = spardat_num_rows( X);
  n = mk_dyv( numrows);
  u = mk_dyv( numrows);

  lr_compute_n_from_spardat( X, b0, b, n);
  lr_compute_u_from_n( n, u);
  dev = lr_deviance_basic( y, u);

  free_dyv( n);
  free_dyv( u);
  return dev;
}

/* As lr_deviance_from_spardat_b, but with a dense matrix M. */
double lr_deviance_from_dym_b( const dym *M, dyv *y, double b0, dyv *b)
{
  int numrows;
  double dev;
  dyv *n, *u;

  numrows = dym_rows( M);
  n = mk_dyv( numrows);
  u = mk_dyv( numrows);

  lr_compute_n_from_dym( M, b0, b, n);
  lr_compute_u_from_n( n, u);
  dev = lr_deviance_basic( y, u);

  free_dyv( n);
  free_dyv( u);
  return dev;
}

/* Deviance of the trainer's current fit, via its cached u and the
   stored saturated likelihood. */
double lr_train_deviance( lr_train *lrt)
{
  double likelihood, dev;

  likelihood = lr_log_likelihood_basic( lrt->y, lrt_u_ref(lrt));
  dev = lr_deviance_from_log_likelihood( likelihood, lrt->likesat);
  return dev;
}

/***********************************************************************/
/* LR_PREDICT                                                          */
/***********************************************************************/

/* Make a predictor from coefficients; copies b (caller keeps
   ownership of its own b). */
lr_predict *mk_lr_predict( double b0, dyv *b)
{
  lr_predict *lrp;

  lrp = AM_MALLOC( lr_predict);
  lrp->b0 = b0;
  lrp->b = mk_copy_dyv( b);
  return lrp;
}

/* Deep copy of a predictor. */
lr_predict *mk_copy_lr_predict( lr_predict *lrp)
{
  lr_predict *lrpcopy;

  lrpcopy = mk_lr_predict( lrp->b0, lrp->b);
  return lrpcopy;
}

/* Free a predictor and its coefficient vector; NULL is a no-op. */
void free_lr_predict( lr_predict *lrp)
{
  if (lrp != NULL) {
    if (lrp->b != NULL) free_dyv( lrp->b);
    AM_FREE( lrp, lr_predict);
  }
  return;
}

/* Predicted probability for one row, given either its sparse positive
   attributes (posatts) or its dense attribute values (attvals). */
double lr_predict_predict( ivec *posatts, dyv *attvals, lr_predict *lrp)
{
  return lr_prediction( lrp->b0, lrp->b, posatts, attvals);
}

/***********************************************************************/
/* UTILITY                                                             */
/***********************************************************************/

/* Logistic prediction u = e^n / (1 + e^n), n = b0 + <b, row>.  Uses
   the sparse dot product over posatts when posatts is non-NULL, else
   the dense dot product with attvals. */
double lr_prediction( double b0, dyv *b, ivec *posatts, dyv *attvals)
{
  /* posatts should be a sivec. */
  /* e^n / (1+e^n), n = b0 + lrb->b . posatts. */
  double n, en, result;

  /* Compute n = <lrb->b, posatts>. */
  n = b0;
  if (posatts != NULL) n += dyv_partial_sum( b, posatts);
  else n += dyv_scalar_product( b, attvals);

  /* Compute model value, u = e^n / (1.0 + e^n) */
  en = exp(n);
  result = en / (1.0 + en);

  return result;
}

/* n_i = b0 + sum of b over row i's positive (binary) attributes. */
void lr_compute_n_from_spardat( const spardat *X, double b0, dyv *b, dyv *n)
{
  int numrows, row;
  ivec *posatts;
  double sum;

  numrows = spardat_num_rows( X);
  for (row=0; row < numrows; ++row) {
    /* posatts appears to be a borrowed reference (not freed here).
       Remember that at one time we made a copy of posatts because of a
       very mysterious and hardware/compiler-looking bug. */
    posatts = spardat_row_to_posatts( X, row);
    sum = dyv_partial_sum( b, posatts);
    sum += b0;
    dyv_set( n, row, sum);
  }
  return;
}

/* Dense counterpart: n_i = b0 + <M[i,:], b>, over the first
   dyv_size(b) columns of M. */
void lr_compute_n_from_dym( const dym *M, double b0, dyv *b, dyv *n)
{
  int numrows, numgood, row, j;
  double sum;

  numrows = dym_rows( M);
  numgood = dyv_size(b);
  for (row=0; row < numrows; ++row) {
    sum = 0.0;
    for (j=0; j<numgood; ++j) sum += dym_ref( M, row, j) * dyv_ref( b, j);
    sum += b0;
    dyv_set( n, row, sum);
  }
  return;
}

/* u_i = logistic(n_i) = e^{n_i} / (1 + e^{n_i}), elementwise. */
void lr_compute_u_from_n( dyv *n, dyv *u)
{
  int numrows, i;
  double en, val, ni;

  numrows = dyv_size( n);
  for (i=0; i < numrows; ++i) {
    ni = dyv_ref( n, i);
    en = exp(ni);
    val = en / (1.0 + en);
    dyv_set( u, i, val);
  }
  return;
}

/* Matrix-vector product for the CG normal equations:

           [1t]
   Compute [--] W [1|X] v,   where W = diag(w).
           [Xt]

   v has the intercept component in slot 0; the result likewise. */
dyv *mk_lr_XtWXv_dyv( const lr_train *lrt, const dyv *v)
{
  double v0, cterm;
  dyv *subv, *Xv, *XtWXv;

  /* Split v into v0=v[0] and subv=v[1:] */
  v0 = dyv_ref( v, 0);
  subv = mk_dyv_slice( v, 1, dyv_size( v));

  /* Compute [1|X] v. */
  if (lrt->X != NULL) Xv = mk_spardat_times_dyv( lrt->X, subv);
  else Xv = mk_dym_times_dyv( lrt->M, subv);
  dyv_scalar_add( Xv, v0, Xv);
  free_dyv( subv);

  /* Compute W [1|X] v. */
  dyv_mult( Xv, lrt_w_ref(lrt), Xv);  /* Xv now stores WXv. */

  /* Compute Xt W [1|X] v and 1t W [1|X] v separately.  Both get
     stored in XtWXv. */
  if (lrt->X != NULL) XtWXv = mk_spardat_transpose_times_dyv( lrt->X, Xv);
  else XtWXv = mk_dym_transpose_times_dyv( lrt->M, Xv);
  cterm = dyv_sum( Xv);
  dyv_insert( XtWXv, 0, cterm);

  free_dyv( Xv);
  return XtWXv;
}

/* Right-hand side of the CG normal equations:

            [1t]
   Compute  [--] W z
            [Xt]

   i.e. XtWz = Xt v with v = w * z elementwise; for the sparse case
   each entry is r_i = dyv_partial_sum( v, posrows_i).  Slot 0 of the
   result is the intercept term 1t W z. */
dyv *mk_lr_XtWz_dyv( const lr_train *lrt)
{
  double cterm;
  dyv *Wz, *XtWz;

  /* Compute Wz. */
  Wz = mk_dyv_mult( lrt_w_ref(lrt), lrt_z_ref(lrt));

  /* Compute XtWz. */
  if (lrt->X != NULL) XtWz = mk_spardat_transpose_times_dyv( lrt->X, Wz);
  else XtWz = mk_dym_transpose_times_dyv( lrt->M, Wz);

  /* Insert 1t Wz at beginning of XtWz. */
  cterm = dyv_sum( Wz);
  dyv_insert( XtWz, 0, cterm);

  free_dyv( Wz);
  return XtWz;
}

/***********************************************************************/
/* CONJGRAD HELPERS                                                    */
/***********************************************************************/

/* conjgrad callback: deep-copy the lr_train userdata. */
void *lr_cg_mk_copy_userdata( const void *userdata)
{
  return (void *) mk_copy_lr_train( (lr_train *) userdata);
}

/* conjgrad callback: free the lr_train userdata. */
void lr_cg_free_userdata( void *userdata)
{
  free_lr_train( (lr_train *) userdata);
  return;
}

/* conjgrad callback: result = A v, where A is the (optionally
   ridge-regularized) normal-equations matrix [1|X]t W [1|X]. */
void lr_cg_multA( const dyv *v, dyv *result, void *userdata)
{
  double lambda;
  lr_train *lrt;
  dyv *Av, *lv;

  lrt = (lr_train *) userdata;

  /* Do sparse matrix-vector multiply. */
  Av = mk_lr_XtWXv_dyv( lrt, v);

  lambda = lrt->opts->rrlambda;
  if (lambda > 0.0) {
    /* Add Ridge Regression term. */
    lv = mk_dyv_scalar_mult( v, lambda);
    dyv_set( lv, 0, 0.0);  /* Don't penalize constant term. */
    dyv_plus( Av, lv, result);
    free_dyv( lv);
  }
  else {
    /* Don't do Ridge Regression. */
    copy_dyv( Av, result);
  }

  free_dyv( Av);
  return;
}
/* (Removed: code-viewer UI help text accidentally captured when this
   file was extracted from a web page; it was not part of the source.) */