📄 lbfgs.c

📁 This library is a C port of the implementation of Limited-memory Broyden-Fletcher-Goldfarb-Shanno (L
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/* *      Limited memory BFGS (L-BFGS). * * Copyright (c) 1990, Jorge Nocedal * Copyright (c) 2007,2008,2009 Naoaki Okazaki * All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. *//* $Id: lbfgs.c 55 2009-02-23 14:51:21Z naoaki $ *//*This library is a C port of the FORTRAN implementation of Limited-memoryBroyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal.The original FORTRAN source code is available at:http://www.ece.northwestern.edu/~nocedal/lbfgs.htmlThe L-BFGS algorithm is described in:    - Jorge Nocedal.      Updating Quasi-Newton Matrices with Limited Storage.      <i>Mathematics of Computation</i>, Vol. 35, No. 151, pp. 773--782, 1980.    - Dong C. Liu and Jorge Nocedal.      On the limited memory BFGS method for large scale optimization.      <i>Mathematical Programming</i> B, Vol. 45, No. 3, pp. 503-528, 1989.The line search algorithms used in this implementation are described in:    - John E. Dennis and Robert B. Schnabel.      <i>Numerical Methods for Unconstrained Optimization and Nonlinear      Equations</i>, Englewood Cliffs, 1983.    - Jorge J. More and David J. Thuente.      Line search algorithm with guaranteed sufficient decrease.      <i>ACM Transactions on Mathematical Software (TOMS)</i>, Vol. 20, No. 3,      pp. 286-307, 1994.This library also implements Orthant-Wise Limited-memory Quasi-Newton (OWL-QN)method presented in:    - Galen Andrew and Jianfeng Gao.      Scalable training of L1-regularized log-linear models.      In <i>Proceedings of the 24th International Conference on Machine      Learning (ICML 2007)</i>, pp. 33-40, 2007.I would like to thank the original author, Jorge Nocedal, who has beendistributing the effieicnt and explanatory implementation in an open sourcelicence.*/#ifdef  HAVE_CONFIG_H#include <config.h>#endif/*HAVE_CONFIG_H*/#include <stdio.h>#include <stdlib.h>#include <math.h>#include <lbfgs.h>#ifdef  _MSC_VER#define inline  __inlinetypedef unsigned int uint32_t;#endif/*_MSC_VER*/#if     defined(USE_SSE) && defined(__SSE2__) && LBFGS_FLOAT == 64/* Use SSE2 optimization for 64bit double precision. */#include "arithmetic_sse_double.h"#elif   defined(USE_SSE) && defined(__SSE__) && LBFGS_FLOAT == 32/* Use SSE optimization for 32bit float precision. */#include "arithmetic_sse_float.h"#else/* No CPU specific optimization. */#include "arithmetic_ansi.h"#endif#define min2(a, b)      ((a) <= (b) ? (a) : (b))#define max2(a, b)      ((a) >= (b) ? (a) : (b))#define max3(a, b, c)   max2(max2((a), (b)), (c));struct tag_callback_data {    int n;    void *instance;    lbfgs_evaluate_t proc_evaluate;    lbfgs_progress_t proc_progress;};typedef struct tag_callback_data callback_data_t;struct tag_iteration_data {    lbfgsfloatval_t alpha;    lbfgsfloatval_t *s;     /* [n] */    lbfgsfloatval_t *y;     /* [n] */    lbfgsfloatval_t ys;     /* vecdot(y, s) */};typedef struct tag_iteration_data iteration_data_t;static const lbfgs_parameter_t _defparam = {    6, 1e-5, 0, 1e-5,    0, LBFGS_LINESEARCH_DEFAULT, 40,    1e-20, 1e20, 1e-4, 0.9, 1.0e-16,    0.0, 0, -1,};/* Forward function declarations. */typedef int (*line_search_proc)(    int n,    lbfgsfloatval_t *x,    lbfgsfloatval_t *f,    lbfgsfloatval_t *g,    lbfgsfloatval_t *s,    lbfgsfloatval_t *stp,    const lbfgsfloatval_t* xp,    const lbfgsfloatval_t* gp,    lbfgsfloatval_t *wa,    callback_data_t *cd,    const lbfgs_parameter_t *param    );    static int line_search_backtracking(    int n,    lbfgsfloatval_t *x,    lbfgsfloatval_t *f,    lbfgsfloatval_t *g,    lbfgsfloatval_t *s,    lbfgsfloatval_t *stp,    const lbfgsfloatval_t* xp,    const lbfgsfloatval_t* gp,    lbfgsfloatval_t *wa,    callback_data_t *cd,    const lbfgs_parameter_t *param    );static int line_search_backtracking_owlqn(    int n,    lbfgsfloatval_t *x,    lbfgsfloatval_t *f,    lbfgsfloatval_t *g,    lbfgsfloatval_t *s,    lbfgsfloatval_t *stp,    const lbfgsfloatval_t* xp,    const lbfgsfloatval_t* gp,    lbfgsfloatval_t *wp,    callback_data_t *cd,    const lbfgs_parameter_t *param    );static int line_search_morethuente(    int n,    lbfgsfloatval_t *x,    lbfgsfloatval_t *f,    lbfgsfloatval_t *g,    lbfgsfloatval_t *s,    lbfgsfloatval_t *stp,    const lbfgsfloatval_t* xp,    const lbfgsfloatval_t* gp,    lbfgsfloatval_t *wa,    callback_data_t *cd,    const lbfgs_parameter_t *param    );static int update_trial_interval(    lbfgsfloatval_t *x,    lbfgsfloatval_t *fx,    lbfgsfloatval_t *dx,    lbfgsfloatval_t *y,    lbfgsfloatval_t *fy,    lbfgsfloatval_t *dy,    lbfgsfloatval_t *t,    lbfgsfloatval_t *ft,    lbfgsfloatval_t *dt,    const lbfgsfloatval_t tmin,    const lbfgsfloatval_t tmax,    int *brackt    );static lbfgsfloatval_t owlqn_x1norm(    const lbfgsfloatval_t* x,    const int start,    const int n    );static void owlqn_pseudo_gradient(    lbfgsfloatval_t* pg,    const lbfgsfloatval_t* x,    const lbfgsfloatval_t* g,    const int n,    const lbfgsfloatval_t c,    const int start,    const int end    );static void owlqn_project(    lbfgsfloatval_t* d,    const lbfgsfloatval_t* sign,    const int start,    const int end    );#if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))static int round_out_variables(int n){    n += 7;    n /= 8;    n *= 8;    return n;}#endif/*defined(USE_SSE)*/lbfgsfloatval_t* lbfgs_malloc(int n){#if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))    n = round_out_variables(n);#endif/*defined(USE_SSE)*/    return (lbfgsfloatval_t*)vecalloc(sizeof(lbfgsfloatval_t) * n);}void lbfgs_free(lbfgsfloatval_t *x){    vecfree(x);}void lbfgs_parameter_init(lbfgs_parameter_t *param){    memcpy(param, &_defparam, sizeof(*param));}int lbfgs(    int n,    lbfgsfloatval_t *x,    lbfgsfloatval_t *ptr_fx,    lbfgs_evaluate_t proc_evaluate,    lbfgs_progress_t proc_progress,    void *instance,    lbfgs_parameter_t *_param    ){    int ret;    int i, j, k, ls, end, bound;    lbfgsfloatval_t step;    /* Constant parameters and their default values. */    lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam;    const int m = param.m;    lbfgsfloatval_t *xp = NULL;    lbfgsfloatval_t *g = NULL, *gp = NULL, *pg = NULL;    lbfgsfloatval_t *d = NULL, *w = NULL, *pf = NULL;    iteration_data_t *lm = NULL, *it = NULL;    lbfgsfloatval_t ys, yy;    lbfgsfloatval_t xnorm, gnorm, beta;    lbfgsfloatval_t fx = 0.;    lbfgsfloatval_t rate = 0.;    line_search_proc linesearch = line_search_morethuente;    /* Construct a callback data. */    callback_data_t cd;    cd.n = n;    cd.instance = instance;    cd.proc_evaluate = proc_evaluate;    cd.proc_progress = proc_progress;#if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))    /* Round out the number of variables. */    n = round_out_variables(n);#endif/*defined(USE_SSE)*/    /* Check the input parameters for errors. */    if (n <= 0) {        return LBFGSERR_INVALID_N;    }#if     defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))    if (n % 8 != 0) {        return LBFGSERR_INVALID_N_SSE;    }    if (((unsigned short)x & 0x000F) != 0) {        return LBFGSERR_INVALID_X_SSE;    }#endif/*defined(USE_SSE)*/    if (param.epsilon < 0.) {        return LBFGSERR_INVALID_EPSILON;    }    if (param.past < 0) {        return LBFGSERR_INVALID_TESTPERIOD;    }    if (param.delta < 0.) {        return LBFGSERR_INVALID_DELTA;    }    if (param.min_step < 0.) {        return LBFGSERR_INVALID_MINSTEP;    }    if (param.max_step < param.min_step) {        return LBFGSERR_INVALID_MAXSTEP;    }    if (param.ftol < 0.) {        return LBFGSERR_INVALID_FTOL;    }    if (param.gtol < 0.) {        return LBFGSERR_INVALID_GTOL;    }    if (param.xtol < 0.) {        return LBFGSERR_INVALID_XTOL;    }    if (param.max_linesearch <= 0) {        return LBFGSERR_INVALID_MAXLINESEARCH;    }    if (param.orthantwise_c < 0.) {        return LBFGSERR_INVALID_ORTHANTWISE;    }    if (param.orthantwise_start < 0 || n < param.orthantwise_start) {        return LBFGSERR_INVALID_ORTHANTWISE_START;    }    if (param.orthantwise_end < 0) {        param.orthantwise_end = n;    }    if (n < param.orthantwise_end) {        return LBFGSERR_INVALID_ORTHANTWISE_END;    }    if (param.orthantwise_c != 0.) {        switch (param.linesearch) {        case LBFGS_LINESEARCH_BACKTRACKING:            linesearch = line_search_backtracking_owlqn;            break;        default:            /* Only the backtracking method is available. */            return LBFGSERR_INVALID_LINESEARCH;        }    } else {        switch (param.linesearch) {        case LBFGS_LINESEARCH_MORETHUENTE:            linesearch = line_search_morethuente;            break;        case LBFGS_LINESEARCH_BACKTRACKING:        case LBFGS_LINESEARCH_BACKTRACKING_STRONG:            linesearch = line_search_backtracking;            break;        default:            return LBFGSERR_INVALID_LINESEARCH;        }    }    /* Allocate working space. */    xp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));    g = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));    gp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));    d = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));    w = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));    if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) {        ret = LBFGSERR_OUTOFMEMORY;        goto lbfgs_exit;    }    if (param.orthantwise_c != 0.) {        /* Allocate working space for OW-LQN. */        pg = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));        if (pg == NULL) {            ret = LBFGSERR_OUTOFMEMORY;            goto lbfgs_exit;        }    }    /* Allocate limited memory storage. */    lm = (iteration_data_t*)vecalloc(m * sizeof(iteration_data_t));    if (lm == NULL) {        ret = LBFGSERR_OUTOFMEMORY;        goto lbfgs_exit;    }    /* Initialize the limited memory. */    for (i = 0;i < m;++i) {        it = &lm[i];        it->alpha = 0;        it->ys = 0;        it->s = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));        it->y = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));        if (it->s == NULL || it->y == NULL) {            ret = LBFGSERR_OUTOFMEMORY;            goto lbfgs_exit;        }    }    /* Allocate an array for storing previous values of the objective function. */    if (0 < param.past) {        pf = (lbfgsfloatval_t*)vecalloc(param.past * sizeof(lbfgsfloatval_t));    }    /* Evaluate the function value and its gradient. */    fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0);    if (0. != param.orthantwise_c) {        /* Compute the L1 norm of the variable and add it to the object value. */        xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end);        fx += xnorm * param.orthantwise_c;        owlqn_pseudo_gradient(            pg, x, g, n,            param.orthantwise_c, param.orthantwise_start, param.orthantwise_end            );    }    /* Store the initial value of the objective function. */    if (pf != NULL) {        pf[0] = fx;    }    /*        Compute the direction;        we assume the initial hessian matrix H_0 as the identity matrix.     */    if (param.orthantwise_c == 0.) {        vecncpy(d, g, n);    } else {        vecncpy(d, pg, n);    }    /*       Make sure that the initial variables are not a minimizer.     */    vec2norm(&xnorm, x, n);    if (param.orthantwise_c == 0.) {        vec2norm(&gnorm, g, n);    } else {        vec2norm(&gnorm, pg, n);    }    if (xnorm < 1.0) xnorm = 1.0;    if (gnorm / xnorm <= param.epsilon) {        ret = LBFGS_ALREADY_MINIMIZED;        goto lbfgs_exit;    }    /* Compute the initial step:        step = 1.0 / sqrt(vecdot(d, d, n))     */    vec2norminv(&step, d, n);    k = 1;    end = 0;    for (;;) {
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -