⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crf1m_learn.c

📁 CRFsuite is a very fast implementation of the Conditional Random Fields (CRF) algorithm. It handles t
💻 C
📖 第 1 页 / 共 3 页
字号:
    }    /*        (iv) Calculate the probabilities of the path (t, i) -> (t+1, j)            p(t+1,j|t,i)                = fwd[t][i] * edge[i][j] * state[t+1][j] * bwd[t+1][j] / norm                = (fwd'[t][i] / (C[0] ... C[t])) * edge[i][j] * state[t+1][j] * (bwd'[t+1][j] / (C[t+1] ... C[T-1])) * (C[0] * ... * C[T])                = fwd'[t][i] * edge[i][j] * state[t+1][j] * bwd'[t+1][j] * C[T]        to compute expectations of transition features.     */    for (t = 0;t < T-1;++t) {        fwd = FORWARD_SCORE_AT(ctx, t);        state = STATE_SCORE_AT(ctx, t+1);        bwd = BACKWARD_SCORE_AT(ctx, t+1);        coeff = ctx->scale_factor[T];        /* Loop over the labels (t, i) */        for (i = 0;i < L;++i) {            edge = TRANS_SCORE_FROM(ctx, i);            trans = TRANSITION_FROM(trainer, i);            for (r = 0;r < trans->num_features;++r) {                fid = trans->fids[r];                f = FEATURE(trainer, fid);                j = f->dst;                func(f, fid, fwd[i] * edge[j] * state[j] * bwd[j] * coeff, 1., trainer, seq, t);            }        }    }}static int init_feature_references(crf1ml_t* trainer, const int A, const int L){    int i, k;    feature_refs_t *fl = NULL;    const int K = trainer->num_features;    const crf1ml_feature_t* features = trainer->features;    /*        The purpose of this routine is to collect references (indices) of:        - state features fired by each attribute (trainer->attributes)        - transition features pointing from each label (trainer->forward_trans)        - transition features pointing to each label (trainer->backward_trans)        - BOS features (trainer->bos_trans)        - EOS features (trainer->eos_trans).    */    /* Initialize. */    trainer->attributes = NULL;    trainer->forward_trans = NULL;    trainer->backward_trans = NULL;    /* Allocate arrays for feature references. 
*/    trainer->attributes = (feature_refs_t*)calloc(A, sizeof(feature_refs_t));    if (trainer->attributes == NULL) goto error_exit;    trainer->forward_trans = (feature_refs_t*)calloc(L, sizeof(feature_refs_t));    if (trainer->forward_trans == NULL) goto error_exit;    trainer->backward_trans = (feature_refs_t*)calloc(L, sizeof(feature_refs_t));    if (trainer->backward_trans == NULL) goto error_exit;    memset(&trainer->bos_trans, 0, sizeof(feature_refs_t));    memset(&trainer->eos_trans, 0, sizeof(feature_refs_t));    /*        Firstly, loop over the features to count the number of references.        We don't want to use realloc() to avoid memory fragmentation.     */    for (k = 0;k < K;++k) {        const crf1ml_feature_t *f = &features[k];        switch (f->type) {        case FT_STATE:            trainer->attributes[f->src].num_features++;            break;        case FT_TRANS:            trainer->forward_trans[f->src].num_features++;            trainer->backward_trans[f->dst].num_features++;            break;        case FT_TRANS_BOS:            trainer->bos_trans.num_features++;            break;        case FT_TRANS_EOS:            trainer->eos_trans.num_features++;            break;        }    }    /*        Secondarily, allocate memory blocks to store the feature references.        We also clear fl->num_features fields, which will be used to indicate        the offset positions in the last phase.     
*/    for (i = 0;i < trainer->num_attributes;++i) {        fl = &trainer->attributes[i];        fl->fids = (int*)calloc(fl->num_features, sizeof(int));        if (fl->fids == NULL) goto error_exit;        fl->num_features = 0;    }    for (i = 0;i < trainer->num_labels;++i) {        fl = &trainer->forward_trans[i];        fl->fids = (int*)calloc(fl->num_features, sizeof(int));        if (fl->fids == NULL) goto error_exit;        fl->num_features = 0;        fl = &trainer->backward_trans[i];        fl->fids = (int*)calloc(fl->num_features, sizeof(int));        if (fl->fids == NULL) goto error_exit;        fl->num_features = 0;    }    fl = &trainer->bos_trans;    fl->fids = (int*)calloc(fl->num_features, sizeof(int));    if (fl->fids == NULL) goto error_exit;    fl->num_features = 0;    fl = &trainer->eos_trans;    fl->fids = (int*)calloc(fl->num_features, sizeof(int));    if (fl->fids == NULL) goto error_exit;    fl->num_features = 0;    /*        At last, store the feature indices.     */    for (k = 0;k < K;++k) {        const crf1ml_feature_t *f = &features[k];        switch (f->type) {        case FT_STATE:            fl = &trainer->attributes[f->src];            fl->fids[fl->num_features++] = k;            break;        case FT_TRANS:            fl = &trainer->forward_trans[f->src];            fl->fids[fl->num_features++] = k;            fl = &trainer->backward_trans[f->dst];            fl->fids[fl->num_features++] = k;            break;        case FT_TRANS_BOS:            fl = &trainer->bos_trans;            fl->fids[fl->num_features++] = k;            break;        case FT_TRANS_EOS:            fl = &trainer->eos_trans;            fl->fids[fl->num_features++] = k;            break;        }    }    return 0;error_exit:    if (trainer->attributes == NULL) {        for (i = 0;i < A;++i) free(trainer->attributes[i].fids);        free(trainer->attributes);        trainer->attributes = NULL;    }    if (trainer->forward_trans == NULL) {        for (i = 0;i < 
L;++i) free(trainer->forward_trans[i].fids);        free(trainer->forward_trans);        trainer->forward_trans = NULL;    }    if (trainer->backward_trans == NULL) {        for (i = 0;i < L;++i) free(trainer->backward_trans[i].fids);        free(trainer->backward_trans);        trainer->backward_trans = NULL;    }    return -1;}int crf1ml_prepare(    crf1ml_t* trainer,    int num_labels,    int num_attributes,    int max_item_length,    crf1ml_features_t* features    ){    int ret = 0;    const int L = num_labels;    const int A = num_attributes;    const int T = max_item_length;    /* Set basic parameters. */    trainer->num_labels = L;    trainer->num_attributes = A;    /* Construct a CRF context. */    trainer->ctx = crf1mc_new(L, T);    if (trainer->ctx == NULL) {        ret = CRFERR_OUTOFMEMORY;        goto error_exit;    }    /* Initialization for features and their weights. */    trainer->features = features->features;    trainer->num_features = features->num_features;    trainer->w = (floatval_t*)calloc(trainer->num_features, sizeof(floatval_t));    if (trainer->w == NULL) {        ret = CRFERR_OUTOFMEMORY;        goto error_exit;    }    /* Allocate the work space for probability calculation. */    trainer->prob = (floatval_t*)calloc(L, sizeof(floatval_t));    if (trainer->prob == NULL) {        ret = CRFERR_OUTOFMEMORY;        goto error_exit;    }    /* Initialize the feature references. */    init_feature_references(trainer, A, L);    return ret;error_exit:    free(trainer->attributes);    free(trainer->forward_trans);    free(trainer->backward_trans);    free(trainer->prob);    free(trainer->ctx);    return 0;}static int crf1ml_exchange_options(crf_params_t* params, crf1ml_option_t* opt, int mode){    BEGIN_PARAM_MAP(params, mode)        DDX_PARAM_STRING(            "algorithm", opt->algorithm, "lbfgs",            "The training algorithm."            
)        DDX_PARAM_FLOAT(            "feature.minfreq", opt->feature_minfreq, 0.0,            "The minimum frequency of features."            )        DDX_PARAM_INT(            "feature.possible_states", opt->feature_possible_states, 0,            "Force to generate possible state features."            )        DDX_PARAM_INT(            "feature.possible_transitions", opt->feature_possible_transitions, 0,            "Force to generate possible transition features."            )        DDX_PARAM_INT("feature.bos_eos", opt->feature_bos_eos, 1,            "Generate BOS/EOS features."            )    END_PARAM_MAP()    crf1ml_lbfgs_options(params, opt, mode);    crf1ml_sgd_options(params, opt, mode);    return 0;}void crf1ml_shuffle(int *perm, int N, int init){    int i, j, tmp;    if (init) {        /* Initialize the permutation if necessary. */        for (i = 0;i < N;++i) {            perm[i] = i;        }    }    for (i = 0;i < N;++i) {        j = mt_genrand_int31() % N;        tmp = perm[j];        perm[j] = perm[i];        perm[i] = tmp;    }}crf1ml_t* crf1ml_new(){#if 0    crf1mc_test_context(stdout);    return NULL;#else    crf1ml_t* trainer = (crf1ml_t*)calloc(1, sizeof(crf1ml_t));    trainer->lg = (logging_t*)calloc(1, sizeof(logging_t));    /* Create an instance for CRF parameters. */    trainer->params = params_create_instance();    /* Set the default parameters. 
*/    crf1ml_exchange_options(trainer->params, &trainer->opt, 0);    return trainer;#endif}void crf1ml_delete(crf1ml_t* trainer){    if (trainer != NULL) {        free(trainer->lg);    }}int crf_train_tag(crf_tagger_t* tagger, crf_sequence_t *inst, crf_output_t* output){    int i;    floatval_t logscore = 0;    crf1ml_t *crf1mt = (crf1ml_t*)tagger->internal;    const floatval_t* w = crf1mt->w;    const int K = crf1mt->num_features;    crf1m_context_t* ctx = crf1mt->ctx;    crf1mc_set_num_items(ctx, inst->num_items);    crf1ml_transition_score(crf1mt, w, K, 1.0);    crf1ml_set_labels(crf1mt, inst);    crf1ml_state_score(crf1mt, inst, w, K, 1.0);    logscore = crf1mc_viterbi(crf1mt->ctx);    crf_output_init_n(output, inst->num_items);    output->probability = logscore;    for (i = 0;i < inst->num_items;++i) {        output->labels[i] = crf1mt->ctx->labels[i];    }    output->num_labels = inst->num_items;    return 0;}void crf_train_set_message_callback(crf_trainer_t* trainer, void *instance, crf_logging_callback cbm){    crf1ml_t *crf1mt = (crf1ml_t*)trainer->internal;    crf1mt->lg->func = cbm;    crf1mt->lg->instance = instance;}void crf_train_set_evaluate_callback(crf_trainer_t* trainer, void *instance, crf_evaluate_callback cbe){

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -