⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crf1m_learn.c

📁 CRFsuite is a very fast implementation of the Conditional Random Fields (CRF) algorithm. It handles t
💻 C
📖 第 1 页 / 共 3 页
字号:
    crf1ml_t *crf1mt = (crf1ml_t*)trainer->internal;    crf1mt->cbe_instance = instance;    crf1mt->cbe_proc = cbe;}static int crf_train_train(    crf_trainer_t* trainer,    void* instances,    int num_instances,    int num_labels,    int num_attributes    ){    int i, max_item_length;    int ret = 0;    floatval_t sigma = 10, *best_w = NULL;    crf_sequence_t* seqs = (crf_sequence_t*)instances;    crf1ml_features_t* features = NULL;    crf1ml_t *crf1mt = (crf1ml_t*)trainer->internal;    crf_params_t *params = crf1mt->params;    crf1ml_option_t *opt = &crf1mt->opt;    /* Obtain the maximum number of items. */    max_item_length = 0;    for (i = 0;i < num_instances;++i) {        if (max_item_length < seqs[i].num_items) {            max_item_length = seqs[i].num_items;        }    }    /* Access parameters. */    crf1ml_exchange_options(crf1mt->params, opt, -1);    /* Report the parameters. */    logging(crf1mt->lg, "Training first-order linear-chain CRFs (trainer.crf1m)\n");    logging(crf1mt->lg, "\n");    /* Generate features. */    logging(crf1mt->lg, "Feature generation\n");    logging(crf1mt->lg, "feature.minfreq: %f\n", opt->feature_minfreq);    logging(crf1mt->lg, "feature.possible_states: %d\n", opt->feature_possible_states);    logging(crf1mt->lg, "feature.possible_transitions: %d\n", opt->feature_possible_transitions);    logging(crf1mt->lg, "feature.bos_eos: %d\n", opt->feature_bos_eos);    crf1mt->clk_begin = clock();    features = crf1ml_generate_features(        seqs,        num_instances,        num_labels,        num_attributes,        opt->feature_possible_states ? 1 : 0,        opt->feature_possible_transitions ? 
1 : 0,        opt->feature_minfreq,        crf1mt->lg->func,        crf1mt->lg->instance        );    logging(crf1mt->lg, "Number of features: %d\n", features->num_features);    logging(crf1mt->lg, "Seconds required: %.3f\n", (clock() - crf1mt->clk_begin) / (double)CLOCKS_PER_SEC);    logging(crf1mt->lg, "\n");    /* Preparation for training. */    crf1ml_prepare(crf1mt, num_labels, num_attributes, max_item_length, features);    crf1mt->num_attributes = num_attributes;    crf1mt->num_labels = num_labels;    crf1mt->num_sequences = num_instances;    crf1mt->seqs = seqs;    crf1mt->tagger.internal = crf1mt;    crf1mt->tagger.tag = crf_train_tag;    if (strcmp(opt->algorithm, "lbfgs") == 0) {        ret = crf1ml_lbfgs(crf1mt, opt);    } else if (strcmp(opt->algorithm, "sgd") == 0) {        ret = crf1ml_sgd(crf1mt, opt);    } else {        return CRFERR_INTERNAL_LOGIC;    }    return ret;}/*#define    CRF_TRAIN_SAVE_NO_PRUNING    1*/static int crf_train_save(crf_trainer_t* trainer, const char *filename, crf_dictionary_t* attrs, crf_dictionary_t* labels){    crf1ml_t *crf1mt = (crf1ml_t*)trainer->internal;    int a, k, l, ret;    int *fmap = NULL, *amap = NULL;    crf1mmw_t* writer = NULL;    const feature_refs_t *edge = NULL, *attr = NULL;    const floatval_t *w = crf1mt->w;    const floatval_t threshold = 0.01;    const int L = crf1mt->num_labels;    const int A = crf1mt->num_attributes;    const int K = crf1mt->num_features;    int J = 0, B = 0;    /* Start storing the model. */    logging(crf1mt->lg, "Storing the model\n");    crf1mt->clk_begin = clock();    /* Allocate and initialize the feature mapping. */    fmap = (int*)calloc(K, sizeof(int));    if (fmap == NULL) {        goto error_exit;    }#ifdef    CRF_TRAIN_SAVE_NO_PRUNING    for (k = 0;k < K;++k) fmap[k] = k;    J = K;#else    for (k = 0;k < K;++k) fmap[k] = -1;#endif/*CRF_TRAIN_SAVE_NO_PRUNING*/    /* Allocate and initialize the attribute mapping. 
*/    amap = (int*)calloc(A, sizeof(int));    if (amap == NULL) {        goto error_exit;    }#ifdef    CRF_TRAIN_SAVE_NO_PRUNING    for (a = 0;a < A;++a) amap[a] = a;    B = A;#else    for (a = 0;a < A;++a) amap[a] = -1;#endif/*CRF_TRAIN_SAVE_NO_PRUNING*/    /*     *    Open a model writer.     */    writer = crf1mmw(filename);    if (writer == NULL) {        goto error_exit;    }    /* Open a feature chunk in the model file. */    if (ret = crf1mmw_open_features(writer)) {        goto error_exit;    }    /* Determine a set of active features and attributes. */    for (k = 0;k < crf1mt->num_features;++k) {        crf1ml_feature_t* f = &crf1mt->features[k];        if (w[k] != 0) {            int src;            crf1mm_feature_t feat;#ifndef    CRF_TRAIN_SAVE_NO_PRUNING            /* The feature (#k) will have a new feature id (#J). */            fmap[k] = J++;        /* Feature #k -> #fmap[k]. */            /* Map the source of the field. */            if (f->type == FT_STATE) {                /* The attribute #(f->src) will have a new attribute id (#B). */                if (amap[f->src] < 0) amap[f->src] = B++;    /* Attribute #a -> #amap[a]. */                src = amap[f->src];            } else {                src = f->src;            }#endif/*CRF_TRAIN_SAVE_NO_PRUNING*/            feat.type = f->type;            feat.src = src;            feat.dst = f->dst;            feat.weight = w[k];            /* Write the feature. */            if (ret = crf1mmw_put_feature(writer, fmap[k], &feat)) {                goto error_exit;            }        }    }    /* Close the feature chunk. */    if (ret = crf1mmw_close_features(writer)) {        goto error_exit;    }    logging(crf1mt->lg, "Number of active features: %d (%d)\n", J, K);    logging(crf1mt->lg, "Number of active attributes: %d (%d)\n", B, A);    logging(crf1mt->lg, "Number of active labels: %d (%d)\n", L, L);    /* Write labels. 
*/    logging(crf1mt->lg, "Writing labels\n", L);    if (ret = crf1mmw_open_labels(writer, L)) {        goto error_exit;    }    for (l = 0;l < L;++l) {        const char *str = NULL;        labels->to_string(labels, l, &str);        if (str != NULL) {            if (ret = crf1mmw_put_label(writer, l, str)) {                goto error_exit;            }            labels->free(labels, str);        }    }    if (ret = crf1mmw_close_labels(writer)) {        goto error_exit;    }    /* Write attributes. */    logging(crf1mt->lg, "Writing attributes\n");    if (ret = crf1mmw_open_attrs(writer, B)) {        goto error_exit;    }    for (a = 0;a < A;++a) {        if (0 <= amap[a]) {            const char *str = NULL;            attrs->to_string(attrs, a, &str);            if (str != NULL) {                if (ret = crf1mmw_put_attr(writer, amap[a], str)) {                    goto error_exit;                }                attrs->free(attrs, str);            }        }    }    if (ret = crf1mmw_close_attrs(writer)) {        goto error_exit;    }    /* Write label feature references. */    logging(crf1mt->lg, "Writing feature references for transitions\n");    if (ret = crf1mmw_open_labelrefs(writer, L+2)) {        goto error_exit;    }    for (l = 0;l < L;++l) {        edge = TRANSITION_FROM(crf1mt, l);        if (ret = crf1mmw_put_labelref(writer, l, edge, fmap)) {            goto error_exit;        }    }    edge = TRANSITION_BOS(crf1mt);    if (ret = crf1mmw_put_labelref(writer, L, edge, fmap)) {        goto error_exit;    }    edge = TRANSITION_EOS(crf1mt);    if (ret = crf1mmw_put_labelref(writer, L+1, edge, fmap)) {        goto error_exit;    }    if (ret = crf1mmw_close_labelrefs(writer)) {        goto error_exit;    }    /* Write attribute feature references. 
*/    logging(crf1mt->lg, "Writing feature references for attributes\n");    if (ret = crf1mmw_open_attrrefs(writer, B)) {        goto error_exit;    }    for (a = 0;a < A;++a) {        if (0 <= amap[a]) {            attr = ATTRIBUTE(crf1mt, a);            if (ret = crf1mmw_put_attrref(writer, amap[a], attr, fmap)) {                goto error_exit;            }        }    }    if (ret = crf1mmw_close_attrrefs(writer)) {        goto error_exit;    }    /* Close the writer. */    crf1mmw_close(writer);    logging(crf1mt->lg, "Seconds required: %.3f\n", (clock() - crf1mt->clk_begin) / (double)CLOCKS_PER_SEC);    logging(crf1mt->lg, "\n");    free(amap);    free(fmap);    return 0;error_exit:    if (writer != NULL) {        crf1mmw_close(writer);    }    if (amap != NULL) {        free(amap);    }    if (fmap != NULL) {        free(fmap);    }    return ret;}static int crf_train_addref(crf_trainer_t* trainer){    return crf_interlocked_increment(&trainer->nref);}static int crf_train_release(crf_trainer_t* trainer){    int count = crf_interlocked_decrement(&trainer->nref);    if (count == 0) {    }    return count;}static crf_params_t* crf_train_params(crf_trainer_t* trainer){    crf1ml_t *crf1mt = (crf1ml_t*)trainer->internal;    crf_params_t* params = crf1mt->params;    params->addref(params);    return params;}int crf1ml_create_instance(const char *interface, void **ptr){    if (strcmp(interface, "trainer.crf1m") == 0) {        crf_trainer_t* trainer = (crf_trainer_t*)calloc(1, sizeof(crf_trainer_t));        trainer->nref = 1;        trainer->addref = crf_train_addref;        trainer->release = crf_train_release;        trainer->params = crf_train_params;            trainer->set_message_callback = crf_train_set_message_callback;        trainer->set_evaluate_callback = crf_train_set_evaluate_callback;        trainer->train = crf_train_train;        trainer->save = crf_train_save;        trainer->internal = crf1ml_new();        *ptr = trainer;        return 0;    } 
else {        return 1;    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -