📄 crf1m_learn.c
字号:
crf1ml_t *crf1mt = (crf1ml_t*)trainer->internal; crf1mt->cbe_instance = instance; crf1mt->cbe_proc = cbe;}static int crf_train_train( crf_trainer_t* trainer, void* instances, int num_instances, int num_labels, int num_attributes ){ int i, max_item_length; int ret = 0; floatval_t sigma = 10, *best_w = NULL; crf_sequence_t* seqs = (crf_sequence_t*)instances; crf1ml_features_t* features = NULL; crf1ml_t *crf1mt = (crf1ml_t*)trainer->internal; crf_params_t *params = crf1mt->params; crf1ml_option_t *opt = &crf1mt->opt; /* Obtain the maximum number of items. */ max_item_length = 0; for (i = 0;i < num_instances;++i) { if (max_item_length < seqs[i].num_items) { max_item_length = seqs[i].num_items; } } /* Access parameters. */ crf1ml_exchange_options(crf1mt->params, opt, -1); /* Report the parameters. */ logging(crf1mt->lg, "Training first-order linear-chain CRFs (trainer.crf1m)\n"); logging(crf1mt->lg, "\n"); /* Generate features. */ logging(crf1mt->lg, "Feature generation\n"); logging(crf1mt->lg, "feature.minfreq: %f\n", opt->feature_minfreq); logging(crf1mt->lg, "feature.possible_states: %d\n", opt->feature_possible_states); logging(crf1mt->lg, "feature.possible_transitions: %d\n", opt->feature_possible_transitions); logging(crf1mt->lg, "feature.bos_eos: %d\n", opt->feature_bos_eos); crf1mt->clk_begin = clock(); features = crf1ml_generate_features( seqs, num_instances, num_labels, num_attributes, opt->feature_possible_states ? 1 : 0, opt->feature_possible_transitions ? 1 : 0, opt->feature_minfreq, crf1mt->lg->func, crf1mt->lg->instance ); logging(crf1mt->lg, "Number of features: %d\n", features->num_features); logging(crf1mt->lg, "Seconds required: %.3f\n", (clock() - crf1mt->clk_begin) / (double)CLOCKS_PER_SEC); logging(crf1mt->lg, "\n"); /* Preparation for training. */ crf1ml_prepare(crf1mt, num_labels, num_attributes, max_item_length, features); crf1mt->num_attributes = num_attributes; crf1mt->num_labels = num_labels; crf1mt->num_sequences = num_instances; crf1mt->seqs = seqs; crf1mt->tagger.internal = crf1mt; crf1mt->tagger.tag = crf_train_tag; if (strcmp(opt->algorithm, "lbfgs") == 0) { ret = crf1ml_lbfgs(crf1mt, opt); } else if (strcmp(opt->algorithm, "sgd") == 0) { ret = crf1ml_sgd(crf1mt, opt); } else { return CRFERR_INTERNAL_LOGIC; } return ret;}/*#define CRF_TRAIN_SAVE_NO_PRUNING 1*/static int crf_train_save(crf_trainer_t* trainer, const char *filename, crf_dictionary_t* attrs, crf_dictionary_t* labels){ crf1ml_t *crf1mt = (crf1ml_t*)trainer->internal; int a, k, l, ret; int *fmap = NULL, *amap = NULL; crf1mmw_t* writer = NULL; const feature_refs_t *edge = NULL, *attr = NULL; const floatval_t *w = crf1mt->w; const floatval_t threshold = 0.01; const int L = crf1mt->num_labels; const int A = crf1mt->num_attributes; const int K = crf1mt->num_features; int J = 0, B = 0; /* Start storing the model. */ logging(crf1mt->lg, "Storing the model\n"); crf1mt->clk_begin = clock(); /* Allocate and initialize the feature mapping. */ fmap = (int*)calloc(K, sizeof(int)); if (fmap == NULL) { goto error_exit; }#ifdef CRF_TRAIN_SAVE_NO_PRUNING for (k = 0;k < K;++k) fmap[k] = k; J = K;#else for (k = 0;k < K;++k) fmap[k] = -1;#endif/*CRF_TRAIN_SAVE_NO_PRUNING*/ /* Allocate and initialize the attribute mapping. */ amap = (int*)calloc(A, sizeof(int)); if (amap == NULL) { goto error_exit; }#ifdef CRF_TRAIN_SAVE_NO_PRUNING for (a = 0;a < A;++a) amap[a] = a; B = A;#else for (a = 0;a < A;++a) amap[a] = -1;#endif/*CRF_TRAIN_SAVE_NO_PRUNING*/ /* * Open a model writer. */ writer = crf1mmw(filename); if (writer == NULL) { goto error_exit; } /* Open a feature chunk in the model file. */ if (ret = crf1mmw_open_features(writer)) { goto error_exit; } /* Determine a set of active features and attributes. */ for (k = 0;k < crf1mt->num_features;++k) { crf1ml_feature_t* f = &crf1mt->features[k]; if (w[k] != 0) { int src; crf1mm_feature_t feat;#ifndef CRF_TRAIN_SAVE_NO_PRUNING /* The feature (#k) will have a new feature id (#J). */ fmap[k] = J++; /* Feature #k -> #fmap[k]. */ /* Map the source of the field. */ if (f->type == FT_STATE) { /* The attribute #(f->src) will have a new attribute id (#B). */ if (amap[f->src] < 0) amap[f->src] = B++; /* Attribute #a -> #amap[a]. */ src = amap[f->src]; } else { src = f->src; }#endif/*CRF_TRAIN_SAVE_NO_PRUNING*/ feat.type = f->type; feat.src = src; feat.dst = f->dst; feat.weight = w[k]; /* Write the feature. */ if (ret = crf1mmw_put_feature(writer, fmap[k], &feat)) { goto error_exit; } } } /* Close the feature chunk. */ if (ret = crf1mmw_close_features(writer)) { goto error_exit; } logging(crf1mt->lg, "Number of active features: %d (%d)\n", J, K); logging(crf1mt->lg, "Number of active attributes: %d (%d)\n", B, A); logging(crf1mt->lg, "Number of active labels: %d (%d)\n", L, L); /* Write labels. */ logging(crf1mt->lg, "Writing labels\n", L); if (ret = crf1mmw_open_labels(writer, L)) { goto error_exit; } for (l = 0;l < L;++l) { const char *str = NULL; labels->to_string(labels, l, &str); if (str != NULL) { if (ret = crf1mmw_put_label(writer, l, str)) { goto error_exit; } labels->free(labels, str); } } if (ret = crf1mmw_close_labels(writer)) { goto error_exit; } /* Write attributes. */ logging(crf1mt->lg, "Writing attributes\n"); if (ret = crf1mmw_open_attrs(writer, B)) { goto error_exit; } for (a = 0;a < A;++a) { if (0 <= amap[a]) { const char *str = NULL; attrs->to_string(attrs, a, &str); if (str != NULL) { if (ret = crf1mmw_put_attr(writer, amap[a], str)) { goto error_exit; } attrs->free(attrs, str); } } } if (ret = crf1mmw_close_attrs(writer)) { goto error_exit; } /* Write label feature references. */ logging(crf1mt->lg, "Writing feature references for transitions\n"); if (ret = crf1mmw_open_labelrefs(writer, L+2)) { goto error_exit; } for (l = 0;l < L;++l) { edge = TRANSITION_FROM(crf1mt, l); if (ret = crf1mmw_put_labelref(writer, l, edge, fmap)) { goto error_exit; } } edge = TRANSITION_BOS(crf1mt); if (ret = crf1mmw_put_labelref(writer, L, edge, fmap)) { goto error_exit; } edge = TRANSITION_EOS(crf1mt); if (ret = crf1mmw_put_labelref(writer, L+1, edge, fmap)) { goto error_exit; } if (ret = crf1mmw_close_labelrefs(writer)) { goto error_exit; } /* Write attribute feature references. */ logging(crf1mt->lg, "Writing feature references for attributes\n"); if (ret = crf1mmw_open_attrrefs(writer, B)) { goto error_exit; } for (a = 0;a < A;++a) { if (0 <= amap[a]) { attr = ATTRIBUTE(crf1mt, a); if (ret = crf1mmw_put_attrref(writer, amap[a], attr, fmap)) { goto error_exit; } } } if (ret = crf1mmw_close_attrrefs(writer)) { goto error_exit; } /* Close the writer. */ crf1mmw_close(writer); logging(crf1mt->lg, "Seconds required: %.3f\n", (clock() - crf1mt->clk_begin) / (double)CLOCKS_PER_SEC); logging(crf1mt->lg, "\n"); free(amap); free(fmap); return 0;error_exit: if (writer != NULL) { crf1mmw_close(writer); } if (amap != NULL) { free(amap); } if (fmap != NULL) { free(fmap); } return ret;}static int crf_train_addref(crf_trainer_t* trainer){ return crf_interlocked_increment(&trainer->nref);}static int crf_train_release(crf_trainer_t* trainer){ int count = crf_interlocked_decrement(&trainer->nref); if (count == 0) { } return count;}static crf_params_t* crf_train_params(crf_trainer_t* trainer){ crf1ml_t *crf1mt = (crf1ml_t*)trainer->internal; crf_params_t* params = crf1mt->params; params->addref(params); return params;}int crf1ml_create_instance(const char *interface, void **ptr){ if (strcmp(interface, "trainer.crf1m") == 0) { crf_trainer_t* trainer = (crf_trainer_t*)calloc(1, sizeof(crf_trainer_t)); trainer->nref = 1; trainer->addref = crf_train_addref; trainer->release = crf_train_release; trainer->params = crf_train_params; trainer->set_message_callback = crf_train_set_message_callback; trainer->set_evaluate_callback = crf_train_set_evaluate_callback; trainer->train = crf_train_train; trainer->save = crf_train_save; trainer->internal = crf1ml_new(); *ptr = trainer; return 0; } else { return 1; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -