⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crf.c

📁 CRFsuite is a very fast implementation of the Conditional Random Fields (CRF) algorithm. It handles t
💻 C
字号:
/*
 *      CRFsuite library.
 *
 * Copyright (c) 2007-2009, Naoaki Okazaki
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the names of the authors nor the names of its contributors
 *       may be used to endorse or promote products derived from this
 *       software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* $Id: crf.c 159 2009-03-17 01:50:30Z naoaki $ */

#include <os.h>

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <crf.h>

/* Factory functions implemented in other translation units. */
int crf1ml_create_instance(const char *iid, void **ptr);
int crf_dictionary_create_instance(const char *interface, void **ptr);
int crf1m_create_instance_from_file(const char *filename, void **ptr);

/**
 * Creates an instance for the interface identifier by trying each factory.
 *
 * The trainer factory is tried first; the dictionary factory is tried only
 * when the trainer factory did not return 0.
 *
 *  @param  iid     Interface identifier forwarded to the factories.
 *  @param  ptr     Receives the created instance.
 *  @return 1 if one of the factories returned 0, otherwise 0. Note that this
 *          is the opposite polarity of crf_create_instance_from_file().
 */
int crf_create_instance(const char *iid, void **ptr)
{
    int ret =
        crf1ml_create_instance(iid, ptr) == 0 ||
        crf_dictionary_create_instance(iid, ptr) == 0;

    return ret;
}

/**
 * Creates an instance from a file.
 *
 *  @param  filename    File name forwarded to the factory.
 *  @param  ptr         Receives the created instance.
 *  @return The value returned by crf1m_create_instance_from_file(),
 *          unmodified.
 */
int crf_create_instance_from_file(const char *filename, void **ptr)
{
    int ret = crf1m_create_instance_from_file(filename, ptr);

    return ret;
}

/**
 * Zero-fills a content and sets its scale to 1.
 */
void crf_content_init(crf_content_t* cont)
{
    memset(cont, 0, sizeof(*cont));
    cont->scale = 1;
}

/**
 * Re-initializes a content and assigns its attribute id and scale.
 */
void crf_content_set(crf_content_t* cont, int aid, floatval_t scale)
{
    crf_content_init(cont);
    cont->aid = aid;
    cont->scale = scale;
}

/**
 * Copies the attribute id and scale from src to dst.
 */
void crf_content_copy(crf_content_t* dst, const crf_content_t* src)
{
    dst->aid = src->aid;
    dst->scale = src->scale;
}

/**
 * Exchanges the attribute id and scale of two contents.
 */
void crf_content_swap(crf_content_t* x, crf_content_t* y)
{
    crf_content_t tmp = *x;

    x->aid = y->aid;
    x->scale = y->scale;
    y->aid = tmp.aid;
    y->scale = tmp.scale;
}

/**
 * Zero-fills an item. Does not release memory the item may already own.
 */
void crf_item_init(crf_item_t* item)
{
    memset(item, 0, sizeof(*item));
}

/**
 * Initializes an item holding num_contents zero-filled contents.
 *
 * If the array cannot be allocated the item is left empty
 * (num_contents == 0), mirroring crf_output_init_n().
 */
void crf_item_init_n(crf_item_t* item, int num_contents)
{
    crf_item_init(item);
    if (0 < num_contents) {
        item->contents = (crf_content_t*)calloc(num_contents, sizeof(crf_content_t));
        if (item->contents != NULL) {
            item->num_contents = num_contents;
            item->max_contents = num_contents;
        }
    }
}

/**
 * Releases the content array of an item and resets the item to zero.
 */
void crf_item_finish(crf_item_t* item)
{
    free(item->contents);
    crf_item_init(item);
}

/**
 * Deep-copies src into dst. Any previous content of dst is overwritten
 * without being released.
 *
 * The new array is sized to the recorded capacity (max_contents), not just
 * to num_contents, so that a later crf_item_append_content() on dst cannot
 * write past the end of the allocation. If the allocation fails, dst is left
 * as an empty item carrying only the label.
 */
void crf_item_copy(crf_item_t* dst, const crf_item_t* src)
{
    int i;
    /* Read everything from src first so that the fields of dst can be reset. */
    const int count = src->num_contents;
    const crf_content_t* from = src->contents;
    int cap = src->max_contents;

    if (cap < count) {
        cap = count;
    }

    dst->label = src->label;
    dst->num_contents = 0;
    dst->max_contents = 0;
    dst->contents = NULL;

    if (cap <= 0) {
        return;
    }

    dst->contents = (crf_content_t*)calloc(cap, sizeof(crf_content_t));
    if (dst->contents == NULL) {
        return;
    }

    dst->num_contents = count;
    dst->max_contents = cap;
    for (i = 0;i < count;++i) {
        crf_content_copy(&dst->contents[i], &from[i]);
    }
}

/**
 * Exchanges two items, including ownership of their content arrays.
 */
void crf_item_swap(crf_item_t* x, crf_item_t* y)
{
    crf_item_t tmp = *x;

    x->num_contents = y->num_contents;
    x->max_contents = y->max_contents;
    x->label = y->label;
    x->contents = y->contents;
    y->num_contents = tmp.num_contents;
    y->max_contents = tmp.max_contents;
    y->label = tmp.label;
    y->contents = tmp.contents;
}

/**
 * Appends a copy of a content to an item, growing the array when it is full.
 *
 *  @return 0 on success, 1 if the array could not be grown (the item is left
 *          unchanged in that case).
 */
int crf_item_append_content(crf_item_t* item, const crf_content_t* cont)
{
    if (item->max_contents <= item->num_contents) {
        int cap = (item->max_contents + 1) * 2;
        /* Keep the old pointer until realloc() is known to have succeeded. */
        crf_content_t* grown = (crf_content_t*)realloc(
            item->contents, sizeof(crf_content_t) * cap);
        if (grown == NULL) {
            return 1;
        }
        item->contents = grown;
        item->max_contents = cap;
    }
    crf_content_copy(&item->contents[item->num_contents++], cont);
    return 0;
}

/**
 * Returns non-zero if the item has no contents.
 */
int  crf_item_empty(crf_item_t* item)
{
    return (item->num_contents == 0);
}

/**
 * Zero-fills a sequence. Does not release memory it may already own.
 */
void crf_sequence_init(crf_sequence_t* inst)
{
    memset(inst, 0, sizeof(*inst));
}

/**
 * Initializes a sequence holding num_items zero-filled items.
 *
 * If the array cannot be allocated the sequence is left empty.
 */
void crf_sequence_init_n(crf_sequence_t* inst, int num_items)
{
    crf_sequence_init(inst);
    if (0 < num_items) {
        inst->items = (crf_item_t*)calloc(num_items, sizeof(crf_item_t));
        if (inst->items != NULL) {
            inst->num_items = num_items;
            inst->max_items = num_items;
        }
    }
}

/**
 * Releases every item of a sequence, then the item array, and resets the
 * sequence to zero.
 */
void crf_sequence_finish(crf_sequence_t* inst)
{
    int i;

    for (i = 0;i < inst->num_items;++i) {
        crf_item_finish(&inst->items[i]);
    }
    free(inst->items);
    crf_sequence_init(inst);
}

/**
 * Deep-copies src into dst. Any previous content of dst is overwritten
 * without being released.
 *
 * The item array is sized to the recorded capacity (max_items) so that a
 * later crf_sequence_append() on dst stays inside the allocation. If the
 * allocation fails, dst is left as an empty sequence.
 */
void crf_sequence_copy(crf_sequence_t* dst, const crf_sequence_t* src)
{
    int i;
    const int count = src->num_items;
    const crf_item_t* from = src->items;
    int cap = src->max_items;

    if (cap < count) {
        cap = count;
    }

    dst->num_items = 0;
    dst->max_items = 0;
    dst->items = NULL;

    if (cap <= 0) {
        return;
    }

    dst->items = (crf_item_t*)calloc(cap, sizeof(crf_item_t));
    if (dst->items == NULL) {
        return;
    }

    dst->num_items = count;
    dst->max_items = cap;
    for (i = 0;i < count;++i) {
        crf_item_copy(&dst->items[i], &from[i]);
    }
}

/**
 * Exchanges two sequences, including ownership of their item arrays.
 */
void crf_sequence_swap(crf_sequence_t* x, crf_sequence_t* y)
{
    crf_sequence_t tmp = *x;

    x->num_items = y->num_items;
    x->max_items = y->max_items;
    x->items = y->items;
    y->num_items = tmp.num_items;
    y->max_items = tmp.max_items;
    y->items = tmp.items;
}

/**
 * Appends a deep copy of an item to a sequence and assigns it a label.
 *
 *  @param  inst    The sequence to extend.
 *  @param  item    The item to copy; its own label is replaced by label.
 *  @param  label   The label stored in the appended item.
 *  @return 0 on success, 1 if memory could not be allocated (the number of
 *          items in the sequence is unchanged in that case).
 */
int crf_sequence_append(crf_sequence_t* inst, const crf_item_t* item, int label)
{
    crf_item_t* slot = NULL;

    if (inst->max_items <= inst->num_items) {
        int cap = (inst->max_items + 1) * 2;
        /* Keep the old pointer until realloc() is known to have succeeded. */
        crf_item_t* grown = (crf_item_t*)realloc(
            inst->items, sizeof(crf_item_t) * cap);
        if (grown == NULL) {
            return 1;
        }
        inst->items = grown;
        inst->max_items = cap;
    }

    slot = &inst->items[inst->num_items];
    crf_item_copy(slot, item);
    if (slot->num_contents != item->num_contents) {
        /* The copy could not allocate its contents; it owns no memory. */
        return 1;
    }
    slot->label = label;
    ++inst->num_items;
    return 0;
}

/**
 * Returns non-zero if the sequence has no items.
 */
int  crf_sequence_empty(crf_sequence_t* inst)
{
    return (inst->num_items == 0);
}

/**
 * Zero-fills a data set. Does not release memory it may already own.
 */
void crf_data_init(crf_data_t* data)
{
    memset(data, 0, sizeof(*data));
}

/**
 * Initializes a data set holding n zero-filled sequences.
 *
 * If the array cannot be allocated the data set is left empty.
 */
void crf_data_init_n(crf_data_t* data, int n)
{
    crf_data_init(data);
    if (0 < n) {
        data->instances = (crf_sequence_t*)calloc(n, sizeof(crf_sequence_t));
        if (data->instances != NULL) {
            data->num_instances = n;
            data->max_instances = n;
        }
    }
}

/**
 * Releases every sequence of a data set, then the sequence array, and
 * resets the data set to zero.
 */
void crf_data_finish(crf_data_t* data)
{
    int i;

    for (i = 0;i < data->num_instances;++i) {
        crf_sequence_finish(&data->instances[i]);
    }
    free(data->instances);
    crf_data_init(data);
}

/**
 * Deep-copies src into dst. Any previous content of dst is overwritten
 * without being released.
 *
 * The sequence array is sized to the recorded capacity (max_instances) so
 * that a later crf_data_append() on dst stays inside the allocation. If the
 * allocation fails, dst is left as an empty data set.
 */
void crf_data_copy(crf_data_t* dst, const crf_data_t* src)
{
    int i;
    const int count = src->num_instances;
    const crf_sequence_t* from = src->instances;
    int cap = src->max_instances;

    if (cap < count) {
        cap = count;
    }

    dst->num_instances = 0;
    dst->max_instances = 0;
    dst->instances = NULL;

    if (cap <= 0) {
        return;
    }

    dst->instances = (crf_sequence_t*)calloc(cap, sizeof(crf_sequence_t));
    if (dst->instances == NULL) {
        return;
    }

    dst->num_instances = count;
    dst->max_instances = cap;
    for (i = 0;i < count;++i) {
        crf_sequence_copy(&dst->instances[i], &from[i]);
    }
}

/**
 * Exchanges two data sets, including ownership of their sequence arrays.
 */
void crf_data_swap(crf_data_t* x, crf_data_t* y)
{
    crf_data_t tmp = *x;

    x->num_instances = y->num_instances;
    x->max_instances = y->max_instances;
    x->instances = y->instances;
    y->num_instances = tmp.num_instances;
    y->max_instances = tmp.max_instances;
    y->instances = tmp.instances;
}

/**
 * Appends a deep copy of a sequence to a data set. Empty sequences are
 * silently ignored.
 *
 *  @return 0 on success (including the ignored empty sequence), 1 if memory
 *          could not be allocated (the number of instances is unchanged in
 *          that case).
 */
int  crf_data_append(crf_data_t* data, const crf_sequence_t* inst)
{
    crf_sequence_t* slot = NULL;

    if (inst->num_items <= 0) {
        return 0;
    }

    if (data->max_instances <= data->num_instances) {
        int cap = (data->max_instances + 1) * 2;
        /* Keep the old pointer until realloc() is known to have succeeded. */
        crf_sequence_t* grown = (crf_sequence_t*)realloc(
            data->instances, sizeof(crf_sequence_t) * cap);
        if (grown == NULL) {
            return 1;
        }
        data->instances = grown;
        data->max_instances = cap;
    }

    slot = &data->instances[data->num_instances];
    crf_sequence_copy(slot, inst);
    if (slot->num_items != inst->num_items) {
        /* The copy could not allocate its items; it owns no memory. */
        return 1;
    }
    ++data->num_instances;
    return 0;
}

/**
 * Returns the number of items in the longest sequence (0 for an empty set).
 */
int  crf_data_maxlength(crf_data_t* data)
{
    int i, T = 0;

    for (i = 0;i < data->num_instances;++i) {
        if (T < data->instances[i].num_items) {
            T = data->instances[i].num_items;
        }
    }
    return T;
}

/**
 * Returns the total number of items over all sequences of a data set.
 */
int  crf_data_totalitems(crf_data_t* data)
{
    int i, n = 0;

    for (i = 0;i < data->num_instances;++i) {
        n += data->instances[i].num_items;
    }
    return n;
}

/**
 * Zero-fills an output. Does not release memory it may already own.
 */
void crf_output_init(crf_output_t* output)
{
    memset(output, 0, sizeof(*output));
}

/**
 * Initializes an output with room for n zero-filled labels. num_labels is
 * set to n only when the allocation returned a non-NULL pointer.
 */
void crf_output_init_n(crf_output_t* output, int n)
{
    crf_output_init(output);
    output->labels = (int*)calloc(n, sizeof(int));
    if (output->labels != NULL) {
        output->num_labels = n;
    }
}

/**
 * Releases the label array of an output and resets the output to zero.
 */
void crf_output_finish(crf_output_t* output)
{
    free(output->labels);
    crf_output_init(output);
}

/**
 * Copies at most n-1 characters of src into dst and always terminates dst.
 * Nothing is written when n is 0.
 *
 *  @return dst.
 */
static char *safe_strncpy(char *dst, const char *src, size_t n)
{
    /* n-1 would wrap around for an empty destination buffer. */
    if (n == 0) {
        return dst;
    }
    strncpy(dst, src, n-1);
    dst[n-1] = 0;
    return dst;
}

/**
 * Initializes an evaluation for n labels.
 *
 * The table gets n+1 zero-filled entries; crf_evaluation_compute() treats
 * the extra entry at index n as the slot for unknown labels. num_labels is
 * set only when the table was allocated; a negative n leaves the evaluation
 * zeroed with no table.
 */
void crf_evaluation_init(crf_evaluation_t* eval, int n)
{
    memset(eval, 0, sizeof(*eval));
    if (n < 0) {
        return;
    }
    eval->tbl = (crf_label_evaluation_t*)calloc(n+1, sizeof(crf_label_evaluation_t));
    if (eval->tbl != NULL) {
        eval->num_labels = n;
    }
}

/**
 * Resets all counters and statistics, keeping the allocated table.
 */
void crf_evaluation_clear(crf_evaluation_t* eval)
{
    int i;

    /* The table is absent when crf_evaluation_init() failed to allocate it. */
    if (eval->tbl != NULL) {
        for (i = 0;i <= eval->num_labels;++i) {
            memset(&eval->tbl[i], 0, sizeof(eval->tbl[i]));
        }
    }

    eval->item_total_correct = 0;
    eval->item_total_num = 0;
    eval->item_total_model = 0;
    eval->item_total_observation = 0;
    eval->item_accuracy = 0;

    eval->inst_total_correct = 0;
    eval->inst_total_num = 0;
    eval->inst_accuracy = 0;

    eval->macro_precision = 0;
    eval->macro_recall = 0;
    eval->macro_fmeasure = 0;
}

/**
 * Releases the table of an evaluation and resets the evaluation to zero.
 */
void crf_evaluation_finish(crf_evaluation_t* eval)
{
    free(eval->tbl);
    memset(eval, 0, sizeof(*eval));
}

/**
 * Accumulates the counts of one tagged sequence into an evaluation.
 *
 *  @param  eval        The evaluation to update.
 *  @param  reference   The sequence carrying the reference labels.
 *  @param  target      The labels predicted for the same sequence.
 *  @return 0 on success; 1 if the evaluation has no table, if the two
 *          sequences differ in length, or if a label lies outside
 *          [0, num_labels). When a bad label is met, the counts of the
 *          positions preceding it have already been accumulated.
 */
int crf_evaluation_accmulate(crf_evaluation_t* eval, const crf_sequence_t* reference, const crf_output_t* target)
{
    int t, nc = 0;

    if (eval->tbl == NULL) {
        return 1;
    }

    /* Make sure that the reference and target sequences have the output labels of the same length. */
    if (reference->num_items != target->num_labels) {
        return 1;
    }

    for (t = 0;t < target->num_labels;++t) {
        int lr = reference->items[t].label;
        int lt = target->labels[t];

        /* Reject labels that would index outside the table. */
        if (lr < 0 || lt < 0 || eval->num_labels <= lr || eval->num_labels <= lt) {
            return 1;
        }

        ++eval->tbl[lr].num_observation;
        ++eval->tbl[lt].num_model;
        if (lr == lt) {
            ++eval->tbl[lr].num_correct;
            ++nc;
        }
        ++eval->item_total_num;
    }

    /* The instance is correct only when every item was labeled correctly. */
    if (nc == target->num_labels) {
        ++eval->inst_total_correct;
    }
    ++eval->inst_total_num;

    return 0;
}

/**
 * Computes per-label precision/recall/F1, their macro averages, and the
 * item and instance accuracies from the accumulated counts.
 *
 * The derived totals are recomputed from scratch, so calling this function
 * more than once on the same counts yields the same result. Labels with no
 * observation are skipped, yet the macro averages are still divided by
 * num_labels.
 */
void crf_evaluation_compute(crf_evaluation_t* eval)
{
    int i;

    /* Restart the derived values so that repeated calls do not double-count. */
    eval->item_total_correct = 0;
    eval->item_total_model = 0;
    eval->item_total_observation = 0;
    eval->macro_precision = 0;
    eval->macro_recall = 0;
    eval->macro_fmeasure = 0;

    if (eval->tbl != NULL) {
        for (i = 0;i <= eval->num_labels;++i) {
            crf_label_evaluation_t* lev = &eval->tbl[i];

            /* Do not evaluate labels that do not appear in the test data. */
            if (lev->num_observation == 0) {
                continue;
            }

            /* Sum the number of correct labels for accuracy calculation. */
            eval->item_total_correct += lev->num_correct;
            eval->item_total_model += lev->num_model;
            eval->item_total_observation += lev->num_observation;

            /* Initialize the precision, recall, and f1-measure values. */
            lev->precision = 0;
            lev->recall = 0;
            lev->fmeasure = 0;

            /* Compute the precision, recall, and f1-measure values. */
            if (lev->num_model > 0) {
                lev->precision = lev->num_correct / (double)lev->num_model;
            }
            if (lev->num_observation > 0) {
                lev->recall = lev->num_correct / (double)lev->num_observation;
            }
            if (lev->precision + lev->recall > 0) {
                lev->fmeasure = lev->precision * lev->recall * 2 / (lev->precision + lev->recall);
            }

            /* Exclude unknown labels from calculation of macro-average values. */
            if (i != eval->num_labels) {
                eval->macro_precision += lev->precision;
                eval->macro_recall += lev->recall;
                eval->macro_fmeasure += lev->fmeasure;
            }
        }
    }

    /* Compute the macro precision, recall, and f1-measure values. */
    if (0 < eval->num_labels) {
        eval->macro_precision /= eval->num_labels;
        eval->macro_recall /= eval->num_labels;
        eval->macro_fmeasure /= eval->num_labels;
    }

    /* Compute the item accuracy. */
    eval->item_accuracy = 0;
    if (0 < eval->item_total_num) {
        eval->item_accuracy = eval->item_total_correct / (double)eval->item_total_num;
    }

    /* Compute the instance accuracy. */
    eval->inst_accuracy = 0;
    if (0 < eval->inst_total_num) {
        eval->inst_accuracy = eval->inst_total_correct / (double)eval->inst_total_num;
    }
}

/**
 * Writes a textual report of an evaluation to a stream.
 *
 *  @param  eval    The evaluation to report.
 *  @param  labels  Dictionary used to convert label ids into strings; ids it
 *                  cannot convert are printed as "[UNKNOWN]".
 *  @param  fpo     The output stream.
 */
void crf_evaluation_output(crf_evaluation_t* eval, crf_dictionary_t* labels, FILE *fpo)
{
    int i;

    fprintf(fpo, "Performance by label (#match, #model, #ref) (precision, recall, F1):\n");

    for (i = 0;i < eval->num_labels;++i) {
        const crf_label_evaluation_t* lev = &eval->tbl[i];
        /* Reset per label so that a failed lookup never reuses a stale pointer. */
        const char *lstr = NULL;
        const char *name = NULL;

        labels->to_string(labels, i, &lstr);
        name = (lstr != NULL) ? lstr : "[UNKNOWN]";

        if (lev->num_observation == 0) {
            fprintf(fpo, "    %s: (%d, %d, %d) (******, ******, ******)\n",
                name, lev->num_correct, lev->num_model, lev->num_observation
                );
        } else {
            fprintf(fpo, "    %s: (%d, %d, %d) (%1.4f, %1.4f, %1.4f)\n",
                name, lev->num_correct, lev->num_model, lev->num_observation,
                lev->precision, lev->recall, lev->fmeasure
                );
        }

        /* Hand back only strings obtained from the dictionary, never the literal. */
        if (lstr != NULL) {
            labels->free(labels, lstr);
        }
    }

    fprintf(fpo, "Macro-average precision, recall, F1: (%f, %f, %f)\n",
        eval->macro_precision, eval->macro_recall, eval->macro_fmeasure
        );
    fprintf(fpo, "Item accuracy: %d / %d (%1.4f)\n",
        eval->item_total_correct, eval->item_total_num, eval->item_accuracy
        );
    fprintf(fpo, "Instance accuracy: %d / %d (%1.4f)\n",
        eval->inst_total_correct, eval->inst_total_num, eval->inst_accuracy
        );
}

/**
 * Increments a counter and returns the new value.
 * This is a plain increment; it is not an atomic operation.
 */
int crf_interlocked_increment(int *count)
{
    return ++(*count);
}

/**
 * Decrements a counter and returns the new value.
 * This is a plain decrement; it is not an atomic operation.
 */
int crf_interlocked_decrement(int *count)
{
    return --(*count);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -