⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crf1m_feature.c

📁 CRFsuite is a very fast implmentation of the Conditional Random Fields (CRF) algorithm. It handles t
💻 C
字号:
/* *      Feature generation for linear-chain CRF. * * Copyright (c) 2007-2009, Naoaki Okazaki * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: *     * Redistributions of source code must retain the above copyright *       notice, this list of conditions and the following disclaimer. *     * Redistributions in binary form must reproduce the above copyright *       notice, this list of conditions and the following disclaimer in the *       documentation and/or other materials provided with the distribution. *     * Neither the names of the authors nor the names of its contributors *       may be used to endorse or promote products derived from this *       software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *//* $Id: crf1m_feature.c 159 2009-03-17 01:50:30Z naoaki $ */#ifdef    HAVE_CONFIG_H#include <config.h>#endif/*HAVE_CONFIG_H*/#include <os.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include <crf.h>#include "logging.h"#include "crf1m.h"#include "rumavl.h"    /* AVL tree library necessary for feature generation. *//** * Feature set. */typedef struct {    RUMAVL* avl;    /**< Root node of the AVL tree. */    int num;        /**< Number of features in the AVL tree. */} featureset_t;#define    COMP(a, b)    ((a)>(b))-((a)<(b))static int featureset_comp(const void *x, const void *y, size_t n, void *udata){    int ret = 0;    const crf1ml_feature_t* f1 = (const crf1ml_feature_t*)x;    const crf1ml_feature_t* f2 = (const crf1ml_feature_t*)y;    ret = COMP(f1->type, f2->type);    if (ret == 0) {        ret = COMP(f1->src, f2->src);        if (ret == 0) {            ret = COMP(f1->dst, f2->dst);        }    }    return ret;}static featureset_t* featureset_new(){    featureset_t* set = NULL;    set = (featureset_t*)calloc(1, sizeof(featureset_t));    if (set != NULL) {        set->num = 0;        set->avl = rumavl_new(            sizeof(crf1ml_feature_t), featureset_comp, NULL, NULL);        if (set->avl == NULL) {            free(set);            set = NULL;        }    }    return set;}static void featureset_delete(featureset_t* set){    if (set != NULL) {        rumavl_destroy(set->avl);        free(set);    }}static int featureset_add(featureset_t* set, const crf1ml_feature_t* f){    /* Check whether if the feature already exists. */    crf1ml_feature_t *p = (crf1ml_feature_t*)rumavl_find(set->avl, f);    if (p == NULL) {        /* Insert the feature to the feature set. */        rumavl_insert(set->avl, f);        ++set->num;    } else {        /* An existing feature: add the observation expectation. */        p->freq += f->freq;    }    return 0;}static void featureset_generate(crf1ml_features_t* features, featureset_t* set, floatval_t minfreq){    int n = 0, k = 0;    RUMAVL_NODE *node = NULL;    crf1ml_feature_t *f = NULL;    features->features = 0;    /* The first pass: count the number of valid features. */    while ((node = rumavl_node_next(set->avl, node, 1, (void**)&f)) != NULL) {        if (minfreq <= f->freq) {            ++n;        }    }    /* The second path: copy the valid features to the feature array. */    features->features = (crf1ml_feature_t*)calloc(n, sizeof(crf1ml_feature_t));    if (features->features != NULL) {        node = NULL;        while ((node = rumavl_node_next(set->avl, node, 1, (void**)&f)) != NULL) {            if (minfreq <= f->freq) {                memcpy(&features->features[k], f, sizeof(crf1ml_feature_t));                ++k;            }        }        features->num_features = n;    }}crf1ml_features_t* crf1ml_generate_features(    const crf_sequence_t *seqs,    int num_sequences,    int num_labels,    int num_attributes,    int connect_all_attrs,    int connect_all_edges,    floatval_t minfreq,    crf_logging_callback func,    void *instance    ){    int c, i, j, s, t;    crf1ml_feature_t f;    featureset_t* set = NULL;    crf1ml_features_t *features = NULL;    const int N = num_sequences;    const int L = num_labels;    logging_t lg;    lg.func = func;    lg.instance = instance;    lg.percent = 0;    /* Allocate a feature container. */    features = (crf1ml_features_t*)calloc(1, sizeof(crf1ml_features_t));    /* Create an instance of feature set. */    set = featureset_new();    /* Loop over the sequences in the training data. */    logging_progress_start(&lg);    for (s = 0;s < N;++s) {        int prev = L, cur = 0;        const crf_item_t* item = NULL;        const crf_sequence_t* seq = &seqs[s];        const int T = seq->num_items;        /* Loop over the items in the sequence. */        for (t = 0;t < T;++t) {            item = &seq->items[t];            cur = item->label;            /* Transition feature: label #prev -> label #(item->yid).               Features with previous label #L are transition BOS. */            f.type = (prev == L) ? FT_TRANS_BOS : FT_TRANS;            f.src = prev;            f.dst = cur;            f.freq = 1;            featureset_add(set, &f);            for (c = 0;c < item->num_contents;++c) {                /* State feature: attribute #a -> state #(item->yid). */                f.type = FT_STATE;                f.src = item->contents[c].aid;                f.dst = cur;                f.freq = item->contents[c].scale;                featureset_add(set, &f);                /* Generate state features connecting attributes with all                   output labels. These features are not unobserved in the                   training data (zero expexcations). */                if (connect_all_attrs) {                    for (i = 0;i < L;++i) {                        f.type = FT_STATE;                        f.src = item->contents[c].aid;                        f.dst = i;                        f.freq = 0;                        featureset_add(set, &f);                    }                }            }            prev = cur;        }        /* Transition EOS feature: label #(item->yid) -> EOS. */        item = &seq->items[T-1];        f.type = FT_TRANS_EOS;        f.src = cur;        f.dst = L;        f.freq = 1;        featureset_add(set, &f);        logging_progress(&lg, s * 100 / N);    }    logging_progress_end(&lg);    /* Make sure to generate all possible BOS and EOS features. */    for (i = 0;i < L;++i) {        f.type = FT_TRANS_BOS;        f.src = L;        f.dst = i;        f.freq = 0;        featureset_add(set, &f);        f.type = FT_TRANS_EOS;        f.src = i;        f.dst = L;        f.freq = 0;        featureset_add(set, &f);    }    /* Generate edge features representing all pairs of labels.       These features are not unobserved in the training data       (zero expexcations). */    if (connect_all_edges) {        for (i = 0;i < L;++i) {            for (j = 0;j < L;++j) {                f.type = FT_TRANS;                f.src = i;                f.dst = j;                f.freq = 0;                featureset_add(set, &f);            }        }    }    /* Convert the feature set to an feature array. */    featureset_generate(features, set, minfreq);    /* Delete the feature set. */    featureset_delete(set);    return features;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -