⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crf1m_model.c

📁 CRFsuite is a very fast implementation of the Conditional Random Fields (CRF) algorithm. It handles t
💻 C
📖 第 1 页 / 共 2 页
字号:
/*
 *      Linear-chain CRF model.
 *
 * Copyright (c) 2007-2009, Naoaki Okazaki
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the names of the authors nor the names of its contributors
 *       may be used to endorse or promote products derived from this
 *       software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* $Id: crf1m_model.c 159 2009-03-17 01:50:30Z naoaki $ */

#include "os.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>

#include <cqdb.h>

#include "crf.h"
#include "crf1m.h"

/* Four-character tags written into the file header and chunk headers. */
#define FILEMAGIC       "lCRF"
#define MODELTYPE       "FOMC"
#define VERSION_NUMBER  (100)
#define CHUNK_LABELREF  "LFRF"
#define CHUNK_ATTRREF   "AFRF"
#define CHUNK_FEATURE   "FEAT"

/*
 * Serialized sizes in bytes. HEADER_SIZE equals the sum of the header_t
 * fields as written by crf1mmw_close() (4 + 4 + 4 + 4 + 8 * 4), and
 * CHUNK_SIZE equals the chunk id, size and item count of a chunk header.
 */
#define HEADER_SIZE     48
#define CHUNK_SIZE      12
#define FEATURE_SIZE    20

/* Writer states: which section (if any) is currently open for writing. */
enum {
    WSTATE_NONE,
    WSTATE_LABELS,
    WSTATE_ATTRS,
    WSTATE_LABELREFS,
    WSTATE_ATTRREFS,
    WSTATE_FEATURES,
};

/* In-memory image of the file header. */
typedef struct {
    uint8_t     magic[4];       /* File magic. */
    uint32_t    size;           /* File size. */
    uint8_t     type[4];        /* Model type */
    uint32_t    version;        /* Version number. */
    uint32_t    num_features;   /* Number of features. */
    uint32_t    num_labels;     /* Number of labels. */
    uint32_t    num_attrs;      /* Number of attributes. */
    uint32_t    off_features;   /* Offset to features. */
    uint32_t    off_labels;     /* Offset to label CQDB. */
    uint32_t    off_attrs;      /* Offset to attribute CQDB. */
    uint32_t    off_labelrefs;  /* Offset to label feature references. */
    uint32_t    off_attrrefs;   /* Offset to attribute feature references. */
} header_t;

/*
 * Header of a feature-reference chunk. The offsets array is declared with
 * one element but is allocated with `num` elements by the writer.
 */
typedef struct {
    uint8_t     chunk[4];       /* Chunk id */
    uint32_t    size;           /* Chunk size. */
    uint32_t    num;            /* Number of items. */
    uint32_t    offsets[1];     /* Offsets. */
} featureref_header_t;

/* Header of the feature chunk. */
typedef struct {
    uint8_t     chunk[4];       /* Chunk id */
    uint32_t    size;           /* Chunk size. */
    uint32_t    num;            /* Number of items. */
} feature_header_t;

/* Model reader state. */
struct tag_crf1mm {
    uint8_t*    buffer;
    uint32_t    size;
    header_t*   header;
    cqdb_t*     labels;
    cqdb_t*     attrs;
};

/* Model writer state. */
struct tag_crf1mmw {
    FILE *fp;                       /* Output stream. */
    int state;                      /* One of the WSTATE_* values. */
    header_t header;                /* Header written by crf1mmw_close(). */
    cqdb_writer_t* dbw;             /* CQDB writer for the open labels/attrs section. */
    featureref_header_t* href;      /* Header of the open feature-reference chunk. */
    feature_header_t* hfeat;
};

enum {
    KT_GLOBAL = 'A',
    KT_NUMATTRS,
    KT_NUMLABELS,
    KT_STR2LID,
    KT_LID2STR,
    KT_STR2AID,
    KT_FEATURE,
};

/* Writes one byte to fp. Returns 0 on success, 1 on failure. */
static int write_uint8(FILE *fp, uint8_t value)
{
    return fwrite(&value, sizeof(value), 1, fp) == 1 ? 0 : 1;
}

/* Reads one byte from buffer into *value. Returns the number of bytes consumed. */
static int read_uint8(uint8_t* buffer, uint8_t* value)
{
    *value = *buffer;
    return sizeof(*value);
}

/*
 * Writes a 32-bit value to fp, least significant byte first.
 * Returns 0 on success, 1 on failure.
 */
static int write_uint32(FILE *fp, uint32_t value)
{
    uint8_t buffer[4];
    buffer[0] = (uint8_t)(value & 0xFF);
    buffer[1] = (uint8_t)(value >> 8);
    buffer[2] = (uint8_t)(value >> 16);
    buffer[3] = (uint8_t)(value >> 24);
    return fwrite(buffer, sizeof(uint8_t), 4, fp) == 4 ? 0 : 1;
}

/*
 * Reads a 32-bit value stored least significant byte first from buffer.
 * Returns the number of bytes consumed.
 */
static int read_uint32(uint8_t* buffer, uint32_t* value)
{
    *value  = ((uint32_t)buffer[0]);
    *value |= ((uint32_t)buffer[1] << 8);
    *value |= ((uint32_t)buffer[2] << 16);
    *value |= ((uint32_t)buffer[3] << 24);
    return sizeof(*value);
}

/*
 * Writes n bytes from array to fp with a single fwrite() call.
 * Returns 0 on success (including n == 0), 1 if fewer than n bytes were written.
 */
static int write_uint8_array(FILE *fp, const uint8_t *array, size_t n)
{
    return fwrite(array, sizeof(uint8_t), n, fp) == n ? 0 : 1;
}

/* Copies n bytes from buffer into array. Returns the number of bytes consumed. */
static int read_uint8_array(uint8_t* buffer, uint8_t *array, size_t n)
{
    size_t i;
    int ret = 0;
    for (i = 0;i < n;++i) {
        int size = read_uint8(buffer, &array[i]);
        buffer += size;
        ret += size;
    }
    return ret;
}

/*
 * Writes the 64-bit memory image of value to fp, least significant byte first.
 * The fwrite() result is not reported to the caller; a failed write leaves the
 * stream's error indicator set, which crf1mmw_close() checks with ferror().
 */
static void write_float(FILE *fp, floatval_t value)
{
    /*
        We assume:
            - sizeof(floatval_t) = sizeof(double) = sizeof(uint64_t)
            - the byte order of floatval_t and uint64_t is the same
            - ARM's mixed-endian is not supported
    */
    uint64_t iv;
    uint8_t buffer[8];

    /* Copy the memory image of floatval_t value to uint64_t. */
    memcpy(&iv, &value, sizeof(iv));

    buffer[0] = (uint8_t)(iv & 0xFF);
    buffer[1] = (uint8_t)(iv >> 8);
    buffer[2] = (uint8_t)(iv >> 16);
    buffer[3] = (uint8_t)(iv >> 24);
    buffer[4] = (uint8_t)(iv >> 32);
    buffer[5] = (uint8_t)(iv >> 40);
    buffer[6] = (uint8_t)(iv >> 48);
    buffer[7] = (uint8_t)(iv >> 56);
    fwrite(buffer, sizeof(uint8_t), 8, fp);
}

/*
 * Rebuilds a floatval_t from 8 bytes stored least significant byte first.
 * Returns sizeof(floatval_t) as the number of bytes consumed.
 */
static int read_float(uint8_t* buffer, floatval_t* value)
{
    uint64_t iv;
    iv  = ((uint64_t)buffer[0]);
    iv |= ((uint64_t)buffer[1] << 8);
    iv |= ((uint64_t)buffer[2] << 16);
    iv |= ((uint64_t)buffer[3] << 24);
    iv |= ((uint64_t)buffer[4] << 32);
    iv |= ((uint64_t)buffer[5] << 40);
    iv |= ((uint64_t)buffer[6] << 48);
    iv |= ((uint64_t)buffer[7] << 56);
    memcpy(value, &iv, sizeof(*value));
    return sizeof(*value);
}

/*
 * Creates a model writer for the given file.
 *
 * The file is opened in "wb" mode and the file position is advanced by
 * HEADER_SIZE bytes; the header itself is written by crf1mmw_close().
 *
 * Returns the new writer, or NULL if the allocation, fopen() or fseek() fails.
 */
crf1mmw_t* crf1mmw(const char *filename)
{
    header_t *header = NULL;
    crf1mmw_t *writer = NULL;

    /* Create a writer instance. */
    writer = (crf1mmw_t*)calloc(1, sizeof(crf1mmw_t));
    if (writer == NULL) {
        goto error_exit;
    }

    /* Open the file for writing. */
    writer->fp = fopen(filename, "wb");
    if (writer->fp == NULL) {
        goto error_exit;
    }

    /*
        Fill the members in the header. The tags are fixed 4-byte fields
        without a terminator, so copy raw bytes instead of using strncpy()
        on a uint8_t array.
    */
    header = &writer->header;
    memcpy(header->magic, FILEMAGIC, sizeof(header->magic));
    memcpy(header->type, MODELTYPE, sizeof(header->type));
    header->version = VERSION_NUMBER;

    /* Advance the file position to skip the file header. */
    if (fseek(writer->fp, HEADER_SIZE, SEEK_CUR) != 0) {
        goto error_exit;
    }

    return writer;

error_exit:
    if (writer != NULL) {
        if (writer->fp != NULL) {
            fclose(writer->fp);
        }
        free(writer);
    }
    return NULL;
}

/*
 * Writes the file header, closes the file and frees the writer.
 *
 * The current file position is recorded as the file size. The stream is
 * closed and the writer is freed on every path, so the writer must not be
 * used after this call.
 *
 * Returns 0 on success; 1 if ftell(), fseek(), any write on the stream, or
 * the final fclose() (which flushes buffered data) fails.
 */
int crf1mmw_close(crf1mmw_t* writer)
{
    FILE *fp = writer->fp;
    header_t *header = &writer->header;
    long size;
    int ret;

    /* Store the file size. */
    size = ftell(fp);
    if (size < 0) {
        goto error_exit;
    }
    header->size = (uint32_t)size;

    /* Move the file position to the head. */
    if (fseek(fp, 0, SEEK_SET) != 0) {
        goto error_exit;
    }

    /* Write the file header. */
    write_uint8_array(fp, header->magic, sizeof(header->magic));
    write_uint32(fp, header->size);
    write_uint8_array(fp, header->type, sizeof(header->type));
    write_uint32(fp, header->version);
    write_uint32(fp, header->num_features);
    write_uint32(fp, header->num_labels);
    write_uint32(fp, header->num_attrs);
    write_uint32(fp, header->off_features);
    write_uint32(fp, header->off_labels);
    write_uint32(fp, header->off_attrs);
    write_uint32(fp, header->off_labelrefs);
    write_uint32(fp, header->off_attrrefs);

    /* Check for any error occurrence. */
    if (ferror(fp)) {
        goto error_exit;
    }

    /* Close the writer; a failed fclose() means buffered data was lost. */
    ret = (fclose(fp) == 0) ? 0 : 1;
    free(writer);
    return ret;

error_exit:
    fclose(fp);
    free(writer);
    return 1;
}

/*
 * Begins the label section.
 *
 * Records the current file offset as the label offset and opens a CQDB
 * writer on the stream. Returns 0 on success; 1 if another section is open,
 * ftell() fails, or cqdb_writer() returns NULL.
 */
int crf1mmw_open_labels(crf1mmw_t* writer, int num_labels)
{
    long pos;

    /* Check if we aren't writing anything at this moment. */
    if (writer->state != WSTATE_NONE) {
        return 1;
    }

    /* Store the current offset. */
    pos = ftell(writer->fp);
    if (pos < 0) {
        return 1;
    }
    writer->header.off_labels = (uint32_t)pos;

    /* Open a CQDB chunk for writing. */
    writer->dbw = cqdb_writer(writer->fp, 0);
    if (writer->dbw == NULL) {
        writer->header.off_labels = 0;
        return 1;
    }

    writer->state = WSTATE_LABELS;
    writer->header.num_labels = num_labels;
    return 0;
}

/*
 * Ends the label section.
 *
 * Returns 0 on success; 1 if the label section is not open or
 * cqdb_writer_close() reports failure (the state is left unchanged then).
 */
int crf1mmw_close_labels(crf1mmw_t* writer)
{
    /* Make sure that we are writing labels. */
    if (writer->state != WSTATE_LABELS) {
        return 1;
    }

    /* Close the CQDB chunk. */
    if (cqdb_writer_close(writer->dbw)) {
        return 1;
    }

    writer->dbw = NULL;
    writer->state = WSTATE_NONE;
    return 0;
}

/*
 * Stores one (label string, label id) pair in the open label section.
 * Returns 0 on success; 1 if the label section is not open or
 * cqdb_writer_put() reports failure.
 */
int crf1mmw_put_label(crf1mmw_t* writer, int lid, const char *value)
{
    /* Make sure that we are writing labels. */
    if (writer->state != WSTATE_LABELS) {
        return 1;
    }

    /* Put the label. */
    if (cqdb_writer_put(writer->dbw, value, lid)) {
        return 1;
    }

    return 0;
}

/*
 * Begins the attribute section.
 *
 * Records the current file offset as the attribute offset and opens a CQDB
 * writer on the stream. Returns 0 on success; 1 if another section is open,
 * ftell() fails, or cqdb_writer() returns NULL.
 */
int crf1mmw_open_attrs(crf1mmw_t* writer, int num_attrs)
{
    long pos;

    /* Check if we aren't writing anything at this moment. */
    if (writer->state != WSTATE_NONE) {
        return 1;
    }

    /* Store the current offset. */
    pos = ftell(writer->fp);
    if (pos < 0) {
        return 1;
    }
    writer->header.off_attrs = (uint32_t)pos;

    /* Open a CQDB chunk for writing. */
    writer->dbw = cqdb_writer(writer->fp, 0);
    if (writer->dbw == NULL) {
        writer->header.off_attrs = 0;
        return 1;
    }

    writer->state = WSTATE_ATTRS;
    writer->header.num_attrs = num_attrs;
    return 0;
}

/*
 * Ends the attribute section.
 *
 * Returns 0 on success; 1 if the attribute section is not open or
 * cqdb_writer_close() reports failure (the state is left unchanged then).
 */
int crf1mmw_close_attrs(crf1mmw_t* writer)
{
    /* Make sure that we are writing attributes. */
    if (writer->state != WSTATE_ATTRS) {
        return 1;
    }

    /* Close the CQDB chunk. */
    if (cqdb_writer_close(writer->dbw)) {
        return 1;
    }

    writer->dbw = NULL;
    writer->state = WSTATE_NONE;
    return 0;
}

/*
 * Stores one (attribute string, attribute id) pair in the open attribute
 * section. Returns 0 on success; 1 if the attribute section is not open or
 * cqdb_writer_put() reports failure.
 */
int crf1mmw_put_attr(crf1mmw_t* writer, int aid, const char *value)
{
    /* Make sure that we are writing attributes. */
    if (writer->state != WSTATE_ATTRS) {
        return 1;
    }

    /* Put the attribute. */
    if (cqdb_writer_put(writer->dbw, value, aid)) {
        return 1;
    }

    return 0;
}

/*
 * Begins the label feature-reference chunk.
 *
 * Pads the file to a 4-byte boundary, records that offset in the file
 * header, and skips CHUNK_SIZE + 4 * num_labels bytes that
 * crf1mmw_close_labelrefs() fills in later. An in-memory chunk header with
 * a zeroed offset array is allocated and kept in writer->href.
 *
 * Returns 0 on success, CRFERR_INTERNAL_LOGIC if another section is open or
 * num_labels is negative, and CRFERR_OUTOFMEMORY if the allocation fails.
 * Results of fwrite()/fseek() are not checked here; a failed write leaves
 * the stream's error indicator set, which crf1mmw_close() checks.
 */
int crf1mmw_open_labelrefs(crf1mmw_t* writer, int num_labels)
{
    uint32_t offset;
    FILE *fp = writer->fp;
    featureref_header_t* href = NULL;
    size_t size, alloc_size;

    /* Check if we aren't writing anything at this moment. */
    if (writer->state != WSTATE_NONE) {
        return CRFERR_INTERNAL_LOGIC;
    }

    /* A negative count would wrap around in the size computation below. */
    if (num_labels < 0) {
        return CRFERR_INTERNAL_LOGIC;
    }

    /* Size of the chunk header plus the offset array as stored in the file. */
    size = CHUNK_SIZE + sizeof(uint32_t) * (size_t)num_labels;

    /*
        Allocate a feature reference array. The allocation is never smaller
        than the structure itself, which declares one offset element even
        when num_labels is zero.
    */
    alloc_size = (size < sizeof(featureref_header_t)) ? sizeof(featureref_header_t) : size;
    href = (featureref_header_t*)calloc(alloc_size, 1);
    if (href == NULL) {
        return CRFERR_OUTOFMEMORY;
    }

    /* Align the offset to a DWORD boundary. */
    offset = (uint32_t)ftell(fp);
    while (offset % 4 != 0) {
        uint8_t c = 0;
        fwrite(&c, sizeof(uint8_t), 1, fp);
        ++offset;
    }

    /* Store the current offset position to the file header. */
    writer->header.off_labelrefs = offset;
    fseek(fp, (long)size, SEEK_CUR);

    /* Fill members in the feature reference header. */
    memcpy(href->chunk, CHUNK_LABELREF, sizeof(href->chunk));
    href->size = 0;
    href->num = num_labels;

    writer->href = href;
    writer->state = WSTATE_LABELREFS;
    return 0;
}

/*
 * Ends the label feature-reference chunk.
 *
 * Computes the chunk size from the current file position, seeks back to the
 * chunk start to write the chunk header and the offset array, then restores
 * the file position and frees the in-memory header.
 *
 * Returns 0 on success, CRFERR_INTERNAL_LOGIC if the chunk is not open.
 */
int crf1mmw_close_labelrefs(crf1mmw_t* writer)
{
    uint32_t i;
    FILE *fp = writer->fp;
    featureref_header_t* href = writer->href;
    uint32_t begin = writer->header.off_labelrefs, end = 0;

    /* Make sure that we are writing label feature references. */
    if (writer->state != WSTATE_LABELREFS) {
        return CRFERR_INTERNAL_LOGIC;
    }

    /* Store the current offset position. */
    end = (uint32_t)ftell(fp);

    /* Compute the size of this chunk. */
    href->size = (end - begin);

    /* Write the chunk header and offset array. */
    fseek(fp, begin, SEEK_SET);
    write_uint8_array(fp, href->chunk, 4);
    write_uint32(fp, href->size);
    write_uint32(fp, href->num);
    for (i = 0;i < href->num;++i) {
        write_uint32(fp, href->offsets[i]);
    }

    /* Move the file pointer to the tail. */
    fseek(fp, end, SEEK_SET);

    /* Uninitialize. */
    free(href);
    writer->href = NULL;
    writer->state = WSTATE_NONE;
    return 0;
}

int crf1mmw_put_labelref(crf1mmw_t* writer, int lid, const feature_refs_t* ref, int *map)
{
    int i, fid;
    uint32_t n = 0, offset = 0;
    FILE *fp = writer->fp;
    featureref_header_t* href = writer->href;

    /* Make sure that we are writing label feature references. */
    if (writer->state != WSTATE_LABELREFS) {
        return CRFERR_INTERNAL_LOGIC;
    }

    /* Store the current offset to the offset array. */
    href->offsets[lid] = ftell(fp);

    /* Count the number of references to active features. */
    for (i = 0;i < ref->num_features;++i) {
        if (0 <= map[ref->fids[i]]) ++n;
    }

    /* Write the feature reference. */
    write_uint32(fp, (uint32_t)n);
    for (i = 0;i < ref->num_features;++i) {
        fid = map[ref->fids[i]];
        if (0 <= fid) write_uint32(fp, (uint32_t)fid);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -