⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crf1m_model.c

📁 CRFsuite is a very fast implementation of the Conditional Random Fields (CRF) algorithm. It handles t
💻 C
📖 第 1 页 / 共 2 页
字号:
    }    return 0;}int crf1mmw_open_attrrefs(crf1mmw_t* writer, int num_attrs){    uint32_t offset;    FILE *fp = writer->fp;    featureref_header_t* href = NULL;    size_t size = CHUNK_SIZE + sizeof(uint32_t) * num_attrs;    /* Check if we aren't writing anything at this moment. */    if (writer->state != WSTATE_NONE) {        return CRFERR_INTERNAL_LOGIC;    }    /* Allocate a feature reference array. */    href = (featureref_header_t*)calloc(size, 1);    if (href == NULL) {        return CRFERR_OUTOFMEMORY;    }    /* Align the offset to a DWORD boundary. */    offset = (uint32_t)ftell(fp);    while (offset % 4 != 0) {        uint8_t c = 0;        fwrite(&c, sizeof(uint8_t), 1, fp);        ++offset;    }    /* Store the current offset position to the file header. */    writer->header.off_attrrefs = offset;    fseek(fp, size, SEEK_CUR);    /* Fill members in the feature reference header. */    strncpy(href->chunk, CHUNK_ATTRREF, 4);    href->size = 0;    href->num = num_attrs;    writer->href = href;    writer->state = WSTATE_ATTRREFS;    return 0;}int crf1mmw_close_attrrefs(crf1mmw_t* writer){    uint32_t i;    FILE *fp = writer->fp;    featureref_header_t* href = writer->href;    uint32_t begin = writer->header.off_attrrefs, end = 0;    /* Make sure that we are writing attribute feature references. */    if (writer->state != WSTATE_ATTRREFS) {        return CRFERR_INTERNAL_LOGIC;    }    /* Store the current offset position. */    end = (uint32_t)ftell(fp);    /* Compute the size of this chunk. */    href->size = (end - begin);    /* Write the chunk header and offset array. */    fseek(fp, begin, SEEK_SET);    write_uint8_array(fp, href->chunk, 4);    write_uint32(fp, href->size);    write_uint32(fp, href->num);    for (i = 0;i < href->num;++i) {        write_uint32(fp, href->offsets[i]);    }    /* Move the file pointer to the tail. */    fseek(fp, end, SEEK_SET);    /* Uninitialize. 
*/    free(href);    writer->href = NULL;    writer->state = WSTATE_NONE;    return 0;}int crf1mmw_put_attrref(crf1mmw_t* writer, int aid, const feature_refs_t* ref, int *map){    int i, fid;    uint32_t n = 0, offset = 0;    FILE *fp = writer->fp;    featureref_header_t* href = writer->href;    /* Make sure that we are writing attribute feature references. */    if (writer->state != WSTATE_ATTRREFS) {        return CRFERR_INTERNAL_LOGIC;    }    /* Store the current offset to the offset array. */    href->offsets[aid] = ftell(fp);    /* Count the number of references to active features. */    for (i = 0;i < ref->num_features;++i) {        if (0 <= map[ref->fids[i]]) ++n;    }    /* Write the feature reference. */    write_uint32(fp, (uint32_t)n);    for (i = 0;i < ref->num_features;++i) {        fid = map[ref->fids[i]];        if (0 <= fid) write_uint32(fp, (uint32_t)fid);    }    return 0;}int crf1mmw_open_features(crf1mmw_t* writer){    FILE *fp = writer->fp;    feature_header_t* hfeat = NULL;    /* Check if we aren't writing anything at this moment. */    if (writer->state != WSTATE_NONE) {        return CRFERR_INTERNAL_LOGIC;    }    /* Allocate a feature chunk header. */    hfeat = (feature_header_t*)calloc(sizeof(feature_header_t), 1);    if (hfeat == NULL) {        return CRFERR_OUTOFMEMORY;    }    writer->header.off_features = (uint32_t)ftell(fp);    fseek(fp, CHUNK_SIZE, SEEK_CUR);    strncpy(hfeat->chunk, CHUNK_FEATURE, 4);    writer->hfeat = hfeat;    writer->state = WSTATE_FEATURES;    return 0;}int crf1mmw_close_features(crf1mmw_t* writer){    FILE *fp = writer->fp;    feature_header_t* hfeat = writer->hfeat;    uint32_t begin = writer->header.off_features, end = 0;    /* Make sure that we are writing attribute feature references. */    if (writer->state != WSTATE_FEATURES) {        return CRFERR_INTERNAL_LOGIC;    }    /* Store the current offset position. */    end = (uint32_t)ftell(fp);    /* Compute the size of this chunk. 
*/    hfeat->size = (end - begin);    /* Write the chunk header and offset array. */    fseek(fp, begin, SEEK_SET);    write_uint8_array(fp, hfeat->chunk, 4);    write_uint32(fp, hfeat->size);    write_uint32(fp, hfeat->num);    /* Move the file pointer to the tail. */    fseek(fp, end, SEEK_SET);    /* Uninitialize. */    free(hfeat);    writer->hfeat = NULL;    writer->state = WSTATE_NONE;    return 0;}int crf1mmw_put_feature(crf1mmw_t* writer, int fid, const crf1mm_feature_t* f){    FILE *fp = writer->fp;    feature_header_t* hfeat = writer->hfeat;    /* Make sure that we are writing attribute feature references. */    if (writer->state != WSTATE_FEATURES) {        return CRFERR_INTERNAL_LOGIC;    }    /* We must put features #0, #1, ..., #(K-1) in this order. */    if (fid != hfeat->num) {        return CRFERR_INTERNAL_LOGIC;    }    write_uint32(fp, f->type);    write_uint32(fp, f->src);    write_uint32(fp, f->dst);    write_float(fp, f->weight);    ++hfeat->num;    return 0;}crf1mm_t* crf1mm_new(const char *filename){    FILE *fp = NULL;    uint8_t* p = NULL;    crf1mm_t *model = NULL;    header_t *header = NULL;    model = (crf1mm_t*)calloc(1, sizeof(crf1mm_t));    if (model == NULL) {        goto error_exit;    }    fp = fopen(filename, "rb");    if (fp == NULL) {        goto error_exit;    }    fseek(fp, 0, SEEK_END);    model->size = (uint32_t)ftell(fp);    fseek(fp, 0, SEEK_SET);    model->buffer = (uint8_t*)malloc(model->size + 16);    while ((uint32_t)model->buffer % 16 != 0) {        ++model->buffer;    }    fread(model->buffer, 1, model->size, fp);    fclose(fp);    /* Write the file header. 
*/    header = (header_t*)calloc(1, sizeof(header_t));    p = model->buffer;    p += read_uint8_array(p, header->magic, sizeof(header->magic));    p += read_uint32(p, &header->size);    p += read_uint8_array(p, header->type, sizeof(header->type));    p += read_uint32(p, &header->version);    p += read_uint32(p, &header->num_features);    p += read_uint32(p, &header->num_labels);    p += read_uint32(p, &header->num_attrs);    p += read_uint32(p, &header->off_features);    p += read_uint32(p, &header->off_labels);    p += read_uint32(p, &header->off_attrs);    p += read_uint32(p, &header->off_labelrefs);    p += read_uint32(p, &header->off_attrrefs);    model->header = header;    model->labels = cqdb_reader(        model->buffer + header->off_labels,        model->size - header->off_labels        );    model->attrs = cqdb_reader(        model->buffer + header->off_attrs,        model->size - header->off_attrs        );    return model;error_exit:    if (model != NULL) {        free(model);    }    if (fp != NULL) {        fclose(fp);    }    return NULL;}void crf1mm_close(crf1mm_t* model){    if (model->labels != NULL) {        cqdb_delete(model->labels);    }    if (model->attrs != NULL) {        cqdb_delete(model->attrs);    }    free(model);}int crf1mm_get_num_attrs(crf1mm_t* model){    return model->header->num_attrs;}int crf1mm_get_num_labels(crf1mm_t* model){    return model->header->num_labels;}const char *crf1mm_to_label(crf1mm_t* model, int lid){    if (model->labels != NULL) {        return cqdb_to_string(model->labels, lid);    } else {        return NULL;    }}int crf1mm_to_lid(crf1mm_t* model, const char *value){    if (model->labels != NULL) {        return cqdb_to_id(model->labels, value);    } else {        return -1;    }}int crf1mm_to_aid(crf1mm_t* model, const char *value){    if (model->attrs != NULL) {        return cqdb_to_id(model->attrs, value);    } else {        return -1;    }}const char *crf1mm_to_attr(crf1mm_t* model, int aid){    if 
(model->attrs != NULL) {        return cqdb_to_string(model->attrs, aid);    } else {        return NULL;    }}int crf1mm_get_labelref(crf1mm_t* model, int lid, feature_refs_t* ref){    uint8_t *p = model->buffer;    uint32_t offset;    p += model->header->off_labelrefs;    p += CHUNK_SIZE;    p += sizeof(uint32_t) * lid;    read_uint32(p, &offset);    p = model->buffer + offset;    p += read_uint32(p, &ref->num_features);    ref->fids = (int*)p;    return 0;}int crf1mm_get_attrref(crf1mm_t* model, int aid, feature_refs_t* ref){    uint8_t *p = model->buffer;    uint32_t offset;    p += model->header->off_attrrefs;    p += CHUNK_SIZE;    p += sizeof(uint32_t) * aid;    read_uint32(p, &offset);    p = model->buffer + offset;    p += read_uint32(p, &ref->num_features);    ref->fids = (int*)p;    return 0;}int crf1mm_get_featureid(feature_refs_t* ref, int i){    uint32_t fid;    uint8_t* p = (uint8_t*)ref->fids;    p += sizeof(uint32_t) * i;    read_uint32(p, &fid);    return (int)fid;}int crf1mm_get_feature(crf1mm_t* model, int fid, crf1mm_feature_t* f){    uint8_t *p = NULL;    uint32_t val = 0;    uint32_t offset = model->header->off_features + CHUNK_SIZE;    offset += FEATURE_SIZE * fid;    p = model->buffer + offset;    p += read_uint32(p, &val);    f->type = val;    p += read_uint32(p, &val);    f->src = val;    p += read_uint32(p, &val);    f->dst = val;    p += read_float(p, &f->weight);    return 0;}void crf1mm_dump(crf1mm_t* crf1mm, FILE *fp){    int j;    uint32_t i;    feature_refs_t refs;    const header_t* hfile = crf1mm->header;    /* Dump the file header. 
*/    fprintf(fp, "FILEHEADER = {\n");    fprintf(fp, "  magic: %c%c%c%c\n",        hfile->magic[0], hfile->magic[1], hfile->magic[2], hfile->magic[3]);    fprintf(fp, "  size: %d\n", hfile->size);    fprintf(fp, "  type: %c%c%c%c\n",        hfile->type[0], hfile->type[1], hfile->type[2], hfile->type[3]);    fprintf(fp, "  version: %d\n", hfile->version);    fprintf(fp, "  num_features: %d\n", hfile->num_features);    fprintf(fp, "  num_labels: %d\n", hfile->num_labels);    fprintf(fp, "  num_attrs: %d\n", hfile->num_attrs);    fprintf(fp, "  off_features: 0x%X\n", hfile->off_features);    fprintf(fp, "  off_labels: 0x%X\n", hfile->off_labels);    fprintf(fp, "  off_attrs: 0x%X\n", hfile->off_attrs);    fprintf(fp, "  off_labelrefs: 0x%X\n", hfile->off_labelrefs);    fprintf(fp, "  off_attrrefs: 0x%X\n", hfile->off_attrrefs);    fprintf(fp, "}\n");    fprintf(fp, "\n");    /* Dump the labels. */    fprintf(fp, "LABELS = {\n");    for (i = 0;i < hfile->num_labels;++i) {        const char *str = crf1mm_to_label(crf1mm, i);#if 0        int check = crf1mm_to_lid(crf1mm, str);        if (i != check) {            fprintf(fp, "WARNING: inconsistent label CQDB\n");        }#endif        fprintf(fp, "  %5d: %s\n", i, str);    }    fprintf(fp, "}\n");    fprintf(fp, "\n");    /* Dump the attributes. */    fprintf(fp, "ATTRIBUTES = {\n");    for (i = 0;i < hfile->num_attrs;++i) {        const char *str = crf1mm_to_attr(crf1mm, i);#if 0        int check = crf1mm_to_aid(crf1mm, str);        if (i != check) {            fprintf(fp, "WARNING: inconsistent attribute CQDB\n");        }#endif        fprintf(fp, "  %5d: %s\n", i, str);    }    fprintf(fp, "}\n");    fprintf(fp, "\n");    /* Dump the transition features. 
*/    fprintf(fp, "TRANSITIONS = {\n");    for (i = 0;i < hfile->num_labels;++i) {        crf1mm_get_labelref(crf1mm, i, &refs);        for (j = 0;j < refs.num_features;++j) {            crf1mm_feature_t f;            int fid = crf1mm_get_featureid(&refs, j);            const char *from = NULL, *to = NULL;            crf1mm_get_feature(crf1mm, fid, &f);            from = crf1mm_to_label(crf1mm, f.src);            to = crf1mm_to_label(crf1mm, f.dst);            fprintf(fp, "  (%d) %s --> %s: %f\n", f.type, from, to, f.weight);        }    }    fprintf(fp, "}\n");    fprintf(fp, "\n");    /* Dump the transition features. */    fprintf(fp, "TRANSITIONS_FROM_BOS = {\n");    crf1mm_get_labelref(crf1mm, hfile->num_labels, &refs);    for (j = 0;j < refs.num_features;++j) {        crf1mm_feature_t f;        int fid = crf1mm_get_featureid(&refs, j);        const char *to = NULL;        crf1mm_get_feature(crf1mm, fid, &f);        to = crf1mm_to_label(crf1mm, f.dst);        fprintf(fp, "  (%d) BOS --> %s: %f\n", f.type, to, f.weight);    }    fprintf(fp, "}\n");    fprintf(fp, "\n");    /* Dump the transition features. */    fprintf(fp, "TRANSITIONS_TO_EOS = {\n");    crf1mm_get_labelref(crf1mm, hfile->num_labels+1, &refs);    for (j = 0;j < refs.num_features;++j) {        crf1mm_feature_t f;        int fid = crf1mm_get_featureid(&refs, j);        const char *from = NULL;        crf1mm_get_feature(crf1mm, fid, &f);        from = crf1mm_to_label(crf1mm, f.src);        fprintf(fp, "  (%d) %s --> EOS: %f\n", f.type, from, f.weight);    }    fprintf(fp, "}\n");    fprintf(fp, "\n");    /* Dump the transition features. 
*/    fprintf(fp, "STATE_FEATURES = {\n");    for (i = 0;i < hfile->num_attrs;++i) {        crf1mm_get_attrref(crf1mm, i, &refs);        for (j = 0;j < refs.num_features;++j) {            crf1mm_feature_t f;            int fid = crf1mm_get_featureid(&refs, j);            const char *attr = NULL, *to = NULL;            crf1mm_get_feature(crf1mm, fid, &f);#if 0            if (f.src != i) {                fprintf(fp, "WARNING: an inconsistent attribute reference.\n");            }#endif            attr = crf1mm_to_attr(crf1mm, f.src);            to = crf1mm_to_label(crf1mm, f.dst);            fprintf(fp, "  (%d) %s --> %s: %f\n", f.type, attr, to, f.weight);        }    }    fprintf(fp, "}\n");    fprintf(fp, "\n");}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -