📄 crf1m_model.c
字号:
} return 0;}int crf1mmw_open_attrrefs(crf1mmw_t* writer, int num_attrs){ uint32_t offset; FILE *fp = writer->fp; featureref_header_t* href = NULL; size_t size = CHUNK_SIZE + sizeof(uint32_t) * num_attrs; /* Check if we aren't writing anything at this moment. */ if (writer->state != WSTATE_NONE) { return CRFERR_INTERNAL_LOGIC; } /* Allocate a feature reference array. */ href = (featureref_header_t*)calloc(size, 1); if (href == NULL) { return CRFERR_OUTOFMEMORY; } /* Align the offset to a DWORD boundary. */ offset = (uint32_t)ftell(fp); while (offset % 4 != 0) { uint8_t c = 0; fwrite(&c, sizeof(uint8_t), 1, fp); ++offset; } /* Store the current offset position to the file header. */ writer->header.off_attrrefs = offset; fseek(fp, size, SEEK_CUR); /* Fill members in the feature reference header. */ strncpy(href->chunk, CHUNK_ATTRREF, 4); href->size = 0; href->num = num_attrs; writer->href = href; writer->state = WSTATE_ATTRREFS; return 0;}int crf1mmw_close_attrrefs(crf1mmw_t* writer){ uint32_t i; FILE *fp = writer->fp; featureref_header_t* href = writer->href; uint32_t begin = writer->header.off_attrrefs, end = 0; /* Make sure that we are writing attribute feature references. */ if (writer->state != WSTATE_ATTRREFS) { return CRFERR_INTERNAL_LOGIC; } /* Store the current offset position. */ end = (uint32_t)ftell(fp); /* Compute the size of this chunk. */ href->size = (end - begin); /* Write the chunk header and offset array. */ fseek(fp, begin, SEEK_SET); write_uint8_array(fp, href->chunk, 4); write_uint32(fp, href->size); write_uint32(fp, href->num); for (i = 0;i < href->num;++i) { write_uint32(fp, href->offsets[i]); } /* Move the file pointer to the tail. */ fseek(fp, end, SEEK_SET); /* Uninitialize. */ free(href); writer->href = NULL; writer->state = WSTATE_NONE; return 0;}int crf1mmw_put_attrref(crf1mmw_t* writer, int aid, const feature_refs_t* ref, int *map){ int i, fid; uint32_t n = 0, offset = 0; FILE *fp = writer->fp; featureref_header_t* href = writer->href; /* Make sure that we are writing attribute feature references. */ if (writer->state != WSTATE_ATTRREFS) { return CRFERR_INTERNAL_LOGIC; } /* Store the current offset to the offset array. */ href->offsets[aid] = ftell(fp); /* Count the number of references to active features. */ for (i = 0;i < ref->num_features;++i) { if (0 <= map[ref->fids[i]]) ++n; } /* Write the feature reference. */ write_uint32(fp, (uint32_t)n); for (i = 0;i < ref->num_features;++i) { fid = map[ref->fids[i]]; if (0 <= fid) write_uint32(fp, (uint32_t)fid); } return 0;}int crf1mmw_open_features(crf1mmw_t* writer){ FILE *fp = writer->fp; feature_header_t* hfeat = NULL; /* Check if we aren't writing anything at this moment. */ if (writer->state != WSTATE_NONE) { return CRFERR_INTERNAL_LOGIC; } /* Allocate a feature chunk header. */ hfeat = (feature_header_t*)calloc(sizeof(feature_header_t), 1); if (hfeat == NULL) { return CRFERR_OUTOFMEMORY; } writer->header.off_features = (uint32_t)ftell(fp); fseek(fp, CHUNK_SIZE, SEEK_CUR); strncpy(hfeat->chunk, CHUNK_FEATURE, 4); writer->hfeat = hfeat; writer->state = WSTATE_FEATURES; return 0;}int crf1mmw_close_features(crf1mmw_t* writer){ FILE *fp = writer->fp; feature_header_t* hfeat = writer->hfeat; uint32_t begin = writer->header.off_features, end = 0; /* Make sure that we are writing attribute feature references. */ if (writer->state != WSTATE_FEATURES) { return CRFERR_INTERNAL_LOGIC; } /* Store the current offset position. */ end = (uint32_t)ftell(fp); /* Compute the size of this chunk. */ hfeat->size = (end - begin); /* Write the chunk header and offset array. */ fseek(fp, begin, SEEK_SET); write_uint8_array(fp, hfeat->chunk, 4); write_uint32(fp, hfeat->size); write_uint32(fp, hfeat->num); /* Move the file pointer to the tail. */ fseek(fp, end, SEEK_SET); /* Uninitialize. */ free(hfeat); writer->hfeat = NULL; writer->state = WSTATE_NONE; return 0;}int crf1mmw_put_feature(crf1mmw_t* writer, int fid, const crf1mm_feature_t* f){ FILE *fp = writer->fp; feature_header_t* hfeat = writer->hfeat; /* Make sure that we are writing attribute feature references. */ if (writer->state != WSTATE_FEATURES) { return CRFERR_INTERNAL_LOGIC; } /* We must put features #0, #1, ..., #(K-1) in this order. */ if (fid != hfeat->num) { return CRFERR_INTERNAL_LOGIC; } write_uint32(fp, f->type); write_uint32(fp, f->src); write_uint32(fp, f->dst); write_float(fp, f->weight); ++hfeat->num; return 0;}crf1mm_t* crf1mm_new(const char *filename){ FILE *fp = NULL; uint8_t* p = NULL; crf1mm_t *model = NULL; header_t *header = NULL; model = (crf1mm_t*)calloc(1, sizeof(crf1mm_t)); if (model == NULL) { goto error_exit; } fp = fopen(filename, "rb"); if (fp == NULL) { goto error_exit; } fseek(fp, 0, SEEK_END); model->size = (uint32_t)ftell(fp); fseek(fp, 0, SEEK_SET); model->buffer = (uint8_t*)malloc(model->size + 16); while ((uint32_t)model->buffer % 16 != 0) { ++model->buffer; } fread(model->buffer, 1, model->size, fp); fclose(fp); /* Write the file header. */ header = (header_t*)calloc(1, sizeof(header_t)); p = model->buffer; p += read_uint8_array(p, header->magic, sizeof(header->magic)); p += read_uint32(p, &header->size); p += read_uint8_array(p, header->type, sizeof(header->type)); p += read_uint32(p, &header->version); p += read_uint32(p, &header->num_features); p += read_uint32(p, &header->num_labels); p += read_uint32(p, &header->num_attrs); p += read_uint32(p, &header->off_features); p += read_uint32(p, &header->off_labels); p += read_uint32(p, &header->off_attrs); p += read_uint32(p, &header->off_labelrefs); p += read_uint32(p, &header->off_attrrefs); model->header = header; model->labels = cqdb_reader( model->buffer + header->off_labels, model->size - header->off_labels ); model->attrs = cqdb_reader( model->buffer + header->off_attrs, model->size - header->off_attrs ); return model;error_exit: if (model != NULL) { free(model); } if (fp != NULL) { fclose(fp); } return NULL;}void crf1mm_close(crf1mm_t* model){ if (model->labels != NULL) { cqdb_delete(model->labels); } if (model->attrs != NULL) { cqdb_delete(model->attrs); } free(model);}int crf1mm_get_num_attrs(crf1mm_t* model){ return model->header->num_attrs;}int crf1mm_get_num_labels(crf1mm_t* model){ return model->header->num_labels;}const char *crf1mm_to_label(crf1mm_t* model, int lid){ if (model->labels != NULL) { return cqdb_to_string(model->labels, lid); } else { return NULL; }}int crf1mm_to_lid(crf1mm_t* model, const char *value){ if (model->labels != NULL) { return cqdb_to_id(model->labels, value); } else { return -1; }}int crf1mm_to_aid(crf1mm_t* model, const char *value){ if (model->attrs != NULL) { return cqdb_to_id(model->attrs, value); } else { return -1; }}const char *crf1mm_to_attr(crf1mm_t* model, int aid){ if (model->attrs != NULL) { return cqdb_to_string(model->attrs, aid); } else { return NULL; }}int crf1mm_get_labelref(crf1mm_t* model, int lid, feature_refs_t* ref){ uint8_t *p = model->buffer; uint32_t offset; p += model->header->off_labelrefs; p += CHUNK_SIZE; p += sizeof(uint32_t) * lid; read_uint32(p, &offset); p = model->buffer + offset; p += read_uint32(p, &ref->num_features); ref->fids = (int*)p; return 0;}int crf1mm_get_attrref(crf1mm_t* model, int aid, feature_refs_t* ref){ uint8_t *p = model->buffer; uint32_t offset; p += model->header->off_attrrefs; p += CHUNK_SIZE; p += sizeof(uint32_t) * aid; read_uint32(p, &offset); p = model->buffer + offset; p += read_uint32(p, &ref->num_features); ref->fids = (int*)p; return 0;}int crf1mm_get_featureid(feature_refs_t* ref, int i){ uint32_t fid; uint8_t* p = (uint8_t*)ref->fids; p += sizeof(uint32_t) * i; read_uint32(p, &fid); return (int)fid;}int crf1mm_get_feature(crf1mm_t* model, int fid, crf1mm_feature_t* f){ uint8_t *p = NULL; uint32_t val = 0; uint32_t offset = model->header->off_features + CHUNK_SIZE; offset += FEATURE_SIZE * fid; p = model->buffer + offset; p += read_uint32(p, &val); f->type = val; p += read_uint32(p, &val); f->src = val; p += read_uint32(p, &val); f->dst = val; p += read_float(p, &f->weight); return 0;}void crf1mm_dump(crf1mm_t* crf1mm, FILE *fp){ int j; uint32_t i; feature_refs_t refs; const header_t* hfile = crf1mm->header; /* Dump the file header. */ fprintf(fp, "FILEHEADER = {\n"); fprintf(fp, " magic: %c%c%c%c\n", hfile->magic[0], hfile->magic[1], hfile->magic[2], hfile->magic[3]); fprintf(fp, " size: %d\n", hfile->size); fprintf(fp, " type: %c%c%c%c\n", hfile->type[0], hfile->type[1], hfile->type[2], hfile->type[3]); fprintf(fp, " version: %d\n", hfile->version); fprintf(fp, " num_features: %d\n", hfile->num_features); fprintf(fp, " num_labels: %d\n", hfile->num_labels); fprintf(fp, " num_attrs: %d\n", hfile->num_attrs); fprintf(fp, " off_features: 0x%X\n", hfile->off_features); fprintf(fp, " off_labels: 0x%X\n", hfile->off_labels); fprintf(fp, " off_attrs: 0x%X\n", hfile->off_attrs); fprintf(fp, " off_labelrefs: 0x%X\n", hfile->off_labelrefs); fprintf(fp, " off_attrrefs: 0x%X\n", hfile->off_attrrefs); fprintf(fp, "}\n"); fprintf(fp, "\n"); /* Dump the labels. */ fprintf(fp, "LABELS = {\n"); for (i = 0;i < hfile->num_labels;++i) { const char *str = crf1mm_to_label(crf1mm, i);#if 0 int check = crf1mm_to_lid(crf1mm, str); if (i != check) { fprintf(fp, "WARNING: inconsistent label CQDB\n"); }#endif fprintf(fp, " %5d: %s\n", i, str); } fprintf(fp, "}\n"); fprintf(fp, "\n"); /* Dump the attributes. */ fprintf(fp, "ATTRIBUTES = {\n"); for (i = 0;i < hfile->num_attrs;++i) { const char *str = crf1mm_to_attr(crf1mm, i);#if 0 int check = crf1mm_to_aid(crf1mm, str); if (i != check) { fprintf(fp, "WARNING: inconsistent attribute CQDB\n"); }#endif fprintf(fp, " %5d: %s\n", i, str); } fprintf(fp, "}\n"); fprintf(fp, "\n"); /* Dump the transition features. */ fprintf(fp, "TRANSITIONS = {\n"); for (i = 0;i < hfile->num_labels;++i) { crf1mm_get_labelref(crf1mm, i, &refs); for (j = 0;j < refs.num_features;++j) { crf1mm_feature_t f; int fid = crf1mm_get_featureid(&refs, j); const char *from = NULL, *to = NULL; crf1mm_get_feature(crf1mm, fid, &f); from = crf1mm_to_label(crf1mm, f.src); to = crf1mm_to_label(crf1mm, f.dst); fprintf(fp, " (%d) %s --> %s: %f\n", f.type, from, to, f.weight); } } fprintf(fp, "}\n"); fprintf(fp, "\n"); /* Dump the transition features. */ fprintf(fp, "TRANSITIONS_FROM_BOS = {\n"); crf1mm_get_labelref(crf1mm, hfile->num_labels, &refs); for (j = 0;j < refs.num_features;++j) { crf1mm_feature_t f; int fid = crf1mm_get_featureid(&refs, j); const char *to = NULL; crf1mm_get_feature(crf1mm, fid, &f); to = crf1mm_to_label(crf1mm, f.dst); fprintf(fp, " (%d) BOS --> %s: %f\n", f.type, to, f.weight); } fprintf(fp, "}\n"); fprintf(fp, "\n"); /* Dump the transition features. */ fprintf(fp, "TRANSITIONS_TO_EOS = {\n"); crf1mm_get_labelref(crf1mm, hfile->num_labels+1, &refs); for (j = 0;j < refs.num_features;++j) { crf1mm_feature_t f; int fid = crf1mm_get_featureid(&refs, j); const char *from = NULL; crf1mm_get_feature(crf1mm, fid, &f); from = crf1mm_to_label(crf1mm, f.src); fprintf(fp, " (%d) %s --> EOS: %f\n", f.type, from, f.weight); } fprintf(fp, "}\n"); fprintf(fp, "\n"); /* Dump the transition features. */ fprintf(fp, "STATE_FEATURES = {\n"); for (i = 0;i < hfile->num_attrs;++i) { crf1mm_get_attrref(crf1mm, i, &refs); for (j = 0;j < refs.num_features;++j) { crf1mm_feature_t f; int fid = crf1mm_get_featureid(&refs, j); const char *attr = NULL, *to = NULL; crf1mm_get_feature(crf1mm, fid, &f);#if 0 if (f.src != i) { fprintf(fp, "WARNING: an inconsistent attribute reference.\n"); }#endif attr = crf1mm_to_attr(crf1mm, f.src); to = crf1mm_to_label(crf1mm, f.dst); fprintf(fp, " (%d) %s --> %s: %f\n", f.type, attr, to, f.weight); } } fprintf(fp, "}\n"); fprintf(fp, "\n");}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -