⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ngram.c

📁 使用具有增量学习的监控式学习方法。包括几个不同的分类算法。
💻 C
字号:
/* Copyright (C) 2001-2002  Mikael Ylikoski * See the accompanying file "README" for the full copyright notice *//** * @file * N-gram learning learning algorithm. * * Should be used with ngram tokenizer and tf vectorizer without a normalizer. * * @author  Mikael Ylikoski * @date    2001-2002 */#include <float.h>#include <math.h>#include <stdio.h>#include <stdlib.h>#include "multi.h"#include "ngram.h"#include "utility.h"#include "vector.h"/** * N-gram classifier global data. */struct ngram_db_ {    int now;		/**< Number Of Words in dictionary */};/** * N-gram classifier class data. */typedef struct {    vector *class;	/**< Vector with word frequencies for class */} ngram_class;/** * Create a new global state. * * @return  The new global state. */void *ngram_new_db (const char *opts) {    ngram_db *db;    db = my_malloc (sizeof(ngram_db));    db->now = 0;    return db;}/** * Create a new classifier. * * @return  The classifier. */void *ngram_new (void) {    ngram_class *nc;    nc = my_malloc (sizeof(ngram_class));    nc->class = NULL;    return nc;}void *ngram_copy (void *data) {    ngram_class *nc;    ngram_class *nnc;    nc = (ngram_class *)data;    nnc = my_malloc (sizeof(ngram_class));    if (nc->class)	nnc->class = vector_copy (nc->class);    else	nnc->class = NULL;    return nnc;}voidngram_free (void *data) {    ngram_class *nc;    nc = (ngram_class *)data;    if (nc->class)	vector_free (nc->class);    free (nc);}/** * Train classifier with a document vector. * * @param db     classifier database * @param data   class data * @param v      term frequency vector for document to learn * @param class  document class: 1 or -1 * @return  0 if ok; -1 otherwise. */intngram_learn (void *db, void *data, vector *v, int class) {    int i;    ngram_class *nbc;    ngram_db *ndb;    if (class != 1)	return -1;    nbc = (ngram_class *)data;    ndb = (ngram_db *)db;    if (nbc->class == NULL)	nbc->class = vector_copy (v);    else	vector_add (nbc->class, v);    i = vector_dim (v);    if (i > ndb->now)	ndb->now = i;    return 0;}/** * Unlearn classifier with a document vector. * * @param db     classifier database * @param data   class data * @param v      term frequency vector for document to unlearn * @param class  document class; 1 or -1 * @return  0 if ok; -1 otherwise. */intngram_unlearn (void *db, void *data, vector *v, int class) {    ngram_class *nbc;    ngram_db *ndb;    if (class != 1)	return -1;    nbc = (ngram_class *)data;    ndb = (ngram_db *)db;    if (nbc->class == NULL)	return -1;    vector_sub (nbc->class, v);    if (vector_dim (v) == ndb->now)	ndb->now = vector_dim (nbc->class);    return 0;}/** * Classify a document. * * @param db    classifier database * @param data  class data * @param v     term frequency vector for the document to classify * @return  The relative probability of the class. */doublengram_classify (void *db, void *data, vector *v) {    ngram_class *nbc;    ngram_db *ndb;    nbc = (ngram_class *)data;    ndb = (ngram_db *)db;    if (!nbc->class)	return 0;    return -vector_relative_entropy (v, nbc->class, ndb->now);}void *ngram_load_db (FILE *file) {    int i;    ngram_db *ndb;    ndb = my_malloc (sizeof(ngram_db));    i = fread (&ndb->now, sizeof(int), 1, file);    if (i != 1) {	free (ndb);	return NULL;    }    return ndb;}void *ngram_load_class (FILE *file) {    ngram_class *ncl;    ncl = my_malloc (sizeof(ngram_class));    ncl->class = vector_load (file);    if (!ncl->class) {	free (ncl);	return NULL;    }    return ncl;}intngram_save_db (FILE *file, void *db) {    int i;    ngram_db *ndb;    ndb = (ngram_db *)db;    i = fwrite (&ndb->now, sizeof(int), 1, file);    if (i != 1)	return -1;    return 0;}intngram_save_class (FILE *file, void *data) {    ngram_class *ncl;    ncl = (ngram_class *)data;    vector_save (ncl->class, file);    return 0;}/** * Keep cygwin happy. */intmain (void) {    return 0;}/** * N-gram classifier name. */const char *my_classifier_name = "N-gram";/** * N-gram classifier functions. */const multi_functions my_functions = {    .new_db = ngram_new_db,    .new = ngram_new,    .copy = ngram_copy,    .free = ngram_free,    .learn = ngram_learn,    .classify = ngram_classify,    .load_db = ngram_load_db,    .load_class = ngram_load_class,    .save_db = ngram_save_db,    .save_class = ngram_save_class,    .option = 0};/** * Load a db from a text_cat language model file. *multi_db *ngram_load_file_db (char *file) {    int i, j;    FILE *f;    f = fopen(file, "r");    for (i = 0; i < 400; i++) {	for (j = 0; j < 9; j++)	    if (0)		;	scanf();    }    return NULL;}*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -