⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 naivebayes.c

📁 使用具有增量学习的监督式学习方法，包含多种不同的分类算法。
💻 C
字号:
/* Copyright (C) 2001-2002  Mikael Ylikoski * See the accompanying file "README" for the full copyright notice *//** * @file * Naive Bayes learning algorithm. * * Should be used with tf vectorizer, without any normalizer. * * Implemented after description in Joachims, T., <em>A Probabilistic Analysis * of the Rocchio Algorithm with TFIDF for Text Categorization</em>, 1997. * * @author  Mikael Ylikoski * @date    2001-2002 */#include <float.h>#include <math.h>#include <stdio.h>#include <stdlib.h>#include "multi.h"#include "naivebayes.h"#include "utility.h"#include "vector.h"/** * Naive Bayes classifier global data. */typedef struct {    int nod;		/**< Number Of Documents in total */    int now;		/**< Number Of Words in dictionary */} naivebayes_db;/** * Naive Bayes classifier class data. */typedef struct {    vector *tf;		/**< Vector with term frequencies for class */    int nod;		/**< Number Of Documents for class */    float now;		/**< Number Of Words for class = vector_sum (tf) */} naivebayes_class;/** * Create a new classifier database. * * @param opts  classifier options, not used * @return  The new classifier database. */void *naivebayes_new_db (const char *opts) {    naivebayes_db *db;    db = my_malloc (sizeof(naivebayes_db));    db->nod = 0;    db->now = 0;    return db;}/** * Create a new classifier. * * @return  The classifier. 
*/void *naivebayes_new (void) {    naivebayes_class *nbc;    nbc = my_malloc (sizeof(naivebayes_class));    nbc->tf = NULL;    nbc->nod = 0;    nbc->now = 0;    return nbc;}void *naivebayes_copy (void *data) {    naivebayes_class *nbc;    naivebayes_class *onbc;    onbc = (naivebayes_class *)data;    nbc = my_malloc (sizeof(naivebayes_class));    if (onbc->tf) {	nbc->tf = vector_copy (onbc->tf);	if (!nbc->tf) {	    free (nbc);	    return NULL;	}    } else	nbc->tf = NULL;    nbc->nod = onbc->nod;    nbc->now = onbc->now;    return nbc;}voidnaivebayes_free (void *data) {    naivebayes_class *nbc;    nbc = (naivebayes_class *)data;    if (nbc->tf)	vector_free (nbc->tf);    free (nbc);}/** * Train classifier with a document vector. * * @param db     classifier database * @param data   class data * @param v      term frequency vector for document to learn * @param class  document class: 1 or -1 * @return  0 if ok; -1 otherwise. */intnaivebayes_learn (void *db, void *data, vector *v, int class) {    int i;    naivebayes_class *nbc;    naivebayes_db *ndb;    if (class != 1)	return -1;    nbc = (naivebayes_class *)data;    ndb = (naivebayes_db *)db;    if (nbc->tf == NULL)	nbc->tf = vector_copy (v);    else	vector_add (nbc->tf, v);    nbc->nod++;    nbc->now += vector_sum (v);    ndb->nod++;    i = vector_dim (v);    if (i > ndb->now)	ndb->now = i;    return 0;}/** * Unlearn classifier with a document vector. * * @param db     classifier database * @param data   class data * @param v      term frequency vector for document to unlearn * @param class  document class; 1 or -1 * @return  0 if ok; -1 otherwise. 
*/intnaivebayes_unlearn (void *db, void *data, vector *v, int class) {    naivebayes_class *nbc;    naivebayes_db *ndb;    if (class != 1)	return -1;    nbc = (naivebayes_class *)data;    ndb = (naivebayes_db *)db;    if (nbc->tf == NULL)	return -1;    vector_sub (nbc->tf, v);    nbc->nod--;    nbc->now -= vector_sum (v);    ndb->nod--;    if (vector_dim (v) == ndb->now)	ndb->now = vector_dim (nbc->tf);    return 0;}intnaivebayes_remove_global (void *db, vector *v) {    naivebayes_db *ndb;    ndb = (naivebayes_db *)db;    ndb->now -= v->nel;    /*    i = vector_dim (v);    if (i > ndb->now)	ndb->now = i;    */    return 0;}intnaivebayes_remove (void *db, void *data, vector *v) {    naivebayes_class *nbc;    naivebayes_db *ndb;    nbc = (naivebayes_class *)data;    ndb = (naivebayes_db *)db;    if (nbc->tf == NULL)	return 0;    nbc->now -= vector_remove_v (nbc->tf, v);    //nbc->now -= ;    return 0;}/** * Classify a document. * * @param db    classifier database * @param data  class data * @param v     term frequency vector for the document to classify * @return  The number of the most probable class. 
*/doublenaivebayes_classify (void *db, void *data, vector *v) {    int i, j;    double d, e, p;    naivebayes_class *nbc;    naivebayes_db *ndb;    nbc = (naivebayes_class *)data;    ndb = (naivebayes_db *)db;    if (!nbc->tf)	return 0;    if (nbc->now == 0)	return 0;    d = ndb->now + nbc->now;    p = log (nbc->nod / (double)ndb->nod);    for (i = 0; i < v->nel; i++) {	e = (1 + vector_get_value (nbc->tf, v->name[i])) / d;	for (j = v->value[i]; j > 0; j--)	/* McCallum & Nigam */	    p += log (e / (double)j);	/*p += log (e);*/			/* Aas & Eikvil */    }    return p;}void *naivebayes_load_db (FILE *file) {    int i;    naivebayes_db *ndb;    ndb = my_malloc (sizeof(naivebayes_db));    i = fscanf (file, "nod %d\n", &ndb->nod);    i = fscanf (file, "now %d\n", &ndb->now);    return ndb;}void *naivebayes_load_class (FILE *file) {    int i;    naivebayes_class *ncl;    ncl = my_malloc (sizeof(naivebayes_class));    i = fscanf (file, "nod %d\n", &ncl->nod);    if (i != 1) {	free (ncl);	return NULL;    }    i = fscanf (file, "now %f\n", &ncl->now);    if (i != 1) {	free (ncl);	return NULL;    }    fscanf (file, "vec ");    ncl->tf = vector_load (file);    if (!ncl->tf) {	free (ncl);	return NULL;    }    fscanf (file, "\n");    return ncl;}intnaivebayes_save_db (FILE *file, void *db) {    naivebayes_db *ndb;    ndb = (naivebayes_db *)db;    fprintf (file, "nod %d\n", ndb->nod);    fprintf (file, "now %d\n", ndb->now);    return 0;}intnaivebayes_save_class (FILE *file, void *data) {    naivebayes_class *ncl;    ncl = (naivebayes_class *)data;    fprintf (file, "nod %d\n", ncl->nod);    fprintf (file, "now %f\n", ncl->now);    fprintf (file, "vec ");    vector_save (ncl->tf, file);    fprintf (file, "\n");    return 0;}/** * Keep cygwin happy. */intmain (void) {    return 0;}/** * Naive Bayes classifier name. */const char *my_classifier_name = "NaiveBayes";/** * Naive Bayes classifier functions. 
*/const multi_functions my_functions = {    .new_db = naivebayes_new_db,    .new = naivebayes_new,    .copy = naivebayes_copy,    .free = naivebayes_free,    .learn = naivebayes_learn,    .unlearn = naivebayes_unlearn,    .remove = naivebayes_remove,    .remove_db = naivebayes_remove_global,    .classify = naivebayes_classify,    .load_db = naivebayes_load_db,    .load_class = naivebayes_load_class,    .save_db = naivebayes_save_db,    .save_class = naivebayes_save_class,    .option = 0};

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -