
📄 naivebayesbin.c

📁 A supervised learning package with incremental learning. Includes several different classification algorithms.
💻 C
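What the classifier computes (a reading of the code below, not part of the original documentation): for a document with distinct terms $w_i$, each occurring $\mathit{tf}_i$ times, `naivebayesbin_classify` returns a log-odds score built from the per-class document counts $N_+$, $N_-$ (`nod`, `nodn`), the accumulated term counts $c_+(w_i)$, $c_-(w_i)$ (`class`, `classn`), and the smoothing denominator $d$ (`nowds`), following the McCallum & Nigam variant referenced in the source:

$$
\mathrm{score} = \log\frac{N_+}{N_+ + N_-} - \log\frac{N_-}{N_+ + N_-}
  + \sum_i \sum_{j=1}^{\mathit{tf}_i} \Bigl( \log\frac{1 + c_+(w_i)}{d} - \log j \Bigr)
  - \sum_i \sum_{j=1}^{\mathit{tf}_i} \Bigl( \log\frac{1 + c_-(w_i)}{d} - \log j \Bigr)
$$

A positive score favours the positive class; the commented-out "Aas & Eikvil" lines would instead add $\log\frac{1 + c_\pm(w_i)}{d}$ once per distinct term.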
/* Copyright (C) 2001-2002  Mikael Ylikoski
 * See the accompanying file "README" for the full copyright notice
 */

/**
 * @file
 * Binary Naive Bayes learning algorithm.
 *
 * Should be used with tf vectorizer, without any normalizer.
 *
 * Implemented after description in Joachims, T., <em>A Probabilistic Analysis
 * of the Rocchio Algorithm with TFIDF for Text Categorization</em>, 1997.
 *
 * @author  Mikael Ylikoski
 * @date    2001-2002
 */

#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "multi.h"
#include "utility.h"
#include "vector.h"

/**
 * Naive Bayes classifier class data.
 */
typedef struct {
    vector *class;      /**< Word frequencies for the positive class */
    int nod;            /**< Number Of Documents in the positive class */
    int now;            /**< Number Of Words in the positive class = vector_sum (class) */
    vector *classn;     /**< Word frequencies for the negative class */
    int nodn;           /**< Number Of Documents in the negative class */
    int nown;           /**< Number Of Words in the negative class = vector_sum (classn) */
    int nowds;          /**< Largest document vector dimension seen; smoothing denominator in classify */
} naivebayesbin_class;

/**
 * Create a new classifier.
 *
 * @return  The classifier.
 */
void *
naivebayesbin_new (void)
{
    naivebayesbin_class *nbc;

    nbc = my_malloc (sizeof (naivebayesbin_class));
    nbc->class = NULL;
    nbc->nod = 0;
    nbc->now = 0;
    nbc->classn = NULL;
    nbc->nodn = 0;
    nbc->nown = 0;
    nbc->nowds = 0;

    return nbc;
}

/**
 * Copy a classifier.
 *
 * @param data  class data to copy
 * @return  The copy, or NULL on allocation failure.
 */
void *
naivebayesbin_copy (void *data)
{
    naivebayesbin_class *nbc;
    naivebayesbin_class *onbc;

    onbc = (naivebayesbin_class *)data;
    nbc = my_malloc (sizeof (naivebayesbin_class));
    if (onbc->class) {
        nbc->class = vector_copy (onbc->class);
        if (!nbc->class) {
            free (nbc);
            return NULL;
        }
    } else
        nbc->class = NULL;
    nbc->nod = onbc->nod;
    nbc->now = onbc->now;
    if (onbc->classn) {
        nbc->classn = vector_copy (onbc->classn);
        if (!nbc->classn) {
            if (nbc->class)
                vector_free (nbc->class);
            free (nbc);
            return NULL;
        }
    } else
        nbc->classn = NULL;
    nbc->nodn = onbc->nodn;
    nbc->nown = onbc->nown;
    nbc->nowds = onbc->nowds;   /* needed as the smoothing denominator in classify */

    return nbc;
}

/**
 * Free a classifier.
 *
 * @param data  class data to free
 */
void
naivebayesbin_free (void *data)
{
    naivebayesbin_class *nbc;

    nbc = (naivebayesbin_class *)data;
    if (nbc->class)
        vector_free (nbc->class);
    if (nbc->classn)
        vector_free (nbc->classn);
    free (nbc);
}

/**
 * Train classifier with a document vector.
 *
 * @param db     classifier database
 * @param data   class data
 * @param v      term frequency vector for document to learn
 * @param class  document class: 1 or -1
 * @return  0 if ok; -1 otherwise.
 */
int
naivebayesbin_learn (void *db, void *data, vector *v, int class)
{
    int i;
    naivebayesbin_class *nbc;

    nbc = (naivebayesbin_class *)data;
    if (class > 0) {
        if (nbc->class == NULL)
            nbc->class = vector_copy (v);
        else
            vector_add (nbc->class, v);
        nbc->nod++;
        nbc->now += vector_sum (v);
    } else {
        if (nbc->classn == NULL)
            nbc->classn = vector_copy (v);
        else
            vector_add (nbc->classn, v);
        nbc->nodn++;
        nbc->nown += vector_sum (v);
    }
    i = vector_dim (v);
    if (i > nbc->nowds)
        nbc->nowds = i;

    return 0;
}

/**
 * Classify a document.
 *
 * @param db    classifier database
 * @param data  class data
 * @param v     term frequency vector for the document to classify
 * @return  Log-odds score; positive favours the positive class,
 *          0 if one of the classes has not been trained yet.
 */
double
naivebayesbin_classify (void *db, void *data, vector *v)
{
    int i, j;
    double d, e, p;
    naivebayesbin_class *nbc;

    nbc = (naivebayesbin_class *)data;
    if (!nbc->class || !nbc->classn)
        return 0;
    d = nbc->nowds;
    p = log (nbc->nod / (double)(nbc->nod + nbc->nodn));
    for (i = 0; i < v->nel; i++) {
        e = (1 + vector_get_value (nbc->class, v->name[i])) / d;
        for (j = v->value[i]; j > 0; j--)       /* McCallum & Nigam */
            p += log (e / (double)j);
        /*p += log (e);*/                       /* Aas & Eikvil */
    }
    p -= log (nbc->nodn / (double)(nbc->nod + nbc->nodn));
    for (i = 0; i < v->nel; i++) {
        e = (1 + vector_get_value (nbc->classn, v->name[i])) / d;
        for (j = v->value[i]; j > 0; j--)       /* McCallum & Nigam */
            p -= log (e / (double)j);
        /*p -= log (e);*/                       /* Aas & Eikvil */
    }

    return p;
}

/**
 * Load class data from a file.
 *
 * @param file  file to read from
 * @return  The class data, or NULL on error.
 */
void *
naivebayesbin_load_class (FILE *file)
{
    int i;
    naivebayesbin_class *ncl;

    ncl = my_malloc (sizeof (naivebayesbin_class));
    i = fscanf (file, "nod1 %d\n", &ncl->nod);
    if (i != 1) {
        free (ncl);
        return NULL;
    }
    i = fscanf (file, "now1 %d\n", &ncl->now);
    if (i != 1) {
        free (ncl);
        return NULL;
    }
    fscanf (file, "vec1 ");
    ncl->class = vector_load (file);
    if (!ncl->class) {
        free (ncl);
        return NULL;
    }
    fscanf (file, "\n");
    i = fscanf (file, "nod2 %d\n", &ncl->nodn);
    if (i != 1) {
        vector_free (ncl->class);
        free (ncl);
        return NULL;
    }
    i = fscanf (file, "now2 %d\n", &ncl->nown);
    if (i != 1) {
        vector_free (ncl->class);
        free (ncl);
        return NULL;
    }
    fscanf (file, "vec2 ");
    ncl->classn = vector_load (file);
    if (!ncl->classn) {
        vector_free (ncl->class);
        free (ncl);
        return NULL;
    }
    fscanf (file, "\n");
    i = fscanf (file, "nowds %d\n", &ncl->nowds);
    if (i != 1) {
        vector_free (ncl->class);
        vector_free (ncl->classn);
        free (ncl);
        return NULL;
    }

    return ncl;
}

/**
 * Save class data to a file.
 *
 * @param file  file to write to
 * @param data  class data to save
 * @return  0 if ok.
 */
int
naivebayesbin_save_class (FILE *file, void *data)
{
    naivebayesbin_class *ncl;

    ncl = (naivebayesbin_class *)data;
    fprintf (file, "nod1 %d\n", ncl->nod);
    fprintf (file, "now1 %d\n", ncl->now);
    fprintf (file, "vec1 ");
    vector_save (ncl->class, file);
    fprintf (file, "\n");
    fprintf (file, "nod2 %d\n", ncl->nodn);
    fprintf (file, "now2 %d\n", ncl->nown);
    fprintf (file, "vec2 ");
    vector_save (ncl->classn, file);
    fprintf (file, "\n");
    fprintf (file, "nowds %d\n", ncl->nowds);

    return 0;
}

/**
 * Keep cygwin happy.
 */
int
main (void)
{
    return 0;
}

/**
 * Naive Bayes classifier name.
 */
const char *my_classifier_name = "BinaryNaiveBayes";

/**
 * Naive Bayes classifier functions.
 */
const multi_functions my_functions = {
    .new_db = NULL,
    .new = naivebayesbin_new,
    .copy = naivebayesbin_copy,
    .free = naivebayesbin_free,
    .learn = naivebayesbin_learn,
    .classify = naivebayesbin_classify,
    .load_class = naivebayesbin_load_class,
    .save_class = naivebayesbin_save_class,
    .option = OPTION_BINARY
};
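Since the project's `vector.h`, `multi.h`, and `my_malloc` are not reproduced on this page, here is a minimal, self-contained sketch of the same scoring rule using plain arrays. The names (`score_document`, `pos_counts`, `neg_counts`, and the toy counts in `main`) are illustrative assumptions, not part of the original library.

/*
 * Standalone sketch of the scoring rule in naivebayesbin_classify,
 * with plain arrays in place of the project's vector type.
 * Compile with: cc nb_sketch.c -lm
 */
#include <math.h>
#include <stdio.h>

/* Log-odds score for one document. For each term, the class-conditional
 * factor is (1 + class term count) / d, and log (e / j) is summed for
 * j = tf .. 1, as in the McCallum & Nigam branch of the original code. */
static double
score_document (const int *term_ids, const int *term_freqs, int nterms,
                const double *pos_counts, const double *neg_counts,
                int npos_docs, int nneg_docs, double d)
{
    int i, j;
    double e, p;

    p = log (npos_docs / (double)(npos_docs + nneg_docs))
      - log (nneg_docs / (double)(npos_docs + nneg_docs));
    for (i = 0; i < nterms; i++) {
        e = (1.0 + pos_counts[term_ids[i]]) / d;
        for (j = term_freqs[i]; j > 0; j--)
            p += log (e / (double)j);
        e = (1.0 + neg_counts[term_ids[i]]) / d;
        for (j = term_freqs[i]; j > 0; j--)
            p -= log (e / (double)j);
    }
    return p;           /* > 0 favours the positive class */
}

int
main (void)
{
    /* Toy vocabulary of 4 terms; counts accumulated from training docs. */
    double pos_counts[4] = { 5.0, 1.0, 0.0, 2.0 };
    double neg_counts[4] = { 0.0, 4.0, 3.0, 1.0 };
    int term_ids[]   = { 0, 3 };    /* document mentions terms 0 and 3 */
    int term_freqs[] = { 2, 1 };    /* term 0 twice, term 3 once */
    double p;

    p = score_document (term_ids, term_freqs, 2,
                        pos_counts, neg_counts, 3, 3, 4.0);
    printf ("log-odds score: %f -> %s\n", p, p > 0 ? "positive" : "negative");
    return 0;
}

Because the log j and log d terms enter the two sums with opposite signs, they cancel in the final log-odds; the score reduces to the prior log-odds plus the term-frequency-weighted difference of log (1 + count) between the two classes.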
