📄 naivebayesbin.c
字号:
/* Copyright (C) 2001-2002 Mikael Ylikoski * See the accompanying file "README" for the full copyright notice *//** * @file * Binary Naive Bayes learning algorithm. * * Should be used with tf vectorizer, without any normalizer. * * Implemented after description in Joachims, T., <em>A Probabilistic Analysis * of the Rocchio Algorithm with TFIDF for Text Categorization</em>, 1997. * * @author Mikael Ylikoski * @date 2001-2002 */#include <float.h>#include <math.h>#include <stdio.h>#include <stdlib.h>#include "multi.h"#include "utility.h"#include "vector.h"/** * Naive Bayes classifier class data. */typedef struct { vector *class; /**< Vector with word frequencies for class */ int nod; /**< Number Of Documents for class */ int now; /**< Number Of Words for class = vector_sum (class) */ vector *classn; /**< Vector with word frequencies for class */ int nodn; /**< Number Of Documents for class */ int nown; /**< Number Of Words for class = vector_sum (class) */ int nowds;} naivebayesbin_class;/** * Create a new classifier. * * @return The classifier. */void *naivebayesbin_new (void) { naivebayesbin_class *nbc; nbc = my_malloc (sizeof(naivebayesbin_class)); nbc->class = NULL; nbc->nod = 0; nbc->now = 0; nbc->classn = NULL; nbc->nodn = 0; nbc->nown = 0; nbc->nowds = 0; return nbc;}void *naivebayesbin_copy (void *data) { naivebayesbin_class *nbc; naivebayesbin_class *onbc; onbc = (naivebayesbin_class *)data; nbc = my_malloc (sizeof(naivebayesbin_class)); if (onbc->class) { nbc->class = vector_copy (onbc->class); if (!nbc->class) { free (nbc); return NULL; } } else nbc->class = NULL; nbc->nod = onbc->nod; nbc->now = onbc->now; if (onbc->classn) { nbc->classn = vector_copy (onbc->classn); if (!nbc->classn) { free (nbc->class); free (nbc); return NULL; } } else nbc->classn = NULL; nbc->nodn = onbc->nodn; nbc->nown = onbc->nown; return nbc;}voidnaivebayesbin_free (void *data) { naivebayesbin_class *nbc; nbc = (naivebayesbin_class *)data; if (nbc->class) vector_free (nbc->class); if (nbc->classn) vector_free (nbc->classn); free (nbc);}/** * Train classifier with a document vector. * * @param db classifier database * @param data class data * @param v term frequency vector for document to learn * @param class document class: 1 or -1 * @return 0 if ok; -1 otherwise. */intnaivebayesbin_learn (void *db, void *data, vector *v, int class) { int i; naivebayesbin_class *nbc; nbc = (naivebayesbin_class *)data; if (class > 0) { if (nbc->class == NULL) nbc->class = vector_copy (v); else vector_add (nbc->class, v); nbc->nod++; nbc->now += vector_sum (v); } else { if (nbc->classn == NULL) nbc->classn = vector_copy (v); else vector_add (nbc->classn, v); nbc->nodn++; nbc->nown += vector_sum (v); } i = vector_dim (v); if (i > nbc->nowds) nbc->nowds = i; return 0;}/** * Classify a document. * * @param db classifier database * @param data class data * @param v term frequency vector for the document to classify * @return The number of the most probable class. */doublenaivebayesbin_classify (void *db, void *data, vector *v) { int i, j; double d, e, p; naivebayesbin_class *nbc; nbc = (naivebayesbin_class *)data; if (!nbc->class || !nbc->classn) return 0; d = nbc->nowds; p = log (nbc->nod / (double)(nbc->nod + nbc->nodn)); for (i = 0; i < v->nel; i++) { e = (1 + vector_get_value (nbc->class, v->name[i])) / d; for (j = v->value[i]; j > 0; j--) /* McCallum & Nigam */ p += log (e / (double)j); /*p += log (e);*/ /* Aas & Eikvil */ } p -= log (nbc->nodn / (double)(nbc->nod + nbc->nodn)); for (i = 0; i < v->nel; i++) { e = (1 + vector_get_value (nbc->classn, v->name[i])) / d; for (j = v->value[i]; j > 0; j--) /* McCallum & Nigam */ p -= log (e / (double)j); /*p -= log (e);*/ /* Aas & Eikvil */ } return p;}void *naivebayesbin_load_class (FILE *file) { int i; naivebayesbin_class *ncl; ncl = my_malloc (sizeof(naivebayesbin_class)); i = fscanf (file, "nod1 %d\n", &ncl->nod); if (i != 1) { free (ncl); return NULL; } i = fscanf (file, "now1 %d\n", &ncl->now); if (i != 1) { free (ncl); return NULL; } fscanf (file, "vec1 "); ncl->class = vector_load (file); if (!ncl->class) { free (ncl); return NULL; } fscanf (file, "\n"); i = fscanf (file, "nod2 %d\n", &ncl->nodn); if (i != 1) { free (ncl); return NULL; } i = fscanf (file, "now2 %d\n", &ncl->nown); if (i != 1) { free (ncl); return NULL; } fscanf (file, "vec2 "); ncl->classn = vector_load (file); if (!ncl->classn) { free (ncl); return NULL; } fscanf (file, "\n"); i = fscanf (file, "nowds %d\n", &ncl->nowds); if (i != 1) { free (ncl); return NULL; } return ncl;}intnaivebayesbin_save_class (FILE *file, void *data) { naivebayesbin_class *ncl; ncl = (naivebayesbin_class *)data; fprintf (file, "nod1 %d\n", ncl->nod); fprintf (file, "now1 %d\n", ncl->now); fprintf (file, "vec1 "); vector_save (ncl->class, file); fprintf (file, "\n"); fprintf (file, "nod2 %d\n", ncl->nodn); fprintf (file, "now2 %d\n", ncl->nown); fprintf (file, "vec2 "); vector_save (ncl->classn, file); fprintf (file, "\n"); fprintf (file, "nowds %d\n", ncl->nowds); return 0;}/** * Keep cygwin happy. */intmain (void) { return 0;}/** * Naive Bayes classifier name. */const char *my_classifier_name = "BinaryNaiveBayes";/** * Naive Bayes classifier functions. */const multi_functions my_functions = { .new_db = NULL, .new = naivebayesbin_new, .copy = naivebayesbin_copy, .free = naivebayesbin_free, .learn = naivebayesbin_learn, .classify = naivebayesbin_classify, .load_class = naivebayesbin_load_class, .save_class = naivebayesbin_save_class, .option = OPTION_BINARY};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -