📄 from.c
字号:
/* Copyright (C) 2001-2002 Mikael Ylikoski
 * See the accompanying file "README" for the full copyright notice
 */

/**
 * @file
 * From address classifier.
 *
 * Keeps, per sender address, a table of how often that address has been
 * seen for each class, and classifies new mail by looking the sender up.
 * The number of stored addresses is bounded; when the bound is reached the
 * address at the head of an eviction list is dropped.
 *
 * @author  Mikael Ylikoski
 * @date    2001-2002
 */

#include <glib.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "doc_classifier.h"
#include "from.h"
#include "utility.h"

/**
 * Sender eviction order.
 */
enum e_order {
    FIFO,   /**< First in first out */
    MY      /**< Experimental order */
};

typedef struct address_ address;

/**
 * From classifier.
 */
struct from_ {
    GHashTable *ht;     /**< Hash Table (address string -> address record) */
    int size;           /**< Maximum size */
    int noa;            /**< Number Of Addresses */
    int noc;            /**< Number Of Classes (highest class index + 1) */
    int age;            /**< Current time */
    address *first;     /**< First address in eviction list */
    address *last;      /**< Last address in eviction list */
    enum e_order order; /**< Eviction order */
};

/**
 * From classifier address.
 */
struct address_ {
    char *address;      /**< Mail Address */
    int *cf;            /**< Class Frequency */
    int soc;            /**< Size of cf */
    int sof;            /**< Sum of frequency */
    int age;            /**< Last time seen */
    address *prev;      /**< Previous in eviction list */
    address *next;      /**< Next in eviction list */
};

/**
 * Create a new from classifier.
 *
 * @param size  maximum number of addresses to save
 * @param ord   eviction order
 * @return The new classifier, or NULL if the hash table could not be
 *         created.
 */
from *
from_new (int size, enum e_order ord) {
    from *fr;

    fr = my_malloc (sizeof(from));
    fr->ht = g_hash_table_new (g_str_hash, g_str_equal);
    if (!fr->ht) {
        free (fr);
        return NULL;
    }
    fr->size = size;
    fr->noa = 0;
    fr->noc = 0;
    fr->age = 0;
    fr->first = NULL;
    fr->last = NULL;
    fr->order = ord;

    return fr;
}

/**
 * Free memory used by address.
 *
 * @param ad  address record; its string and frequency table are freed too
 */
static inline void
from_free_address (address *ad) {
    free (ad->address);
    free (ad->cf);
    free (ad);
}

/**
 * Release a classifier and every address linked into its eviction list.
 * Used to back out of a failed load.
 *
 * @param fr  from classifier
 */
static void
from_discard_partial (from *fr) {
    address *ad, *next;

    /* The table only borrows the key strings owned by the address records,
     * so drop the table before the records it points into. */
    g_hash_table_destroy (fr->ht);
    for (ad = fr->first; ad; ad = next) {
        next = ad->next;
        from_free_address (ad);
    }
    free (fr);
}

/**
 * Load a from classifier written by from_save().
 *
 * Every field is validated while reading; on malformed or truncated input
 * everything allocated so far is released.  Loaded addresses are entered
 * into the hash table and appended to the eviction list in file order.
 *
 * @param f  stream to read from
 * @return The loaded classifier (a from *), or NULL on error.
 */
void *
from_load (FILE *f) {
    char buf[10];
    int i, j, k, size, noa;
    address *ad;
    from *fr;

    if (!f)
        return NULL;
    if (fscanf (f, "size %d\n", &size) != 1)
        return NULL;
    if (fscanf (f, "order %5s\n", buf) != 1)
        return NULL;

    if (!strcmp (buf, "fifo"))
        fr = from_new (size, FIFO);
    else if (!strcmp (buf, "my"))
        fr = from_new (size, MY);
    else
        return NULL;
    if (!fr)
        return NULL;

    if (fscanf (f, "noa %d\n", &noa) != 1 || noa < 0)
        goto fail;
    if (fscanf (f, "noc %d\n", &fr->noc) != 1 || fr->noc < 0)
        goto fail;
    if (fscanf (f, "age %d\n", &fr->age) != 1 || fr->age < 0)
        goto fail;

    for (i = 0; i < noa; i++) {
        /* The address is stored length-prefixed so that it can be read
         * back verbatim with fread(). */
        if (fscanf (f, "address %d:", &k) != 1 || k < 0)
            goto fail;

        ad = my_malloc (sizeof(address));
        ad->address = my_malloc ((size_t)k + 1);
        ad->cf = NULL;
        ad->soc = 0;
        ad->sof = 0;
        /* The file format carries no per-address age; start from 0 so that
         * from_value() never reads an indeterminate value. */
        ad->age = 0;
        ad->prev = NULL;
        ad->next = NULL;

        if (fread (ad->address, sizeof(char), (size_t)k, f) != (size_t)k) {
            ad->address[0] = '\0';
            goto fail_address;
        }
        ad->address[k] = '\0';

        if (fscanf (f, "=sof %d;soc %d;cf=", &ad->sof, &ad->soc) != 2
            || ad->soc < 0) {
            ad->soc = 0;
            goto fail_address;
        }
        if (ad->soc > 0)
            ad->cf = my_calloc (ad->soc, sizeof(int));
        for (j = 0; j < ad->soc; j++)
            if (fscanf (f, "%d;", &ad->cf[j]) != 1)
                goto fail_address;
        fscanf (f, "\n");       /* only skips trailing white space */

        /* A repeated address would leave a record in the eviction list
         * that the hash table no longer refers to. */
        if (g_hash_table_lookup (fr->ht, ad->address))
            goto fail_address;

        /* Keep noc large enough for from_classify_score(), even for files
         * whose noc line holds the highest class index instead. */
        if (ad->soc > fr->noc)
            fr->noc = ad->soc;

        g_hash_table_insert (fr->ht, ad->address, ad);
        ad->prev = fr->last;
        if (!fr->first)
            fr->first = ad;
        else
            fr->last->next = ad;
        fr->last = ad;
        fr->noa++;
    }

    return fr;

 fail_address:
    from_free_address (ad);
 fail:
    from_discard_partial (fr);
    return NULL;
}

/**
 * Save a from classifier.
 *
 * Addresses are written in eviction-list order, one per line, in the
 * format read by from_load().
 *
 * @param f   stream to write to
 * @param db  classifier to save (a from *)
 * @return Zero if ok, or -1 on a NULL argument, an unknown eviction order
 *         or a stream error.
 */
int
from_save (FILE *f, void *db) {
    int i;
    const char *order_name;
    from *fr;
    address *ad;

    fr = (from *)db;
    if (!f || !fr)
        return -1;

    switch (fr->order) {
    case FIFO:
        order_name = "fifo";
        break;
    case MY:
        order_name = "my";
        break;
    default:
        return -1;
    }

    fprintf (f, "size %d\n", fr->size);
    fprintf (f, "order %s\n", order_name);
    fprintf (f, "noa %d\n", fr->noa);
    fprintf (f, "noc %d\n", fr->noc);
    fprintf (f, "age %d\n", fr->age);

    for (ad = fr->first; ad; ad = ad->next) {
        /* strlen() yields a size_t; convert it to match the %d that
         * from_load() reads back. */
        fprintf (f, "address %d:%s=", (int)strlen (ad->address),
                 ad->address);
        fprintf (f, "sof %d;", ad->sof);
        fprintf (f, "soc %d;", ad->soc);
        fprintf (f, "cf=");
        for (i = 0; i < ad->soc; i++)
            fprintf (f, "%d;", ad->cf[i]);
        fprintf (f, "\n");
    }

    return ferror (f) ? -1 : 0;
}

/**
 * Calculate value of address in database.
 * The bigger the value, the longer it stays.
 *
 * @param fr  from classifier
 * @param ad  address to evaluate
 * @return For FIFO the time the address was last seen; for MY the sum of
 *         frequency plus log (200 / time since last seen).
 */
static inline double
from_value (from *fr, address *ad) {
    switch (fr->order) {
    case FIFO:
        return ad->age;
    case MY:
        /* ad->age never exceeds fr->age, so the divisor is at least 1. */
        return ad->sof + log (200 / (double)(fr->age + 1 - ad->age));
    default:
        break;
    }
    return 0;
}

/**
 * Move address within prune list.
 *
 * Re-positions an address whose value has changed: towards the head while
 * its value is smaller than that of its predecessors, or towards the tail
 * while it is not smaller than that of its successors.
 *
 * @param fr  from classifier
 * @param ad  address, already linked into the list
 */
static void
from_move_address (from *fr, address *ad) {
    double val;
    address *a;

    val = from_value (fr, ad);
    if (ad->prev && val < from_value (fr, ad->prev)) {
        /* Unlink, then search towards the head for the insertion point. */
        ad->prev->next = ad->next;
        if (ad->next)
            ad->next->prev = ad->prev;
        else
            fr->last = ad->prev;

        for (a = ad->prev->prev; a != NULL; a = a->prev) {
            if (val >= from_value (fr, a)) {
                /* a is never the tail here, so a->next is valid. */
                ad->prev = a;
                ad->next = a->next;
                a->next->prev = ad;
                a->next = ad;
                return;
            }
        }

        /* Smaller than everything: becomes the new head. */
        ad->prev = NULL;
        ad->next = fr->first;
        fr->first->prev = ad;
        fr->first = ad;
    } else if (ad->next && val >= from_value (fr, ad->next)) {
        /* Unlink, then search towards the tail for the insertion point. */
        ad->next->prev = ad->prev;
        if (ad->prev)
            ad->prev->next = ad->next;
        else
            fr->first = ad->next;

        for (a = ad->next->next; a != NULL; a = a->next) {
            if (val < from_value (fr, a)) {
                /* a is never the head here, so a->prev is valid. */
                ad->prev = a->prev;
                ad->next = a;
                a->prev->next = ad;
                a->prev = ad;
                return;
            }
        }

        /* Not smaller than anything: becomes the new tail. */
        ad->next = NULL;
        ad->prev = fr->last;
        fr->last->next = ad;
        fr->last = ad;
    }
}

/**
 * Add address into prune list.
 * The address is appended at the tail.
 *
 * @param fr  from classifier
 * @param ad  address to link in
 */
static void
from_add_address (from *fr, address *ad) {
    if (!fr->first) {
        fr->first = ad;
        fr->last = ad;
        ad->prev = NULL;
        ad->next = NULL;
    } else {
        fr->last->next = ad;
        ad->prev = fr->last;
        ad->next = NULL;
        fr->last = ad;
    }
}

/**
 * Remove an address.
 * Drops the address at the head of the eviction list from both the list
 * and the hash table and frees it.
 *
 * @param fr  from classifier
 * @return Zero if an address was removed, -1 if the list was empty.
 */
static int
from_prune_address (from *fr) {
    address *ad;

    if (!fr->first)
        return -1;

    ad = fr->first;
    fr->first = fr->first->next;
    if (!fr->first)
        fr->last = NULL;
    else
        fr->first->prev = NULL;

    /* The key is owned by the record, so remove it before freeing. */
    g_hash_table_remove (fr->ht, ad->address);
    from_free_address (ad);

    return 0;
}

/**
 * "Add" an address.
 * Increases the frequency of the address for a class.  An unknown address
 * is created first, evicting the head of the eviction list if the
 * classifier is full.
 *
 * @param fr     from classifier
 * @param ad     address
 * @param class  class (non-negative index)
 * @return Zero if ok, or -1 on a NULL argument or negative class.
 */
int
from_learn (from *fr, const char *ad, int class) {
    int i;
    address *f;

    if (!fr || !ad || class < 0)
        return -1;

    f = g_hash_table_lookup (fr->ht, ad);
    if (!f) {
        /* Count the new address unless it takes the place of an evicted
         * one; a failed prune (empty list) frees no slot. */
        if (fr->noa < fr->size || from_prune_address (fr) != 0)
            fr->noa++;

        f = my_malloc (sizeof(address));
        f->address = my_strdup (ad);
        f->cf = NULL;
        f->soc = 0;
        f->age = fr->age;
        f->sof = 0;
        g_hash_table_insert (fr->ht, f->address, f);
        from_add_address (fr, f);
    }

    if (class >= f->soc) {
        /* Grow the frequency table and zero the new slots. */
        f->cf = my_realloc (f->cf, sizeof(int) * ((size_t)class + 1));
        for (i = f->soc; i <= class; i++)
            f->cf[i] = 0;
        f->soc = class + 1;
    }
    f->cf[class]++;
    f->sof++;
    f->age = fr->age;
    from_move_address (fr, f);

    /* noc is a count: it must cover every index from_classify_score()
     * writes, i.e. be at least the largest soc. */
    if (class >= fr->noc)
        fr->noc = class + 1;
    fr->age++;

    return 0;
}

/**
 * Classify an address.
 *
 * @param fr  from classifier
 * @param ad  address
 * @return The most frequent class of the address if it has been seen more
 *         than 3 times for that class and that class makes up more than
 *         60% of its observations; otherwise -1.
 */
int
from_classify (from *fr, const char *ad) {
    int i, j;
    double d;
    address *f;

    f = g_hash_table_lookup (fr->ht, ad);
    if (!f || f->soc < 1)
        return -1;

    j = 0;
    d = f->cf[0];
    for (i = 1; i < f->soc; i++)
        if (f->cf[i] > d) {
            j = i;
            d = f->cf[i];
        }

    if (d > 3 && d / (double)f->sof > 0.6)
        return j;
    return -1;
}

#define MAXSIZE 3

/**
 * Classify an address.
 *
 * For an address seen fewer than 15 times only a class holding more than
 * 90% of the observations is reported.  Otherwise every class holding
 * more than 40% is reported, most frequent first.
 *
 * @param fr  from classifier
 * @param ad  address
 * @return Newly allocated list of MAXSIZE + 1 class indices, unused
 *         entries set to -1 (the last entry always is), or NULL if the
 *         address is unknown.
 */
int *
from_classify_list (from *fr, const char *ad) {
    int i, j;
    int *r;
    double d;
    address *f;

    f = g_hash_table_lookup (fr->ht, ad);
    if (!f)
        return NULL;

    r = my_malloc (sizeof(int) * (MAXSIZE + 1));
    for (i = 0; i < MAXSIZE + 1; i++)
        r[i] = -1;

    /* Without observations there is nothing to rank (and no divisor). */
    if (f->sof <= 0)
        return r;

    for (i = 0; i < f->soc; i++) {
        d = f->cf[i] / (double)f->sof;
        if (f->sof < 15) {
            if (d > 0.9 && (r[0] == -1 || f->cf[i] > f->cf[r[0]]))
                r[0] = i;
        } else if (d > 0.4) {
            /* Insertion into the list kept sorted by falling frequency;
             * the shift leaves the final -1 entry untouched. */
            for (j = 0; j < MAXSIZE; j++)
                if (r[j] == -1) {
                    r[j] = i;
                    break;
                } else if (f->cf[i] > f->cf[r[j]]) {
                    memmove (&r[j + 1], &r[j],
                             sizeof(int) * (MAXSIZE - j - 1));
                    r[j] = i;
                    break;
                }
        }
    }

    return r;
}

/**
 * Classify an address.
 *
 * @param fr  from classifier
 * @param ad  address
 * @return Newly allocated list of scores for classes, one per class known
 *         to the classifier (fr->noc entries), each the share of the
 *         address's observations that fell in that class; NULL if the
 *         address is unknown.
 */
double *
from_classify_score (from *fr, const char *ad) {
    int i;
    double *d, j;
    address *f;

    f = g_hash_table_lookup (fr->ht, ad);
    if (!f)
        return NULL;

    for (j = i = 0; i < f->soc; i++)
        j += f->cf[i];

    /* from_learn() and from_load() keep fr->noc >= f->soc, so every index
     * written below lies inside the allocation. */
    d = my_calloc (fr->noc, sizeof(double));
    if (j > 0)
        for (i = 0; i < f->soc; i++)
            d[i] = f->cf[i] / j;

    return d;
}

/**
 * Print a from classifier on stdout.
 * One line per address, in eviction-list order.
 *
 * @param fr  from classifier to print
 */
void
from_print (from *fr) {
    address *a;

    for (a = fr->first; a; a = a->next)
        printf ("%s: sof=%d age=%d\n", a->address, a->sof,
                fr->age - a->age);
}

/**
 * Create a from classifier from an option string.
 *
 * Recognised options: "n=" (maximum number of addresses, default 100,
 * also used when the value is below 1) and "o=" (eviction order, "fifo"
 * or "my", default fifo).
 *
 * @param opts  option string, may be NULL
 * @return The new classifier (a from *).
 */
void *
from_new_doc (const char *opts) {
    char *s;
    int n;
    enum e_order ord;

    n = 100;
    ord = FIFO;
    if (opts) {
        n = get_opt_int (opts, "n=");
        if (n < 1)
            n = 100;
        /* NOTE(review): ownership of the string returned by get_opt_str()
         * is not visible here -- confirm whether it must be freed. */
        s = get_opt_str (opts, "o=");
        if (s) {
            if (!strncmp (s, "fifo", 4))
                ord = FIFO;
            else if (!strncmp (s, "my", 2))
                ord = MY;
        }
    }

    return from_new (n, ord);
}

/**
 * Learn the sender of a document.
 *
 * @param db     classifier (a from *)
 * @param data   document (a document *)
 * @param class  class of the document
 * @return Result of from_learn(), or -1 if the document has no from
 *         address.
 */
int
from_learn_doc (void *db, void *data, int class) {
    from *fr;
    document *doc;
    const char *ad;

    fr = (from *)db;
    doc = (document *)data;

    ad = document_get_from (doc);
    if (!ad)
        return -1;

    return from_learn (fr, ad, class);
}

/**
 * Rank classes for a document by its sender.
 *
 * @param db    classifier (a from *)
 * @param data  document (a document *)
 * @return Result of from_classify_list(), or NULL if the document has no
 *         from address.
 */
int *
from_classify_doc_rank (void *db, void *data) {
    from *fr;
    document *doc;
    const char *ad;

    fr = (from *)db;
    doc = (document *)data;

    ad = document_get_from (doc);
    if (!ad)
        return NULL;

    return from_classify_list (fr, ad);
}

/**
 * Keep cygwin happy.
 */
int
main (void) {
    return 0;
}

const char *my_doc_classifier_name = "From";

/* The load and save hooks are currently not registered (left commented
 * out below). */
const doc_classifier_functions my_functions = {
    .new = from_new_doc,
    //.load = from_load,
    //.save = from_save,
    .learn = from_learn_doc,
    .classify_rank = from_classify_doc_rank
};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -