⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 from.c

📁 使用具有增量学习的监控式学习方法。包括几个不同的分类算法。
💻 C
字号:
/* Copyright (C) 2001-2002  Mikael Ylikoski
 * See the accompanying file "README" for the full copyright notice
 */

/**
 * @file
 * From address classifier.
 *
 * Keeps a bounded table of sender addresses together with per-class
 * frequency counters.  Addresses live both in a hash table (for lookup)
 * and in a doubly linked eviction list whose head is evicted first.
 *
 * @author  Mikael Ylikoski
 * @date    2001-2002
 */

#include <glib.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "doc_classifier.h"
#include "from.h"
#include "utility.h"

/**
 * Sender eviction order.
 */
enum e_order {
    FIFO,               /**< First in first out */
    MY                  /**< Experimental order */
};

typedef struct address_ address;

/**
 * From classifier.
 */
struct from_ {
    GHashTable *ht;     /**< Hash Table (address string -> address) */
    int size;           /**< Maximum size */
    int noa;            /**< Number Of Addresses */
    int noc;            /**< Number Of Classes (highest class index + 1) */
    int age;            /**< Current time */
    address *first;     /**< First address in eviction list */
    address *last;      /**< Last address in eviction list */
    enum e_order order; /**< Eviction order */
};

/**
 * From classifier address.
 */
struct address_ {
    char *address;      /**< Mail Address */
    int *cf;            /**< Class Frequency */
    int soc;            /**< Size of cf */
    int sof;            /**< Sum of frequency */
    int age;            /**< Last time seen */
    address *prev;      /**< Previous in eviction list */
    address *next;      /**< Next in eviction list */
};

/**
 * Create a new from classifier.
 *
 * @param size  maximum number of addresses to save
 * @param ord   eviction order
 * @return The new classifier, or NULL if the hash table could not be
 *         created.
 */
from *
from_new (int size, enum e_order ord) {
    from *fr;

    fr = my_malloc (sizeof(from));
    fr->ht = g_hash_table_new (g_str_hash, g_str_equal);
    if (!fr->ht) {
        free (fr);
        return NULL;
    }
    fr->size = size;
    fr->noa = 0;
    fr->noc = 0;
    fr->age = 0;
    fr->first = NULL;
    fr->last = NULL;
    fr->order = ord;
    return fr;
}

/**
 * Add address at the tail of the prune list.
 *
 * @param fr  from classifier
 * @param ad  address to append; its prev/next links are overwritten
 */
static void
from_add_address (from *fr, address *ad) {
    ad->next = NULL;
    ad->prev = fr->last;
    if (fr->last)
        fr->last->next = ad;
    else
        fr->first = ad;
    fr->last = ad;
}

/**
 * Free memory used by address.
 *
 * @param ad  address to release (its string and counter array included)
 */
static inline void
from_free_address (address *ad) {
    free (ad->address);
    free (ad->cf);
    free (ad);
}

/**
 * Release a classifier and every address linked into its prune list.
 * Used to back out of a failed load.
 *
 * @param fr  from classifier
 */
static void
from_discard (from *fr) {
    address *ad, *next;

    /* The table was created without destroy callbacks, so destroying it
     * leaves the keys (owned by the address structs) untouched. */
    g_hash_table_destroy (fr->ht);
    for (ad = fr->first; ad; ad = next) {
        next = ad->next;
        from_free_address (ad);
    }
    free (fr);
}

/**
 * Load a from classifier in the text format written by from_save().
 *
 * The per-address age is not part of the file format, so every loaded
 * address gets age 0; the eviction list keeps the order of the file.
 *
 * @param f  stream to read from
 * @return The loaded classifier (a from *), or NULL if the input is
 *         malformed or truncated.
 */
void *
from_load (FILE *f) {
    char buf[10];
    int i, j, k;
    int noa, noc, age;
    address *ad;
    from *fr;

    if (fscanf (f, "size %d\n", &i) != 1)
        return NULL;
    if (fscanf (f, "order %5s\n", buf) != 1)
        return NULL;
    if (!strcmp (buf, "fifo"))
        fr = from_new (i, FIFO);
    else if (!strcmp (buf, "my"))
        fr = from_new (i, MY);
    else
        return NULL;
    if (!fr)
        return NULL;

    if (fscanf (f, "noa %d\n", &noa) != 1
        || fscanf (f, "noc %d\n", &noc) != 1
        || fscanf (f, "age %d\n", &age) != 1
        || noa < 0) {
        from_discard (fr);
        return NULL;
    }
    fr->noc = noc;
    fr->age = age;

    for (i = 0; i < noa; i++) {
        ad = my_malloc (sizeof(address));
        ad->address = NULL;
        ad->cf = NULL;
        ad->soc = 0;
        ad->sof = 0;
        ad->age = 0;            /* not stored in the file */

        if (fscanf (f, "address %d:", &k) != 1 || k < 0)
            goto fail;
        ad->address = my_malloc ((size_t)k + 1);
        if (fread (ad->address, sizeof(char), (size_t)k, f) != (size_t)k)
            goto fail;
        ad->address[k] = '\0';

        /* Literal-only formats assign nothing; a mismatch here is caught
         * by the next conversion failing. */
        fscanf (f, "=");
        if (fscanf (f, "sof %d;", &ad->sof) != 1
            || fscanf (f, "soc %d;", &ad->soc) != 1
            || ad->soc < 0)
            goto fail;
        if (ad->soc > 0)
            ad->cf = my_malloc (sizeof(int) * (size_t)ad->soc);
        fscanf (f, "cf=");
        for (j = 0; j < ad->soc; j++)
            if (fscanf (f, "%d;", &ad->cf[j]) != 1)
                goto fail;
        fscanf (f, "\n");

        /* Files written before noc meant "index + 1" may understate it. */
        if (ad->soc > fr->noc)
            fr->noc = ad->soc;

        /* Make the address reachable through lookups as well as the list. */
        g_hash_table_insert (fr->ht, ad->address, ad);
        from_add_address (fr, ad);
        fr->noa++;
    }
    return fr;

fail:
    /* ad is not linked into the list yet, so release it separately. */
    from_free_address (ad);
    from_discard (fr);
    return NULL;
}

/**
 * Save a from classifier as text.
 *
 * @param f   stream to write to
 * @param db  classifier to save (a from *)
 * @return Zero if ok, or -1 if the stream reports an error afterwards.
 */
int
from_save (FILE *f, void *db) {
    int i;
    from *fr;
    address *ad;

    fr = (from *)db;
    fprintf (f, "size %d\n", fr->size);
    switch (fr->order) {
    case FIFO:
        fprintf (f, "order fifo\n");
        break;
    case MY:
        fprintf (f, "order my\n");
        break;
    default:
        break;
    }
    fprintf (f, "noa %d\n", fr->noa);
    fprintf (f, "noc %d\n", fr->noc);
    fprintf (f, "age %d\n", fr->age);
    for (ad = fr->first; ad; ad = ad->next) {
        /* strlen() yields size_t; convert explicitly to match %d, which
         * is also what from_load() reads the length back with. */
        fprintf (f, "address %d:%s=", (int)strlen (ad->address),
                 ad->address);
        fprintf (f, "sof %d;", ad->sof);
        fprintf (f, "soc %d;", ad->soc);
        fprintf (f, "cf=");
        for (i = 0; i < ad->soc; i++)
            fprintf (f, "%d;", ad->cf[i]);
        fprintf (f, "\n");
    }
    return ferror (f) ? -1 : 0;
}

/**
 * Calculate value of address in database.
 * The bigger the value, the longer it stays.
 *
 * @param fr  from classifier
 * @param ad  address to evaluate
 * @return The address' value under the classifier's eviction order.
 */
static inline double
from_value (from *fr, address *ad) {
    switch (fr->order) {
    case FIFO:
        return ad->age;
    case MY:
        return ad->sof + log (200 / (double)(fr->age + 1 - ad->age));
    default:
        break;
    }
    return 0;
}

/**
 * Move address within prune list.
 *
 * Compares the address' value with its neighbours and, if it is out of
 * place, unlinks it and re-inserts it further towards the head (smaller
 * value) or the tail (larger or equal value).
 *
 * @param fr  from classifier
 * @param ad  address already linked into the prune list
 */
static void
from_move_address (from *fr, address *ad) {
    double val;
    address *a;

    val = from_value (fr, ad);
    if (ad->prev && val < from_value (fr, ad->prev)) {
        /* Unlink, then walk towards the head. */
        ad->prev->next = ad->next;
        if (ad->next)
            ad->next->prev = ad->prev;
        else
            fr->last = ad->prev;
        for (a = ad->prev->prev; a != NULL; a = a->prev)
            if (val >= from_value (fr, a)) {
                /* Insert directly after a. */
                ad->prev = a;
                ad->next = a->next;
                a->next->prev = ad;
                a->next = ad;
                return;
            }
        /* Smaller than everything: becomes the new head. */
        ad->prev = NULL;
        ad->next = fr->first;
        fr->first->prev = ad;
        fr->first = ad;
    } else if (ad->next && val >= from_value (fr, ad->next)) {
        /* Unlink, then walk towards the tail. */
        ad->next->prev = ad->prev;
        if (ad->prev)
            ad->prev->next = ad->next;
        else
            fr->first = ad->next;
        for (a = ad->next->next; a != NULL; a = a->next)
            if (val < from_value (fr, a)) {
                /* Insert directly before a. */
                ad->prev = a->prev;
                ad->next = a;
                a->prev->next = ad;
                a->prev = ad;
                return;
            }
        /* Not smaller than anything: becomes the new tail. */
        ad->next = NULL;
        ad->prev = fr->last;
        fr->last->next = ad;
        fr->last = ad;
    }
}

/**
 * Remove an address.
 * Evicts the head of the prune list from both the list and the hash
 * table and frees it.
 *
 * @param fr  from classifier
 * @return Zero if an address was removed, -1 if the list was empty.
 */
static int
from_prune_address (from *fr) {
    address *ad;

    if (!fr->first)
        return -1;

    ad = fr->first;
    fr->first = fr->first->next;
    if (!fr->first)
        fr->last = NULL;
    else
        fr->first->prev = NULL;

    /* Remove the table entry before the key string is freed. */
    g_hash_table_remove (fr->ht, ad->address);
    from_free_address (ad);
    return 0;
}

/**
 * "Add" an address.
 * Increases the frequency of the address for a class.
 *
 * @param fr     from classifier
 * @param ad     address
 * @param class  class (must not be negative)
 * @return Zero if ok, -1 if ad is NULL or class is negative.
 */
int
from_learn (from *fr, const char *ad, int class) {
    int i;
    address *f;

    /* A negative class would index cf[] out of bounds below. */
    if (!ad || class < 0)
        return -1;

    f = g_hash_table_lookup (fr->ht, ad);
    if (!f) {
        /* Evict one entry when full; the count only stays unchanged if
         * something really was evicted. */
        if (fr->noa < fr->size || from_prune_address (fr) != 0)
            fr->noa++;
        f = my_malloc (sizeof(address));
        f->address = my_strdup (ad);
        f->cf = NULL;
        f->soc = 0;
        f->age = fr->age;
        f->sof = 0;
        g_hash_table_insert (fr->ht, f->address, f);
        from_add_address (fr, f);
    }

    if (class >= f->soc) {
        /* Grow the counter array and zero the new slots. */
        f->cf = my_realloc (f->cf, sizeof(int) * ((size_t)class + 1));
        for (i = f->soc; i <= class; i++)
            f->cf[i] = 0;
        f->soc = class + 1;
    }
    f->cf[class]++;
    f->sof++;
    f->age = fr->age;
    from_move_address (fr, f);

    /* noc is a count, so it must exceed the highest class index;
     * from_classify_score() sizes its result from it. */
    if (class >= fr->noc)
        fr->noc = class + 1;
    fr->age++;

    return 0;
}

/**
 * Classify an address.
 *
 * @param fr     from classifier
 * @param ad     address
 * @return The most frequent class for the address if it was seen more
 *         than 3 times and accounts for more than 60% of the address'
 *         total frequency; otherwise -1 (also for unknown addresses).
 */
int
from_classify (from *fr, const char *ad) {
    int i, j;
    double d;
    address *f;

    f = g_hash_table_lookup (fr->ht, ad);
    if (!f || f->soc <= 0)
        return -1;

    j = 0;
    d = f->cf[0];
    for (i = 1; i < f->soc; i++)
        if (f->cf[i] > d) {
            j = i;
            d = f->cf[i];
        }
    if (d > 3 && d / (double)f->sof > 0.6)
        return j;
    return -1;
}

#define MAXSIZE 3

/**
 * Classify an address.
 *
 * With fewer than 15 observations only a class holding more than 90% of
 * them is reported; otherwise every class holding more than 40% is
 * reported, most frequent first.
 *
 * @param fr  from classifier
 * @param ad  address
 * @return  Newly allocated list of MAXSIZE + 1 class indices, padded and
 *          terminated with -1, or NULL if the address is unknown.
 */
int *
from_classify_list (from *fr, const char *ad) {
    int i, j;
    int *r;
    double d;
    address *f;

    f = g_hash_table_lookup (fr->ht, ad);
    if (!f)
        return NULL;

    r = my_malloc (sizeof(int) * (MAXSIZE + 1));
    for (i = 0; i < MAXSIZE + 1; i++)
        r[i] = -1;

    for (i = 0; i < f->soc; i++) {
        d = f->cf[i] / (double)f->sof;
        if (f->sof < 15) {
            if (d > 0.9 && (r[0] == -1 || f->cf[i] > f->cf[r[0]]))
                r[0] = i;
        } else if (d > 0.4) {
            for (j = 0; j < MAXSIZE; j++)
                if (r[j] == -1) {
                    r[j] = i;
                    break;
                } else if (f->cf[i] > f->cf[r[j]]) {
                    /* Shift the tail down one slot; the final -1
                     * terminator at r[MAXSIZE] is never moved. */
                    memmove (&r[j + 1], &r[j],
                             sizeof(int) * (MAXSIZE - j - 1));
                    r[j] = i;
                    break;
                }
        }
    }

    return r;
}

/**
 * Classify an address.
 *
 * @param fr  from classifier
 * @param ad  address
 * @return  newly allocated list of scores for classes (each class'
 *          share of the address' observations), holding at least
 *          fr->noc entries; NULL if the address is unknown or no class
 *          has been recorded
 */
double *
from_classify_score (from *fr, const char *ad) {
    int i, n;
    double *d, j;
    address *f;

    f = g_hash_table_lookup (fr->ht, ad);
    if (!f)
        return NULL;

    /* Never allocate fewer slots than are written below. */
    n = fr->noc > f->soc ? fr->noc : f->soc;
    if (n <= 0)
        return NULL;

    for (j = i = 0; i < f->soc; i++)
        j += f->cf[i];

    d = my_calloc (n, sizeof(double));
    if (j > 0)
        for (i = 0; i < f->soc; i++)
            d[i] = f->cf[i] / j;

    return d;
}

/**
 * Print a from classifier on stdout.
 *
 * @param fr  from classifier to print
 */
void
from_print (from *fr) {
    address *a;

    for (a = fr->first; a; a = a->next)
        printf ("%s: sof=%d age=%d\n", a->address, a->sof,
                fr->age - a->age);
}

/**
 * Create a from classifier from an option string.
 *
 * Reads "n=" (maximum number of addresses, default 100, also used when
 * the value is below 1) and "o=" (eviction order, "fifo" or "my",
 * default fifo).
 *
 * @param opts  option string, or NULL for the defaults
 * @return The new classifier (a from *).
 */
void *
from_new_doc (const char *opts) {
    char *s;
    int n;
    enum e_order ord;

    n = 100;
    ord = FIFO;
    if (opts) {
        n = get_opt_int (opts, "n=");
        if (n < 1)
            n = 100;
        /* NOTE(review): ownership of the string returned by
         * get_opt_str() is not visible here -- confirm whether it must
         * be freed. */
        s = get_opt_str (opts, "o=");
        if (s) {
            if (!strncmp (s, "fifo", 4))
                ord = FIFO;
            else if (!strncmp (s, "my", 2))
                ord = MY;
        }
    }

    return from_new (n, ord);
}

/**
 * Learn the sender address of a document.
 *
 * @param db     classifier (a from *)
 * @param data   document (a document *)
 * @param class  class of the document
 * @return Result of from_learn(), or -1 if the document has no from
 *         address.
 */
int
from_learn_doc (void *db, void *data, int class) {
    from *fr;
    document *doc;
    const char *ad;

    fr = (from *)db;
    doc = (document *)data;
    ad = document_get_from (doc);
    if (!ad)
        return -1;
    return from_learn (fr, ad, class);
}

/**
 * Rank classes for the sender address of a document.
 *
 * @param db    classifier (a from *)
 * @param data  document (a document *)
 * @return Result of from_classify_list(), or NULL if the document has
 *         no from address.
 */
int *
from_classify_doc_rank (void *db, void *data) {
    from *fr;
    document *doc;
    const char *ad;

    fr = (from *)db;
    doc = (document *)data;
    ad = document_get_from (doc);
    if (!ad)
        return NULL;
    return from_classify_list (fr, ad);
}

/**
 * Keep cygwin happy.
 */
int
main (void) {
    return 0;
}

/** Name under which this classifier is exported. */
const char *my_doc_classifier_name = "From";

/** Entry points exported for this classifier. */
const doc_classifier_functions my_functions = {
    .new = from_new_doc,
    //.load = from_load,
    //.save = from_save,
    .learn = from_learn_doc,
    .classify_rank = from_classify_doc_rank
};

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -