⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 select_mailstat.c

📁 使用具有增量学习的监督式学习方法。包括几个不同的分类算法。
💻 C
字号:
/* Copyright (C) 2001-2002  Mikael Ylikoski * See the accompanying file "README" for the full copyright notice *//** * @file * Program that shows information about mailboxes. * * @author  Mikael Ylikoski * @date    2001-2002 */#include <stdio.h>#include <stdlib.h>#include <string.h>#include "box.h"#include "collection.h"#include "document.h"#include "holders.h"#include "identifier.h"#include "utility.h"/** * Mailbox training information. */typedef struct {    char *name;			/**< Mailbox name */    int nom;			/**< Number Of Messages */    int first;			/**< Number of first message in class */    int last;			/**< Number of last message in class */    int max;			/**< Maximal step between two messages */} box_stats;static box_stats *mboxes;	/**< Mailboxe statistics */static int nob;			/**< Number Of mailboxes */static int nom;			/**< Number Of Messages in total */static identifier *id;		/**< Language identifier */static int lang_en;		/**< Count of english documents */static int lang_sw;		/**< Count of swedish documents */static int lang_un;		/**< Count of unknown documents */enum {    NONE,			/**< No plot */    FREQ,			/**< Frequency plot */    CUM_FREQ,			/**< Cumulative frequency plot */    LIFETIME,			/**< Lifetime plot */    BIRTH			/**< Birth plot */} plottype;static enum csm csm;		/**< Class sequence */static enum msm msm;		/**< Message sequence */static enum trm trm;		/**< Training mode *//** * Train classifier with a message. * * @param bno  mailbox number of training message * @param mno  message number of training message * @return Zero if ok, or nonzero if there was an error. 
*/static intcheck_msg (void) {    const char *lang, *charset;    int len, bno, mno, notd, cnotd;    document *doc;    text_part *tp;    doc = collection_get_document ();    if (id) {	tp = document_get_parts (doc);	if (tp) {	    if (tp->charset)		charset = tp->charset;	    else		charset = document_get_charset (doc);	    len = tp->len;	    if (len > 1000)		len = 1000;	    lang = identifier_guess_language (id, tp->text, len, charset);	} else	    lang = NULL;	if (!lang)	    lang_un++;	else if (!strcmp ("en", lang))	    lang_en++;	else if (!strcmp ("sv", lang))	    lang_sw++;	else	    lang_un++;    }    bno = collection_get_class ();    mno = collection_get_msg ();    //printf (":%d:%d: %ld\n", bno, mno, document_get_time(doc));    //printf ("%.100s\n\n", text);    cnotd = collection_get_class_notd (bno);    notd = collection_get_notd ();    if (cnotd == 2)	mboxes[bno].max = notd - mboxes[bno].last;    else if (cnotd > 2)	if (notd - mboxes[bno].last > mboxes[bno].max)	    mboxes[bno].max = notd - mboxes[bno].last;    if (mboxes[bno].first == -1)	mboxes[bno].first = notd;    mboxes[bno].last = notd;    return 0;}/** * Read mailbox names. * * @return Zero if ok, or nonzero if there was an error */static intread_mailconfig (const char *filename) {    char buf[128];    int i;    FILE *fp;    box *b;    fp = fopen (filename, "r");    if (!fp) {	fprintf (stderr, "Error: Cannot read mail configuration!\n");	return -1;    }    collection_init (csm, msm, trm, RFC822);    for (i = 0; get_line_nows (fp, buf, 128) && i < 128; i++) {	mboxes[i].name = my_strdup (buf);	b = box_new (buf);	if (!b) {	    fprintf (stderr, "Error: cannot read mailbox '%s'\n", buf);	    fclose (fp);	    return -1;	}	collection_add_box (b);	mboxes[i].nom = box_get_nod (b);    }    nob = i;    nom = collection_get_nod ();    fclose (fp);    return 0;}/** * Main program. 
*/intmain (int argc, char *argv[]) {    int i, j, k, l;    int print_basic, print_dyn;    const char *filename;    if (argc != 2) {	printf ("Usage: %s <mailconfig file>\n", argv[0]);	return 1;    }    filename = argv[1];    /* Configuration */    plottype = FREQ;    print_basic = 1;    print_dyn = 1;    /* Initialization */    mboxes = malloc (sizeof(box_stats) * 128);    if (!mboxes) {	fprintf (stderr, "Error: Cannot allocate memory!\n");	return -1;    }    for (i = 0; i < 128; i++) {	mboxes[i].name = NULL;	mboxes[i].nom = 0;	mboxes[i].first = -1;	mboxes[i].last = -1;	mboxes[i].max = -1;    }    csm = TIME;    msm = LINEAR_SEQ;    trm = ALL_INTERLEAVED;    read_mailconfig (filename);    switch (plottype) {    case NONE:	break;    case FREQ:	break;    case CUM_FREQ:	break;    case LIFETIME:	break;    case BIRTH:	printf ("# Process this data using gnuplot to create a plot.\n"		"# gnuplot> plot [0:] [0:1] \"datafile\" with impulses\n");	break;    }    if (print_basic) {	printf ("## Basic information about mailboxes:\n");	for (i = 0; i < nob; i++)	    printf ("# %d: Name: %s  Size:%d\n",		    i, mboxes[i].name, mboxes[i].nom);    }    if (print_dyn || plottype == BIRTH || plottype == LIFETIME) {	holders_load ("plugins");	id = identifier_new ();	if (!id)	    printf ("Error: Cannot load identifiers\n!");	else {	    identifier_load_language (id, "share/langid.sv", "sv");	    identifier_load_language (id, "share/langid.en", "en");	}	lang_en = 0;	lang_sw = 0;	lang_un = 0;	while (collection_next_document ()) {	    check_msg ();	    if (plottype == BIRTH)		if (collection_get_msg () == 0)		    printf ("%d 1  # box: %d\n", collection_get_notd (),			    collection_get_class ());	}    }    switch (plottype) {    case NONE:	break;    case FREQ:	printf ("## Frequency plot\n");	for (i = j = 0; i < nob; j++) {	    for (k = l = 0; k < nob; k++)		if (mboxes[k].nom == j)		    l++;	    if (l > 0) {		printf ("%d %d\n", j, l);		i += l;	    }	}	break;    case CUM_FREQ:	printf ("## 
Cumultative frequency plot\n");	printf ("## Not implemented yet\n");	break;    case LIFETIME:	printf ("## Lifetime plot\n");	printf ("## Not implemented yet\n");	break;    case BIRTH:	printf ("## Birth plot\n");	printf ("## Not implemented yet\n");	break;    }    if (print_dyn) {	printf ("## Dynamic information\n");	for (i = 0; i < nob; i++) {	    printf ("# %d: First: %d  Last: %d  Non-activity: %d  ", i,		    mboxes[i].first, mboxes[i].last, nom - mboxes[i].last);	    if (mboxes[i].nom == 1)		printf ("Average-step: 0.0  Max-step: 0\n");	    else		printf ("Average-step: %.1f  Max-step: %d\n",			(mboxes[i].last - mboxes[i].first) /			((double)mboxes[i].nom - 1), mboxes[i].max);	}	printf ("# English: %d  Swedish: %d  Unknown: %d\n",		lang_en, lang_sw, lang_un);    }    return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -