📄 select_mailstat.c
字号:
/* Copyright (C) 2001-2002 Mikael Ylikoski * See the accompanying file "README" for the full copyright notice *//** * @file * Program that shows information about mailboxes. * * @author Mikael Ylikoski * @date 2001-2002 */#include <stdio.h>#include <stdlib.h>#include <string.h>#include "box.h"#include "collection.h"#include "document.h"#include "holders.h"#include "identifier.h"#include "utility.h"/** * Mailbox training information. */typedef struct { char *name; /**< Mailbox name */ int nom; /**< Number Of Messages */ int first; /**< Number of first message in class */ int last; /**< Number of last message in class */ int max; /**< Maximal step between two messages */} box_stats;static box_stats *mboxes; /**< Mailboxe statistics */static int nob; /**< Number Of mailboxes */static int nom; /**< Number Of Messages in total */static identifier *id; /**< Language identifier */static int lang_en; /**< Count of english documents */static int lang_sw; /**< Count of swedish documents */static int lang_un; /**< Count of unknown documents */enum { NONE, /**< No plot */ FREQ, /**< Frequency plot */ CUM_FREQ, /**< Cumulative frequency plot */ LIFETIME, /**< Lifetime plot */ BIRTH /**< Birth plot */} plottype;static enum csm csm; /**< Class sequence */static enum msm msm; /**< Message sequence */static enum trm trm; /**< Training mode *//** * Train classifier with a message. * * @param bno mailbox number of training message * @param mno message number of training message * @return Zero if ok, or nonzero if there was an error. */static intcheck_msg (void) { const char *lang, *charset; int len, bno, mno, notd, cnotd; document *doc; text_part *tp; doc = collection_get_document (); if (id) { tp = document_get_parts (doc); if (tp) { if (tp->charset) charset = tp->charset; else charset = document_get_charset (doc); len = tp->len; if (len > 1000) len = 1000; lang = identifier_guess_language (id, tp->text, len, charset); } else lang = NULL; if (!lang) lang_un++; else if (!strcmp ("en", lang)) lang_en++; else if (!strcmp ("sv", lang)) lang_sw++; else lang_un++; } bno = collection_get_class (); mno = collection_get_msg (); //printf (":%d:%d: %ld\n", bno, mno, document_get_time(doc)); //printf ("%.100s\n\n", text); cnotd = collection_get_class_notd (bno); notd = collection_get_notd (); if (cnotd == 2) mboxes[bno].max = notd - mboxes[bno].last; else if (cnotd > 2) if (notd - mboxes[bno].last > mboxes[bno].max) mboxes[bno].max = notd - mboxes[bno].last; if (mboxes[bno].first == -1) mboxes[bno].first = notd; mboxes[bno].last = notd; return 0;}/** * Read mailbox names. * * @return Zero if ok, or nonzero if there was an error */static intread_mailconfig (const char *filename) { char buf[128]; int i; FILE *fp; box *b; fp = fopen (filename, "r"); if (!fp) { fprintf (stderr, "Error: Cannot read mail configuration!\n"); return -1; } collection_init (csm, msm, trm, RFC822); for (i = 0; get_line_nows (fp, buf, 128) && i < 128; i++) { mboxes[i].name = my_strdup (buf); b = box_new (buf); if (!b) { fprintf (stderr, "Error: cannot read mailbox '%s'\n", buf); fclose (fp); return -1; } collection_add_box (b); mboxes[i].nom = box_get_nod (b); } nob = i; nom = collection_get_nod (); fclose (fp); return 0;}/** * Main program. */intmain (int argc, char *argv[]) { int i, j, k, l; int print_basic, print_dyn; const char *filename; if (argc != 2) { printf ("Usage: %s <mailconfig file>\n", argv[0]); return 1; } filename = argv[1]; /* Configuration */ plottype = FREQ; print_basic = 1; print_dyn = 1; /* Initialization */ mboxes = malloc (sizeof(box_stats) * 128); if (!mboxes) { fprintf (stderr, "Error: Cannot allocate memory!\n"); return -1; } for (i = 0; i < 128; i++) { mboxes[i].name = NULL; mboxes[i].nom = 0; mboxes[i].first = -1; mboxes[i].last = -1; mboxes[i].max = -1; } csm = TIME; msm = LINEAR_SEQ; trm = ALL_INTERLEAVED; read_mailconfig (filename); switch (plottype) { case NONE: break; case FREQ: break; case CUM_FREQ: break; case LIFETIME: break; case BIRTH: printf ("# Process this data using gnuplot to create a plot.\n" "# gnuplot> plot [0:] [0:1] \"datafile\" with impulses\n"); break; } if (print_basic) { printf ("## Basic information about mailboxes:\n"); for (i = 0; i < nob; i++) printf ("# %d: Name: %s Size:%d\n", i, mboxes[i].name, mboxes[i].nom); } if (print_dyn || plottype == BIRTH || plottype == LIFETIME) { holders_load ("plugins"); id = identifier_new (); if (!id) printf ("Error: Cannot load identifiers\n!"); else { identifier_load_language (id, "share/langid.sv", "sv"); identifier_load_language (id, "share/langid.en", "en"); } lang_en = 0; lang_sw = 0; lang_un = 0; while (collection_next_document ()) { check_msg (); if (plottype == BIRTH) if (collection_get_msg () == 0) printf ("%d 1 # box: %d\n", collection_get_notd (), collection_get_class ()); } } switch (plottype) { case NONE: break; case FREQ: printf ("## Frequency plot\n"); for (i = j = 0; i < nob; j++) { for (k = l = 0; k < nob; k++) if (mboxes[k].nom == j) l++; if (l > 0) { printf ("%d %d\n", j, l); i += l; } } break; case CUM_FREQ: printf ("## Cumultative frequency plot\n"); printf ("## Not implemented yet\n"); break; case LIFETIME: printf ("## Lifetime plot\n"); printf ("## Not implemented yet\n"); break; case BIRTH: printf ("## Birth plot\n"); printf ("## Not implemented yet\n"); break; } if (print_dyn) { printf ("## Dynamic information\n"); for (i = 0; i < nob; i++) { printf ("# %d: First: %d Last: %d Non-activity: %d ", i, mboxes[i].first, mboxes[i].last, nom - mboxes[i].last); if (mboxes[i].nom == 1) printf ("Average-step: 0.0 Max-step: 0\n"); else printf ("Average-step: %.1f Max-step: %d\n", (mboxes[i].last - mboxes[i].first) / ((double)mboxes[i].nom - 1), mboxes[i].max); } printf ("# English: %d Swedish: %d Unknown: %d\n", lang_en, lang_sw, lang_un); } return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -