📄 reply.c
字号:
/* Copyright (C) 2001-2002 Mikael Ylikoski * See the accompanying file "README" for the full copyright notice *//** * @file * Reply classifier. * * Classifies replies to msgs as the same class as the original msgs. * Replies are usually written within a few days, so the database should be * limited to a time frame of a few days. * A double linked list is used to record the age of the threads. A thread * is as old as its latest message. * * Replies are identified by Subject lines beginning with "Re:". * A possible future extension is to use the In-Reply-To and References fields. * * @author Mikael Ylikoski * @date 2001-2002 */#include <ctype.h>#include <stdlib.h>#include <string.h>#include "doc_classifier.h"#include "reply.h"#include "utility.h"typedef struct rply_ rply;/** * Reply classifier. */struct reply_ { GHashTable *ht; /**< Hash table */ int size; /**< Maximum size */ int nor; /**< Number Of Replies */ rply *first; /**< First (oldest) reply */ rply *last; /**< Last (newest) reply */};/** * Reply. */struct rply_ { char *subject; /**< Mail subject */ int class; /**< Class */ rply *prev; rply *next;};/** * Create a new Reply classifier. * * @param size maximum number of addresses to remember * @return The new classifier. */reply *reply_new (int size) { reply *rp; if (size < 1) return NULL; rp = my_malloc (sizeof(reply)); rp->ht = g_hash_table_new (g_str_hash, g_str_equal); if (!rp->ht) { free (rp); return NULL; } rp->size = size; rp->nor = 0; rp->first = NULL; rp->last = NULL; return rp;}void *reply_load (FILE *f) { char *str; int i, j, k; reply *rdb; fscanf (f, "size %d\n", &i); rdb = reply_new (i); fscanf (f, "nor %d\n", &j); for (i = 0; i < j; i++) { fscanf (f, "%d:", &k); str = my_malloc (k + 1); fread (str, sizeof(char), k, f); str[k] = '\0'; fscanf (f, "=%d;", &k); reply_learn (rdb, str, k); // FIXME free (str); } fscanf (f, "\n"); return rdb;}intreply_save (FILE *f, void *db) { reply *rdb; rply *rp; rdb = (reply *)db; fprintf (f, "size %d\n", rdb->size); fprintf (f, "nor %d\n", rdb->nor); for (rp = rdb->first; rp; rp = rp->next) fprintf (f, "%d:%s=%d;", strlen (rp->subject), rp->subject, rp->class); fprintf (f, "\n"); return 0;}/** * Remove prefixed 'Re:' and white-space from a subject line. * * @param sub subject line * @return A new subject line without any prefixed 'Re:' or white-space. */static const char *reply_trim_subject (const char *sub) { int i, j; j = strlen (sub); for (i = 0; i < j;) { while (sub[i] == ' ' || sub[i] == '\t') i++; if ((sub[i] == 'R' || sub[i] == 'r') && (sub[i + 1] == 'e' || sub[i + 1] == 'E') && (sub[i + 2] == ':')) i += 3; else break; } return &sub[i];}/** * "Add" an reply. * * @param rp reply classifier * @param sub subject line * @param class class */intreply_learn (reply *rp, const char *sub, int class) { char *s; rply *r, *p; s = (char *)reply_trim_subject (sub); r = g_hash_table_lookup (rp->ht, s); if (!r) { r = my_malloc (sizeof(rply)); r->subject = my_strdup (s); r->prev = NULL; r->next = NULL; if (rp->nor == rp->size) { p = rp->first; rp->first = p->next; g_hash_table_remove (rp->ht, p->subject); free (p->subject); free (p); } else rp->nor++; g_hash_table_insert (rp->ht, r->subject, r); } r->class = class; /* put reply last in list */ if (rp->first == NULL) { /* list is empty */ rp->first = r; rp->last = r; } else if (r != rp->last) { /* r is not last in list */ if (r == rp->first) { /* r is first in list */ rp->first = r->next; r->next->prev = NULL; } else if (r->next != NULL) { /* r is inside list */ r->prev->next = r->next; r->next->prev = r->prev; } /* else r is not in list */ r->prev = rp->last; r->next = NULL; rp->last->next = r; rp->last = r; } /* else r is already last in list */ return 0;}/** * Classify a reply. * * @param rp reply classifier * @param sub subject line */intreply_classify (reply *rp, const char *sub) { rply *r; char *s; s = (char *)reply_trim_subject (sub); r = g_hash_table_lookup (rp->ht, s); if (r) return r->class; return -1;}void *reply_new_doc (const char *opts) { int n; n = 100; if (opts) { n = get_opt_int (opts, "n="); if (n < 1) n = 100; } return reply_new (n);}intreply_learn_doc (void *db, void *data, int class) { const char *sub; document *doc; reply *rp; rp = (reply *)db; doc = (document *)data; sub = document_get_subject (doc); if (!sub) return -1; return reply_learn (rp, sub, class);}int *reply_classify_doc_rank (void *db, void *data) { const char *sub; int *il; document *doc; reply *rp; rp = (reply *)db; doc = (document *)data; sub = document_get_subject (doc); if (!sub) return NULL; il = my_malloc (2 * sizeof(int)); il[0] = reply_classify (rp, sub); il[1] = -1; return il;}intreply_classify_doc_top (void *db, void *data) { const char *sub; document *doc; reply *rp; rp = (reply *)db; doc = (document *)data; sub = document_get_subject (doc); if (!sub) return 0; return reply_classify (rp, sub);}/** * Keep cygwin happy. */intmain (void) { return 0;}const char *my_doc_classifier_name = "Reply";const doc_classifier_functions my_functions = { .new = reply_new_doc, .load = reply_load, .save = reply_save, .learn = reply_learn_doc, .classify_rank = reply_classify_doc_rank, .classify_top = reply_classify_doc_top};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -