⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 document.c

📁 使用具有增量学习的监控式学习方法。包括几个不同的分类算法。
💻 C
字号:
/* Copyright (C) 2002  Mikael Ylikoski * See the accompanying file "README" for the full copyright notice *//** * @file * Structured document interface. * * @author  Mikael Ylikoski * @date    2002 */#include <stdio.h>#include <stdlib.h>#include <string.h>#include "document.h"#include "rfc822.h"#include "utility.h"/** * Document. */struct document_ {    enum document_type dt;	/**< Document type */    struct rfc822 *msg;		/**< Email message */    text_part *parts;		/**< Text parts of document */    char *charset;		/**< Character set */    char *from;			/**< Sender */    char *subject;		/**< Subject */    char *language;		/**< Language */    //time_t time;};/** * Create a new document. * * @param source  source text; must not be used or freed by caller afterwards * @param dt      document type */document *document_new (char *source, enum document_type dt) {    document *doc;    doc = my_malloc (sizeof(document));    doc->dt = dt;    doc->from = NULL;    doc->subject = NULL;    doc->language = NULL;    doc->charset = NULL;    doc->parts = NULL;    if (source)	switch (doc->dt) {	case PLAIN:	    document_add_text (doc, NULL, source, strlen (source));	    break;	case RFC822:	    doc->msg = rfc822_new (source);	    /*	    if (!doc->msg) {		free (doc);		return NULL;	    }	    */	    break;	}    return doc;}/** * Add a text to a document. * * @param doc      document to add to * @param charset  character set of text * @param text     text to add * @param len      length of text */voiddocument_add_text (document *doc, char *charset, char *text, int len) {    text_part *tp, *pt;    tp = my_malloc (sizeof(text_part));    tp->text = text;    tp->len = len;    tp->charset = charset;    tp->next = NULL;    if (!doc->parts)	doc->parts = tp;    else {	for (pt = doc->parts; pt->next; pt = pt->next)	    ;	pt->next = tp;    }}/** * Free memory used by a document. * * @param doc  document to free */voiddocument_free (document *doc) {    text_part *tp, *pt;    switch (doc->dt) {    case PLAIN:	for (tp = doc->parts; tp; tp = pt) {	    pt = tp->next;	    if (tp->charset)		free (tp->charset);	    if (tp->text)		free (tp->text);	    free (tp);	}	if (doc->from)	    free (doc->from);	if (doc->subject)	    free (doc->subject);	/*if (doc->language)	  free (doc->language);*/	break;    case RFC822:	if (doc->msg)	    rfc822_free (doc->msg);	break;    }    free (doc);}intdocument_set_from (document *doc, char *from) {    switch (doc->dt) {    case PLAIN:	doc->from = from;	break;    case RFC822:	return -1;    }    return 0;}intdocument_set_subject (document *doc, char *subject) {    switch (doc->dt) {    case PLAIN:	doc->subject = subject;	break;    case RFC822:	return -1;    }    return 0;}intdocument_set_language (document *doc, char *lang) {    doc->language = lang;    return 0;}intdocument_set_rfc822 (document *doc, char *src) {    switch (doc->dt) {    case PLAIN:	doc->dt = RFC822;	// free	break;    case RFC822:	if (doc->msg)	    rfc822_free (doc->msg);	break;    }    doc->msg = rfc822_new (src);    if (!doc->msg)	return -1;    return 0;}/** * Get sender line of document. */const char *document_get_from (document *doc) {    switch (doc->dt) {    case PLAIN:	return doc->from;    case RFC822:	if (doc->msg)	    return rfc822_get_from (doc->msg);    }    return NULL;}/** * Get sender name of document. */const char *document_get_from_name (document *doc) {    switch (doc->dt) {    case PLAIN:	break;    case RFC822:	if (doc->msg)	    return rfc822_get_from_name (doc->msg);    }    return NULL;}/** * Get sender address of document. */const char *document_get_from_address (document *doc) {    switch (doc->dt) {    case PLAIN:	break;    case RFC822:	if (doc->msg)	    return rfc822_get_from_address (doc->msg);    }    return NULL;}/** * Get receiver line of document. */const char *document_get_to (document *doc) {    switch (doc->dt) {    case PLAIN:	break;    case RFC822:	if (doc->msg)	    return rfc822_get_to (doc->msg);    }    return NULL;}/** * Get subject line of document. */const char *document_get_subject (document *doc) {    switch (doc->dt) {    case PLAIN:	return doc->subject;    case RFC822:	if (doc->msg)	    return rfc822_get_subject (doc->msg);    }    return NULL;}/** * Get time of document. */time_tdocument_get_time (document *doc) {    switch (doc->dt) {    case RFC822:	if (doc->msg)	    return rfc822_get_date (doc->msg);    default:	return -1;    }}/** * Get language of document. */const char *document_get_language (document *doc) {    return doc->language;}/** * Get charset of first part. */const char *document_get_charset (document *doc) {    if (doc->charset)	return doc->charset;    return "ISO-8859-1";}text_part *document_get_parts (document *doc) {    switch (doc->dt) {    case PLAIN:	return doc->parts;    case RFC822:	if (doc->msg)	    return rfc822_get_parts (doc->msg);    }    return NULL;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -