📄 nmrtf.c
字号:
/*
 * nmrtf.c
 *
 * Copyright (c) 2004 Novell, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */

/* This code was adapted from the sample RTF reader found here:
 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dnrtfspec/html/rtfspec.asp
 */

#include <glib.h>
#include <stdlib.h>
#include <stdio.h>
#include <stddef.h>
#include <ctype.h>
#include <string.h>
#include "nmrtf.h"
#include "debug.h"

/* Internal RTF parser error codes */
#define NMRTF_OK 0                  /* Everything's fine! */
#define NMRTF_STACK_UNDERFLOW 1     /* Unmatched '}' */
#define NMRTF_STACK_OVERFLOW 2      /* Too many '{' -- memory exhausted */
#define NMRTF_UNMATCHED_BRACE 3     /* RTF ended during an open group. */
#define NMRTF_INVALID_HEX 4         /* invalid hex character found in data */
#define NMRTF_BAD_TABLE 5           /* RTF table (sym or prop) invalid */
#define NMRTF_ASSERTION 6           /* Assertion failure */
#define NMRTF_EOF 7                 /* End of file reached while reading RTF */
#define NMRTF_CONVERT_ERROR 8       /* Error converting text */

/* Group nesting limit; rtf_parse() compares ctx->depth against it on '{' */
#define NMRTF_MAX_DEPTH 256

typedef enum
{
    NMRTF_STATE_NORMAL,
    NMRTF_STATE_SKIP,
    NMRTF_STATE_FONTTABLE,
    NMRTF_STATE_BIN,
    NMRTF_STATE_HEX
} NMRtfState; /* Rtf State */

/* Property types that we care about */
typedef enum
{
    NMRTF_PROP_FONT_IDX,
    NMRTF_PROP_FONT_CHARSET,
    NMRTF_PROP_MAX
} NMRtfProperty;

/* Actions for keywords of type NMRTF_KWD_SPEC */
typedef enum
{
    NMRTF_SPECIAL_BIN,
    NMRTF_SPECIAL_HEX,
    NMRTF_SPECIAL_UNICODE,
    NMRTF_SPECIAL_SKIP
} NMRtfSpecialKwd;

/* Actions for keywords of type NMRTF_KWD_DEST */
typedef enum
{
    NMRTF_DEST_FONTTABLE,
    NMRTF_DEST_SKIP
} NMRtfDestinationType;

/* Classifies an entry of rtf_symbols; selects how its action is interpreted */
typedef enum
{
    NMRTF_KWD_CHAR,
    NMRTF_KWD_DEST,
    NMRTF_KWD_PROP,
    NMRTF_KWD_SPEC
} NMRtfKeywordType;

typedef struct _NMRTFCharProp
{
    /* All we care about for now is the font.
     * bold, italic, underline, etc. should be
     * added here
     */
    int font_idx;
    int font_charset;
} NMRtfCharProp;

/* Snapshot of the parser state; entries live on the context's saved list */
typedef struct _NMRtfStateSave
{
    NMRtfCharProp chp;
    NMRtfState rds;
    NMRtfState ris;
} NMRtfStateSave;

typedef struct _NMRtfSymbol
{
    char *keyword;              /* RTF keyword */
    int default_val;            /* default value to use */
    gboolean pass_default;      /* true to use default value from this table */
    NMRtfKeywordType kwd_type;  /* the type of the keyword */
    int action;                 /* property type if the keyword represents a property */
                                /* destination type if the keyword represents a destination */
                                /* character to print if the keyword represents a character */
} NMRtfSymbol;

/* One font table entry, as created by rtf_add_font_entry() */
typedef struct _NMRtfFont
{
    int number;
    char *name;
    int charset;
} NMRtfFont;

/* RTF Context */
struct _NMRtfContext
{
    NMRtfState rds;         /* destination state */
    NMRtfState ris;         /* internal state */
    NMRtfCharProp chp;      /* current character properties (ie. font, bold, italic, etc.) */
    GSList *font_table;     /* the font table */
    GSList *saved;          /* saved state stack */
    int param;              /* numeric parameter for the current keyword */
    long bytes_to_skip;     /* number of bytes to skip (after encountering \bin) */
    int depth;              /* how many groups deep are we */
    gboolean skip_unknown;  /* if true, skip any unknown destinations (this is set after encountering '\*') */
    char *input;            /* input string */
    char nextch;            /* next char in input */
    GString *ansi;          /* Temporary ansi text, will be convert/flushed to the output string */
    GString *output;        /* The plain text UTF8 string */
};

static int rtf_parse(NMRtfContext *ctx);
static int rtf_push_state(NMRtfContext *ctx);
static int rtf_pop_state(NMRtfContext *ctx);
static NMRtfFont *rtf_get_font(NMRtfContext *ctx, int index);
static int rtf_get_char(NMRtfContext *ctx, guchar *ch);
static int rtf_unget_char(NMRtfContext *ctx, guchar ch);
static int rtf_flush_data(NMRtfContext *ctx);
static int rtf_parse_keyword(NMRtfContext *ctx);
static int rtf_dispatch_control(NMRtfContext *ctx, char *keyword, int param, gboolean param_set);
static int rtf_dispatch_char(NMRtfContext *ctx, guchar ch);
static int rtf_dispatch_unicode_char(NMRtfContext *ctx, gunichar ch);
static int rtf_print_char(NMRtfContext *ctx, guchar ch);
static int rtf_print_unicode_char(NMRtfContext *ctx, gunichar ch);
static int rtf_change_destination(NMRtfContext *ctx, NMRtfDestinationType dest);
static int rtf_dispatch_special(NMRtfContext *ctx, NMRtfSpecialKwd special);
static int rtf_apply_property(NMRtfContext *ctx, NMRtfProperty prop, int val);

/* RTF parser tables */

/* Keyword descriptions
 *
 * NOTE(review): in the literals "\0x0a" and "\0x0d" the escape "\0" is a NUL
 * byte followed by the plain characters 'x', '0', 'a'/'d', so as C strings
 * both keywords are empty. "\x0a" / "\x0d" were presumably intended; confirm
 * against the keyword lookup before changing, since "\r" and "\n" entries
 * with different actions follow later in the table.
 */
NMRtfSymbol rtf_symbols[] = {
    /* keyword, default, pass_default, keyword_type, action */
    {"fonttbl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_FONTTABLE},
    {"f", 0, FALSE, NMRTF_KWD_PROP, NMRTF_PROP_FONT_IDX},
    {"fcharset", 0, FALSE, NMRTF_KWD_PROP, NMRTF_PROP_FONT_CHARSET},
    {"par", 0, FALSE, NMRTF_KWD_CHAR, 0x0a},
    {"line", 0, FALSE, NMRTF_KWD_CHAR, 0x0a},
    {"\0x0a", 0, FALSE, NMRTF_KWD_CHAR, 0x0a},
    {"\0x0d", 0, FALSE, NMRTF_KWD_CHAR, 0x0a},
    {"tab", 0, FALSE, NMRTF_KWD_CHAR, 0x09},
    {"\r", 0, FALSE, NMRTF_KWD_CHAR, '\r'},
    {"\n", 0, FALSE, NMRTF_KWD_CHAR, '\n'},
    {"ldblquote",0, FALSE, NMRTF_KWD_CHAR, '"'},
    {"rdblquote",0, FALSE, NMRTF_KWD_CHAR, '"'},
    {"{", 0, FALSE, NMRTF_KWD_CHAR, '{'},
    {"}", 0, FALSE, NMRTF_KWD_CHAR, '}'},
    {"\\", 0, FALSE, NMRTF_KWD_CHAR, '\\'},
    {"bin", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_BIN},
    {"*", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_SKIP},
    {"'", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_HEX},
    {"u", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_UNICODE},
    {"colortbl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"author", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"buptim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"comment", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"creatim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"doccomm", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"footer", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"footerf", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"footerl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"footerr", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"footnote", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"ftncn", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"ftnsep", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"ftnsepc", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"header", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"headerf", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"headerl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"headerr", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"info", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"keywords", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"operator", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"pict", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"printim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"private1", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"revtim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"rxe", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"stylesheet", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"subject", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"tc", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"title", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"txe", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
    {"xe", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}
};

/* Number of entries in rtf_symbols */
int table_size = sizeof(rtf_symbols) / sizeof(NMRtfSymbol);

/*
 * Create a new RTF context.
 *
 * The context is zero-initialized, nextch is set to -1 and the ansi and
 * output buffers start out as empty strings. Release the context with
 * nm_rtf_deinit().
 */
NMRtfContext *
nm_rtf_init(void)
{
    NMRtfContext *ctx = g_new0(NMRtfContext, 1);

    /* presumably -1 marks "no pushed-back character" -- see rtf_unget_char */
    ctx->nextch = -1;
    ctx->ansi = g_string_new("");
    ctx->output = g_string_new("");
    return ctx;
}

/*
 * Parse the RTF text in input and return the text collected in the
 * context's output buffer.
 *
 * Returns a newly allocated copy of the output string (created with
 * g_strdup) when the parser reports NMRTF_OK. On any other status the
 * error code is logged and NULL is returned.
 */
char *
nm_rtf_strip_formatting(NMRtfContext *ctx, const char *input)
{
    int status;

    /* The context stores a non-const pointer to the caller's string */
    ctx->input = (char *)input;
    status = rtf_parse(ctx);
    if (status == NMRTF_OK)
        return g_strdup(ctx->output->str);

    /* Terminate the line like every other debug message in this file */
    purple_debug_info("novell", "RTF parser failed with error code %d\n", status);
    return NULL;
}

/*
 * Free a context created by nm_rtf_init(), including every font table
 * entry, every saved state and both string buffers. A NULL ctx is ignored.
 */
void
nm_rtf_deinit(NMRtfContext *ctx)
{
    GSList *node;
    NMRtfFont *font;
    NMRtfStateSave *save;

    if (ctx) {
        /* Free each font (and its name) before freeing the list itself */
        for (node = ctx->font_table; node; node = node->next) {
            font = node->data;
            g_free(font->name);
            g_free(font);
            node->data = NULL;
        }
        g_slist_free(ctx->font_table);

        /* Same for any states still on the saved-state stack */
        for (node = ctx->saved; node; node = node->next) {
            save = node->data;
            g_free(save);
            node->data = NULL;
        }
        g_slist_free(ctx->saved);

        g_string_free(ctx->ansi, TRUE);
        g_string_free(ctx->output, TRUE);
        g_free(ctx);
    }
}

/*
 * Map the charset of the currently selected font to an encoding name.
 *
 * Returns a pointer to a string literal. "CP1252" is returned both for
 * charsets that have no mapping here and when the current font index has
 * no entry in the font table.
 */
static const char *
get_current_encoding(NMRtfContext *ctx)
{
    NMRtfFont *font;

    font = rtf_get_font(ctx, ctx->chp.font_idx);

    /* rtf_get_font() yields NULL when the index has no entry (for example
     * an empty font table); fall back instead of dereferencing NULL. */
    if (font == NULL) {
        purple_debug_info("novell", "No font table entry for index %d\n",
                          ctx->chp.font_idx);
        return "CP1252";
    }

    switch (font->charset) {
        case 0:
            return "CP1252";
        case 77:
            return "MACINTOSH";
        case 78:
            return "SJIS";
        case 128:
            return "CP932";
        case 129:
            return "CP949";
        case 130:
            return "CP1361";
        case 134:
            return "CP936";
        case 136:
            return "CP950";
        case 161:
            return "CP1253";
        case 162:
            return "CP1254";
        case 163:
            return "CP1258";
        case 181:
        case 177:
            return "CP1255";
        case 178:
        case 179:
        case 180:
            return "CP1256";
        case 186:
            return "CP1257";
        case 204:
            return "CP1251";
        case 222:
            return "CP874";
        case 238:
            return "CP1250";
        case 254:
            return "CP437";
        default:
            purple_debug_info("novell", "Unhandled font charset %d\n", font->charset);
            break;
    }

    return "CP1252";
}

/*
 * Add an entry to the font table
 *
 * The name is copied with g_strdup and the new entry is appended to the end
 * of ctx->font_table. Always returns NMRTF_OK.
 */
static int
rtf_add_font_entry(NMRtfContext *ctx, int number, const char *name, int charset)
{
    NMRtfFont *font = g_new0(NMRtfFont, 1);

    font->number = number;
    font->name = g_strdup(name);
    font->charset = charset;

    purple_debug_info("novell", "Adding font to table: #%d\t%s\t%d\n",
                      font->number, font->name, font->charset);

    ctx->font_table = g_slist_append(ctx->font_table, font);

    return NMRTF_OK;
}

/*
 * Return the nth entry in the font table
 *
 * The lookup is by list position, not by the entry's number field.
 * Returns NULL when the table has no element at that position.
 */
static NMRtfFont *
rtf_get_font(NMRtfContext *ctx, int nth)
{
    NMRtfFont *font;

    font = g_slist_nth_data(ctx->font_table, nth);

    return font;
}

/*
 * Step 1:
 * Isolate RTF keywords and send them to rtf_parse_keyword;
 * Push and pop state at the start and end of RTF groups;
 * Send text to rtf_dispatch_char for further processing.
 */
static int
rtf_parse(NMRtfContext *ctx)
{
    int status;
    guchar ch;
    guchar hex_byte = 0;
    int hex_count = 2;
    int len;

    if (ctx->input == NULL)
        return NMRTF_OK;

    while (rtf_get_char(ctx, &ch) == NMRTF_OK) {
        if (ctx->depth < 0)
            return NMRTF_STACK_UNDERFLOW;

        /* if we're parsing binary data, handle it directly */
        if (ctx->ris == NMRTF_STATE_BIN) {
            if ((status = rtf_dispatch_char(ctx, ch)) != NMRTF_OK)
                return status;
        } else {
            switch (ch) {
                case '{':
                    if (ctx->depth > NMRTF_MAX_DEPTH)
                        return NMRTF_STACK_OVERFLOW;
                    rtf_flush_data(ctx);
                    if ((status = rtf_push_state(ctx)) != NMRTF_OK)
                        return status;
                    break;
                case '}':
                    rtf_flush_data(ctx);

                    /* for some reason there is always an unwanted '\par' at the end */
                    if (ctx->rds == NMRTF_STATE_NORMAL) {
                        len = ctx->output->len;
                        /* NOTE(review): when the output is still empty, len is 0
                         * and this reads str[-1]; consider guarding with len > 0. */
                        if (ctx->output->str[len-1] == '\n')
                            ctx->output = g_string_truncate(ctx->output, len-1);
                    }

                    if ((status = rtf_pop_state(ctx)) != NMRTF_OK)
                        return status;

                    /* NOTE(review): a negative depth is reported as
                     * NMRTF_STACK_UNDERFLOW at the top of the loop but as
                     * NMRTF_STACK_OVERFLOW here -- looks inconsistent; confirm. */
                    if (ctx->depth < 0)
                        return NMRTF_STACK_OVERFLOW;
                    break;
                case '\\':
                    if ((status = rtf_parse_keyword(ctx)) != NMRTF_OK)
                        return status;
                    break;
                case 0x0d:
                case 0x0a: /* cr and lf are noise characters... */
                    break;
                default:
                    if (ctx->ris == NMRTF_STATE_NORMAL) {
                        if ((status = rtf_dispatch_char(ctx, ch)) != NMRTF_OK)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -