⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rtf2html.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
	if(feof(f))	{		fprintf(stderr,"Unexpected end of file\n");		return;	}	switch (ch)	{		case '\\':		case '{':		case '}':			*pch=ch; *pf=TRUE;			break;		case '*':			gobble = TRUE;	/*perform no output, ignore commands 'til level-1*/			if(skip_to_level>level-1||skip_to_level==-1)				skip_to_level = level-1;			break;		case '\'':		{			char ch1, ch2;			ch1 = RTF_GetChar();			ch2 = RTF_GetChar();			if(!feof(f))			{				if(isxdigit(ch1)&&isxdigit(ch2))				{					ch = chartoi(ch1)*16+chartoi(ch2);					*pch=ch; *pf=TRUE;				} else {					fprintf(stderr,"RTF Error: unexpected '%c%c' after \\\'\n",ch1,ch2);				}			}			break;		}		default:			if (isalpha(ch))			{				RTF_BuildToken(ch);			} else {				fprintf(stderr, "\nRTF Error: unexpected '%c' after \\.\n", ch);			}			break;	}}/**************************************/void RTF_ParseStyle(void){	char ch, pf;	int level0;	void (*PrevDoControl)(char*,char*);	level0=level;	PrevDoControl=RTF_DoControl;	RTF_DoControl=RTF_DoStyleControl;	RTF_ClearName(style_name);	style_number[0]=0;	while (1)	{		ch = RTF_GetChar();		if(feof(f))			break;		switch (ch)		{			case '\\':				RTF_backslash(&ch,&pf);				if(pf)				{					RTF_BuildName(style_name,ch);				} else {					RTF_ClearName(style_name);				}				break;			case '{':				level++;				RTF_ClearName(style_name);				break;			case '}':				if(level0+1==level)				{					if(style_number[0]!=0)					{						RTF_AddStyleMap(style_name,style_number);						style_number[0]=0;					}				} else if(level0==level) {					RTF_DoControl=PrevDoControl;					RTF_UnGetChar(ch);					return;				}				level--;				RTF_ClearName(style_name);				break;			default:				RTF_BuildName(style_name,ch);				break;		}	} /* while */}char isField = FALSE;char isHyperLink = FALSE;void RTF_IgnoreControl(char* control, char* arg){	(void)control; (void)arg;}void ParseFldInst(void){	static const char* kwd= "HYPERLINK";	short level0;	int i;	void (*PrevDoControl)(char*,char*);	char ch, pf, endc;	if(!isField||isHyperLink)		return;	/* look for HYPERLINK */	
level0 = level-1;	i = 0;	while (1)	{		ch = RTF_GetChar();		if(feof(f))		{			fprintf(stderr, "Unexpected end of file\n");			break;		}		switch (ch)		{			case '\\':				RTF_backslash(&ch,&pf);				if(pf)					i=-1;				else					i=0;				break;			case '{':				RTF_PushState(&level);				i=0;				break;			case '}':				RTF_PopState(&level);				if(level0==level)					return;				i=0;				break;			default:				if(i<0)				{					if(isspace(ch))						i=0;				} else {					if(kwd[i]==ch)						i++;					else if(kwd[i]==0&&isspace(ch))						goto found;					else if(isspace(ch))						i=0;					else						i=-1;				}				break;		}	}/*while*/found:	gobble = FALSE;	isHyperLink = TRUE;	PrevDoControl=RTF_DoControl;	RTF_DoControl=RTF_IgnoreControl;	RTF_PutStr("<a href=\"");	ch = RTF_GetChar();	if(ch=='"')	{		endc = ch;		ch = RTF_GetChar();	} else {		endc = 0;	}	level0=level;	while(1)	{		if(feof(f))		{			fprintf(stderr, "Unexpected end of file\n");			break;		}		if((level==level0) && ((endc && endc==ch) || isspace(ch)))			break;		switch(ch)		{			case '\\':				RTF_backslash(&ch,&pf);				if(!pf)					goto next;				break;			case '{':				level++;				break;			case '}':				if(level==level0)					goto done;				--level;				break;		}		if(ch=='"')			RTF_PutStr("&#34;");		else			RTF_PutChar(ch);next:		ch = RTF_GetChar();	}done:	RTF_UnGetChar(ch);	RTF_PutStr("\">");	RTF_DoControl=PrevDoControl;	gobble = TRUE;}void ParseField(void){	short level0;	char ch, pf;	if(isField)	{		return;	}	level0 = level-1;	isField = TRUE;	while (1)	{		ch = RTF_GetChar();		if(feof(f))		{			fprintf(stderr, "Unexpected end of file\n");			break;		}		switch (ch)		{			case '\\':				RTF_backslash(&ch,&pf);				if(pf) RTF_PutChar(ch);				break;			case '{':				RTF_PushState(&level);				break;			case '}':				RTF_PopState(&level);				if(level0==level)				{					if(isHyperLink)					{						char save = gobble;						gobble = FALSE;						RTF_PutStr("</a>");						gobble = save;						isHyperLink = FALSE;					}					isField = FALSE;					return;				}				break;			
default:				RTF_PutChar(ch);				break;		}	}/*while*/}/**************************************/void RTF_DoParseInfo(void){	char ch, pf;	int level0;	level0=level;	while (1)	{		ch = RTF_GetChar();		if(feof(f))			break;		switch (ch)		{			case '\\':				RTF_backslash(&ch,&pf);				if(pf)					RTF_PutChar(ch);				break;			case '{':				level++;				break;			case '}':				if(level==level0)				{					RTF_UnGetChar(ch);					return;				}				--level;				break;			default:				RTF_PutChar(ch);				break;		}	} /* while */}void RTF_DoInfoControl(char* control, char* arg){	if(arg[0]==0 && !strcasecmp(control,"title"))	{		gobble = FALSE;		RTF_DoControl=RTF_IgnoreControl;		RTF_PutStr("<TITLE>");		RTF_DoParseInfo();		RTF_PutStr("</TITLE>\n");		RTF_DoControl=RTF_DoInfoControl;		gobble = TRUE;	}}void RTF_ParseInfo(void){	void (*PrevDoControl)(char*,char*);	short prev_gobble;	prev_gobble = gobble;	gobble = TRUE; /* skip all unknown information */	PrevDoControl=RTF_DoControl;	if(isBody)	{		/* too late for info, just skip it */		RTF_DoControl=RTF_IgnoreControl;		RTF_DoParseInfo();		return;	} else {		if(!isHead) RTF_StartHead();		RTF_DoControl=RTF_DoInfoControl;		isBody=TRUE; /* to prevent RTF_Put from start a body */		RTF_DoParseInfo();		isBody=FALSE;	}	RTF_DoControl=PrevDoControl;	gobble = prev_gobble;}/**************************************//* Perform actions for RTF control words */void RTF_DoBodyControl (char* control,char* arg){	short style;	switch (GetTokenIndex(control))	{		case t_stylesheet:			gobble = TRUE;	/*perform no output, ignore commands 'til level-1*/			skip_to_level = level-1;			RTF_ParseStyle();			break;		case t_fonttbl:	/*skip all of these and their contents!*/		case t_colortbl:			gobble = TRUE;	/*perform no output, ignore commands 'til level-1*/			skip_to_level = level-1;			break;		case t_info:			RTF_ParseInfo();			break;		case t_field:			ParseField();			break;		case t_fldinst:			ParseFldInst();			break;		case t_pict:			gobble = TRUE;	/*perform no output, ignore commands 'til 
level-1*/			if(skip_to_level>=level || skip_to_level==-1)				skip_to_level = level-1;			break;		case t_s: /*Style*/			if (!curr_heading)			{				style = RTF_MapStyle (arg);				if(style)				{					curr_heading = s_h0 + style;					RTF_PutStr(styles[curr_heading][0]);					ignore_styles = TRUE;				}			}			break;		case t_b: /*Bold*/				RTF_SetStyle(&curr_style,s_bold);			break;		case t_ulw:		case t_uld:		case t_uldb:		case t_ul: /*Underline, maps to "emphasis" HTML style*/				if(!isHyperLink)					RTF_SetStyle(&curr_style,s_underline);			break;		case t_i: /*Italic*/				RTF_SetStyle(&curr_style,s_italic);			break;		case t_v: /* Hidden*/				RTF_SetStyle(&curr_style,s_hidden);			break;		case t_par: /*Paragraph*/			if (curr_heading!=s_plain) {				RTF_PutStr(styles[curr_heading][1]);				curr_heading = s_plain;			} else {				RTF_PutStr(styles[s_para][0]);			}			ignore_styles = FALSE;			break;		case t_plain: /*reset inline styles*/			RTF_PlainStyle(&curr_style);			break;		case t_cell:		case t_tab:			RTF_PutChar(' ');			break;		case t_endash:		case t_emdash:			RTF_PutChar('-');			break;		case t_line:		case t_row:			RTF_PutStr(styles[s_br][0]);			break;		case t_bullet:			RTF_PutChar('\xb7');			break;		case t_start:		case t_end:			break;	}}/**************************************//* RTF_Parse is a crude, ugly state machine that understands enough of *//* the RTF syntax to be dangerous.                                     
*/void RTF_ParseBody(void){	char ch, pf;	RTF_DoControl=RTF_DoBodyControl;	level = 0;	skip_to_level = -1;	gobble = FALSE;	ignore_styles = FALSE;	while (1)	{		ch = RTF_GetChar();		if(feof(f))			break;		switch (ch)		{			case '\\':				RTF_backslash(&ch,&pf);				if(pf) RTF_PutCharEx(ch);				break;			case '{':				RTF_PushState(&level);				break;			case '}':				RTF_PopState(&level);				break;			default:				RTF_PutCharEx(ch);				break;		}	}/*while*/}/**************************************/int RTF_Parse (char* filename){	if (filename)	{		if (!(f = fopen (filename, "r")))		{			fprintf (stderr, "\nError: Input file %s not found.\n", filename);			return (-1);		}	} else {		f = stdin;	}	isHead=isBody=FALSE;	RTF_ParseBody();	if(isBody) RTF_PutStr("</BODY>\n");	RTF_PutStr("</HTML>\n");	fclose (f);	return 0;}/**************************************/void Initialize(void){	int i;	for (i=0;i<MAX_LEVELS;i++)			style_state[i].s=s_plain;	curr_style.s=s_plain;	curr_heading = s_plain;	// Set default styles maping	style_mappings[0][0]=0;	for(i=1;i<=6;i++)			sprintf(style_mappings[i],"%d",256-i);}/**************************************/int main(int argc,char** argv){	char *in;	if (argc>1)	{		if( strcmp(argv[1],"--help")==0 || strcmp(argv[1],"-H")==0 )		{			printf("Use: %s [rtf_filename]\n",argv[0]);			return 0;		} else if ( strcmp(argv[1],"--version")==0 || strcmp(argv[1],"-V")==0 ) {			printf("rtf2html version 1.2\n");			return 0;		} else			in = argv[1];	} else		in = NULL;	Initialize();	return (RTF_Parse(in));}/* Note, the source is formated with 4 character tabs * vim: sw=4 ts=4 */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -