⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ps2txt.cpp

📁 pdf,html,ps等转txt的头文件和库。还包括ictclas中科院计算所的中文分词工具有.h和.lib和.d
💻 CPP
字号:
#include "ps2txt.h"

/*
 * dvi_put_escape: handle the character(s) that follow a backslash inside a
 * parenthesised string.  Private helper for dviparse().
 *
 *   \(  \)          -> the parenthesis itself
 *   \t  \n          -> tab, newline
 *   \\              -> '"' (open quotes)
 *   \013 .. \017    -> "ff", "fi", "fl", "ffi", "ffl"
 *                      (presumably the font's ligature slots -- confirm)
 *   \0x, \01x, \1 .. \7 (anything else numeric)
 *                   -> echoed literally, backslash included (unknown code)
 *   \<other>        -> <other>
 *
 * Returns the last character read from source, or EOF if the input ended in
 * the middle of the escape.  EOF itself is never written to target.
 */
static int dvi_put_escape(FILE* source, FILE* target)
{
	int ch = fgetc(source);

	switch (ch)
	{
	case EOF  : break;                            /* input ended after '\' */
	case '('  :
	case ')'  : Putc(ch,target); break;
	case 't'  : Putc('\t',target); break;         /* write a tab */
	case 'n'  : Putc('\n',target); break;         /* write a <cr> */
	case '\\' : Putc('"',target); break;          /* open quotes */
	case '0'  :
		ch = fgetc(source);
		if (ch != '1')
		{
			fputs("\\0",target);                  /* unknown code: echo it */
			if (ch != EOF) Putc(ch,target);
			break;
		}
		ch = fgetc(source);
		switch (ch)                               /* from \01? */
		{
		case '3' : fputs("ff",target); break;
		case '4' : fputs("fi",target); break;
		case '5' : fputs("fl",target); break;
		case '6' : fputs("ffi",target); break;
		case '7' : fputs("ffl",target); break;    /* goes to target like the others */
		default  :
			fputs("\\01",target);                 /* unknown code: echo it */
			if (ch != EOF) Putc(ch,target);
			break;
		}
		break;
	case '1' : case '2' : case '3' : case '4' :
	case '5' : case '6' : case '7' :
		Putc('\\',target);                        /* unknown code: echo it */
		Putc(ch,target);
		break;
	default:
		Putc(ch,target);
		break;
	}
	return ch;
}

/*
 * dviparse: copy the text found inside parentheses in `source` to `target`.
 *
 * source  stream to read until EOF
 * target  stream that receives the extracted text
 *
 * Inside parentheses characters are copied through, with backslash escapes
 * translated by dvi_put_escape().  Outside parentheses nothing is copied;
 * '%' starts a comment that is discarded up to the end of the line, and the
 * last character seen before each '(' selects the separator written in
 * front of the new string:
 *
 *   l m n o q r s t      -> nothing
 *   y                    -> newline
 *   b                    -> space, unless a '-' directly followed the
 *                           previous ')' (negative number)
 *   a c d e f g h i j k x -> space
 *   anything else        -> nothing
 *
 * A single newline in the input is skipped wherever it occurs; only the
 * second of two consecutive newlines is seen by the logic below (it becomes
 * a space inside parentheses and is ignored outside).
 */
void dviparse(FILE* source,FILE* target)
{
	int ch,                 /* current character */
		skipped,            /* scratch for discarding comment text */
		prev_ch = '\n',     /* previously read character */
		in_paren = FALSE,   /* inside or outside of parentheses? */
		b_flag = FALSE,     /* true if previous character was ')' */
		b_space = TRUE;     /* true if a 'b' should produce a space */

	while ((ch = fgetc(source)) != EOF)
	{
		if (ch == '\n')
		{
			ch = fgetc(source);             /* ignore newlines in input! */
			if (ch == EOF) break;           /* don't treat EOF as a character */
		}

		if (in_paren)              /* strings to print come inside parentheses */
		{
			switch (ch)
			{
			case ')'  : in_paren = FALSE; b_flag = TRUE; break;
			case '\n' : Putc(' ',target); break;     /* <cr> = ' ' in parens */
			case '\\' : ch = dvi_put_escape(source,target); break;
			default   : Putc(ch,target); break;
			}
		}
		else                                             /* not in paren's */
		{
			switch (ch)
			{
			case '%'  :
				/* toss out the comment, however long the line is */
				do
					skipped = fgetc(source);
				while (skipped != EOF && skipped != '\n');
				break;
			case '\n' : break;            /* skip <cr>'s outside of parens */
			case '-'  :
				if (b_flag)
				{
					b_flag = FALSE;   /* because now prev. char != ')' */
					/* the number after ')' is negative, so no space
					   in case the letter code is 'b' */
					b_space = FALSE;
				}
				break;
			case '('  :
				in_paren = TRUE;               /* back in parens again */
				switch (prev_ch)   /* does this string need a separator? */
				{
				case 'l' : case 'm' : case 'n' : case 'o' :  /* not for these 8 */
				case 'q' : case 'r' : case 's' : case 't' :
					break;
				case 'y' : Putc('\n',target); break;         /* need a newline */
				case 'b' : if (b_space) Putc(' ',target); break; /* 'b' w/ a + number */
				case 'a' : case 'c' : case 'd' : case 'e' :
				case 'f' : case 'g' : case 'h' : case 'i' :
				case 'j' : case 'k' : case 'x' : Putc(' ',target); break;
				default: break;
				}
				b_space = TRUE;       /* reset flag to default for next time */
				break;
			default   : b_flag = FALSE; break;   /* junk stuff not in parens */
			}
		}
		prev_ch = ch;  /* remember this char in case !in_paren and next ch = '(' */
	}
}

/*
 * psparse: write the text held inside parentheses in `source` to stdout.
 * Based on Iqbal's original program (i/o setup stripped off the top).
 *
 * source  stream to read until EOF
 *
 * Behaviour:
 *   - Characters are echoed only while inside parentheses.  The outermost
 *     pair is not printed; nested parentheses are.
 *   - The ')' that closes the outermost level is written as a space.
 *   - A newline in the input is never copied; the first newline (or end of
 *     a comment) after a ')' emits one line break.
 *   - '%' outside parentheses starts a comment that is discarded through
 *     the end of the line, whatever its length.  Inside parentheses '%' is
 *     printed literally.
 *   - Backslash escapes inside parentheses: \( \) \\ give the character,
 *     \t and \n give tab and newline, \0..\7 are echoed with the backslash,
 *     and any other escaped character is printed as itself.  A backslash
 *     outside parentheses is dropped.
 */
void psparse(FILE* source)
{
	int ch;
	int para = 0;   /* current parenthesis nesting depth */
	int last = 0;   /* 1 once a ')' was seen and no line break written yet */

	while ((ch = fgetc(source)) != EOF)
	{
		switch (ch)
		{
		case '%'  :
			if (para == 0)
			{
				/* Discard the whole comment line.  A fixed-size fgets()
				   would leave the tail of a long line to be parsed. */
				do
					ch = fgetc(source);
				while (ch != EOF && ch != '\n');
			}
			else
				putchar(ch);
			/* fall through: the comment skip consumed the line's newline */
		case '\n' :
			if (last == 1) { puts(""); last = 0; }
			break;
		case '('  :
			if (para++ > 0) putchar(ch);
			break;
		case ')'  :
			if (para-- > 1) putchar(ch);
			else putchar(' ');
			last = 1;
			break;
		case '\\' :
			if (para > 0)
			{
				ch = fgetc(source);
				if (ch == EOF)
					break;          /* input ended after '\': nothing to print */
				switch (ch)
				{
				case '(' :
				case ')' :  putchar(ch); break;
				case 't' :  putchar('\t'); break;
				case 'n' :  putchar('\n'); break;
				case '\\':  putchar('\\'); break;
				case '0' :  case '1' : case '2' : case '3' :
				case '4' :  case '5' : case '6' : case '7' :
					putchar('\\');      /* octal code: echo it unchanged */
					/* fall through */
				default:  putchar(ch); break;
				}
			}
			break;
		default:
			if (para > 0) putchar(ch);
			break;
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -