⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dvi.c

📁 harvest是一个下载html网页的机器人
💻 C
字号:
/* * (c) Copyright 1993 by Panagiotis Tsirigotis * All rights reserved.  The file named COPYRIGHT specifies the terms  * and conditions for redistribution. */static char RCSid[] = "dvi.c,v 1.5 1994/12/20 20:05:15 hardy Exp";#include <ctype.h>#include <string.h>#include "sio.h"#include "defs.h"static int get_file_char(fdp)int *fdp;{	return (next_char(*fdp));}PRIVATE int get_num(fd, c)int fd;int c;{	int num = NUM(c);	for (;;) {		NEXT_CHAR(fd, c);		if (c == SIO_EOF)			error("Unexpected end of file while reading number\n");		if (isdigit(c)) {			num *= 10;			num += NUM(c);		} else if (isalpha(c))			error("Unexpected character terminating number: '%c'\n", c);		else if (c == SPACE || c == NEWLINE)			break;		else {			PUTBACK(fd, c);			break;		}	}	return (num);}/* * This function is called for lines starting with '%' */PRIVATE int check_for_new_page(fd)int fd;{	char *line;	char *page_id = "Page:";	int id_len = strlen(page_id);	int new_page = FALSE;	void start_paragraph();	while (line = Srdline(fd)) {		if (line[0] != '%') {			Sundo(fd, SIO_UNDO_LINE);			break;		}		line_count++;		if (line[1] != '%')			continue;		if (strncmp(&line[2], page_id, id_len) == 0) {			if (!new_page)				start_paragraph();			new_page = TRUE;		}	}	return (new_page);}void dvi_process(fd)int fd;{	int c;	int num;	int output_string = FALSE;	int last_cmd = SPACE;	token_e last_token = TOK_OTHER;	void process_string();	void skip_until();	void skip_bracket();	void decode_string();	for (;;) {		NEXT_CHAR(fd, c);		if (c == SIO_EOF)			break;		if (c == '%') {			skip_until(fd, NEWLINE);			last_token = TOK_OTHER;			continue;		}		if (c == SPACE)			continue;		if (c == NEWLINE) {			/*			 * Peek at the next character to check if it is a line beginning 			 * with '%%'			 */			NEXT_CHAR(fd, c);			if (c == SIO_EOF)				break;			PUTBACK(fd, c);			if (c != '%')				continue;			if (check_for_new_page(fd))				output_string = FALSE;			last_token = TOK_OTHER;			continue;		}		if (c == SLASH) {			for (;;) {				NEXT_CHAR(fd, c);				if 
(c == SIO_EOF)					error("Reached EOF reading literal\n");				if (!isalnum(c)) {					PUTBACK(fd, c);					break;				}			}			last_token = TOK_OTHER;			continue;		}		/*		 * This piece of code has been commented out because I have found		 * valid Postscript files that contain an unbalanced number of '['.		 * Specifically, this is what was found:		 *		 *              81[31 52[28 3[31 19 22 25 31 1[28 31 47 16 2[16 31 28 19 25		 *		 * The reason may be that the '[' between numbers has a different		 * meaning but I could not find such a reference in the		 * Postscript Language Reference Manual		 *		 if ( c == OPEN_BRACKET )		 {		 skip_bracket( fd, OPEN_BRACKET, CLOSED_BRACKET ) ;		 last_token = TOK_OTHER ;		 continue ;		 }		 *		 */		if (c == OPEN_CURLY_BRACKET) {			skip_bracket(fd, OPEN_CURLY_BRACKET, CLOSED_CURLY_BRACKET);			last_token = TOK_OTHER;			continue;		}		if (c == LESS_THAN) {			skip_until(fd, GREATER_THAN);			last_token = TOK_OTHER;			continue;		}		if (c == OPEN_PAREN) {			decode_string(get_file_char, (void *) &fd);			output_string = TRUE;			last_token = TOK_STRING;			continue;		}		if (c == MINUS) {			/*			 * Check for a negative number			 */			NEXT_CHAR(fd, c);			if (!isdigit(c)) {				PUTBACK(fd, c);				continue;			}			num = -get_num(fd, c);			last_token = TOK_NUMBER;			continue;		}		if (isdigit(c)) {			num = get_num(fd, c);			/*			 * Check if we got 2 number tokens in a row			 */			if (last_token == TOK_NUMBER) {				/*				 * We got an (x,y) pair denoting the beginning of a line.				 
* Do a line-feed if the last output was a string				 */				if (output_string) {					printout(NEWLINE);					output_string = FALSE;				}			}			last_token = TOK_NUMBER;			continue;		}		if (isalpha(c)) {			char letter_buf[16];			int li = 0;	/* letter buf index */			int n_letters = 0;			letter_buf[li++] = c;			n_letters++;			for (;;) {				NEXT_CHAR(fd, c);				if (c == SIO_EOF)					break;				if (!isalnum(c)) {					PUTBACK(fd, c);					break;				}				letter_buf[li++] = c;				li %= sizeof(letter_buf);				n_letters++;			}         if (n_letters > 2) {#ifdef REMOVE_DVI_PLOTS            extern char *Srdline();            char *lineptr;            if (strncmp(letter_buf, "bplot", n_letters) == 0) {               /*                * found the beginning of a plot - skip it (else we output                * lots of lines with nothing by '.' on it)                */                for (;;) {                   lineptr = Srdline(fd);                   if (lineptr == NULL)                      break;	           if (strcmp(lineptr, "eplot") == 0)                      break;                }            }#endif /* REMOVE_DVI_PLOTS */            /* ignore others */            last_token = TOK_OTHER;            continue;         }         /*          * We have a command: identify it          */         switch (letter_buf[0]) {         case 'l':         case 'm':         case 'n':			case 'o':			case 'p':			case 'q':			case 'r':			case 's':			case 't':				break;	/* command requires no action */			default:				if (output_string) {					int print_space = FALSE;					switch (last_token) {					case TOK_NUMBER:						if (num >= 0)							print_space = TRUE;						break;					case TOK_CMD:						if (last_cmd != 'p')							print_space = TRUE;						break;					default:						print_space = TRUE;					}					if (print_space) {						printout(SPACE);						output_string = FALSE;					}				}			}			last_cmd = letter_buf[0];			last_token = TOK_CMD;		}	}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -