⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 main.c

📁 harvest是一个下载html网页的机器人
💻 C
字号:
/* * (c) Copyright 1993 by Panagiotis Tsirigotis * All rights reserved.  The file named COPYRIGHT specifies the terms  * and conditions for redistribution. */static char RCSid[] = "main.c,v 1.8 1994/08/16 23:00:20 hardy Exp";static char version[] = VERSION;#include <fcntl.h>void exit();#include "sio.h"#include "str.h"#include "options.h"#include "defs.h"int line_count = 1;typedef void (*ps_process) ();/* * Available processes */void dvi_process();void psdit_process();void troff_process();void ditroff_process();void wordperfect_process();void groff_process();PRIVATE ps_process find_process();struct process_string {	char *ps_string;	char *ps_name;	ps_process ps_func;	strs_h ps_sh;};static struct process_string procstr[] ={	{".dvi", "dvi file", dvi_process},	{"/wpdict", "wordperfect document", wordperfect_process},	{"idraw", "idraw", troff_process},	{"psdit", "ditroff through psdit", psdit_process},	{"tpscript", "Kolstad's hack", ditroff_process},	{"ditroff", "ditroff file", ditroff_process},	{"troff", "troff file", troff_process},	{"groff", "GNU groff file", groff_process},	{NULL}};struct process_name {	char *pn_name;	ps_process pn_func;};static struct process_name procnames[] ={	{"dvi", dvi_process},	{"psdit", psdit_process},	{"dit", ditroff_process},	{"troff", troff_process},	{"wp", wordperfect_process},	{"groff", groff_process},	{NULL}};int main(argc, argv)int argc;char *argv[];{	int first_arg = opt_recognize(argc, argv);	int in_fd;	ps_process process;	void printout_flush();	if (d_option)		Sbuftype(1, SIO_NOBUF);	/*     *  do it anyway; fixes bug on OSF/1.  Panos' Sprint stuff doesn't    *  seems to work properly with exit().  His flush() operation    *  never gets called, so the output just sits in the buffering.    *  So set the library to non-buffering i/o.  Must put this fix    *  at the top; otherwise, things like the -l option won't work.    
*/	Sbuftype(1, SIO_NOBUF);		if (argc - first_arg > 1)	/* at most 1 arg */		usage();	if (first_arg == argc)		in_fd = 0;	else {		extern int errno;		char *file = argv[first_arg];		in_fd = open(file, O_RDONLY);		if (in_fd == -1)			error("Failed to open file %s (errno = %d)\n", file, errno);	}	if (l_option) {		struct process_name *pnp;		Sprint(1, "Recognized process types:\n");		for (pnp = procnames; pnp->pn_name; pnp++)			Sprint(1, "\t%s\n", pnp->pn_name);		exit(0);	}	if (p_option) {		struct process_name *pnp;		for (pnp = procnames;; pnp++) {			if (pnp->pn_name == NULL)				error("Unknown process: %s\n", p_option_arg);			if (strcmp(pnp->pn_name, p_option_arg) == 0) {				process = pnp->pn_func;				break;			}		}	} else		process = find_process(in_fd);	if (!n_option) {		(*process) (in_fd);		printout_flush();	}	exit(0);	/* NOTREACHED */}/* * Determine what process to use to parse this file. */PRIVATE ps_process find_process(fd)int fd;{	char *line;	struct process_string *psp;	struct process_string *found = NULL;	/*	 * Create string matchers	 */	for (psp = procstr; psp->ps_string; psp++) {		psp->ps_sh = strs_setup(STRS_BF + STRS_NOMALLOC, psp->ps_string);		if (psp->ps_sh == NULL)			error("out of memory\n");	}	/*	 * Try to determine how the file was derived.	 * Method:	 *              Read lines looking for one of the identifier strings	 *              If an identifier string is found, continue reading lines	 *      until a non-comment line is found.	 
*/	while (line = Srdline(fd)) {		line_count++;		if (line[0] != '%' && found)			break;		/*		 * Check what process (if any) is identified by this line		 */		for (psp = procstr; psp->ps_string; psp++)			if (strs_match(psp->ps_sh, line, SIOLINELEN(fd))) {				/*				 * psdit process takes precedence over ditroff_process				 */				if (!found || found->ps_func == ditroff_process &&				    psp->ps_func == psdit_process)					found = psp;			}	}	if (!found && !e_option) {		error("Failed to identify how this file was derived\n");	}	if (v_option)		Sprint(2, "Postscript file derived from %s\n", found->ps_name);	if (e_option)		exit(!found);	return (found->ps_func);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -