⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 robotmain.c

📁 www工具包. 这是W3C官方支持的www支撑库. 其中提供通用目的的客户端的Web API: complete HTTP/1.1 (with caching, pipelining, PUT, POST, …)
💻 C
📖 第 1 页 / 共 2 页
字号:
/***	@(#) $Id: RobotMain.c,v 1.11 1999/03/14 02:21:09 frystyk Exp $**	**	W3C Webbot can be found at "http://www.w3.org/Robot/"**	**	Copyright 仼 1995-1998 World Wide Web Consortium, (Massachusetts**	Institute of Technology, Institut National de Recherche en**	Informatique et en Automatique, Keio University). All Rights**	Reserved. This program is distributed under the W3C's Software**	Intellectual Property License. This program is distributed in the hope**	that it will be useful, but WITHOUT ANY WARRANTY; without even the**	implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR**	PURPOSE. See W3C License http://www.w3.org/Consortium/Legal/ for more**	details.****  Authors:**	HFN		Henrik Frystyk Nielsen, (frystyk@w3.org)**	BR		Bob Racko**	JP		John Punin****  History:**	Dec 04 95	First version**	Oct 1998	Split into separate files*/#include "HTRobMan.h"#include "RobotTxt.h"#define SHOW_QUIET(mr)		((mr) && !((mr)->flags & MR_QUIET))#define SHOW_REAL_QUIET(mr)	((mr) && !((mr)->flags & MR_REAL_QUIET))/* ------------------------------------------------------------------------- *//*				  MAIN PROGRAM				     *//* ------------------------------------------------------------------------- */PRIVATE int printer (const char * fmt, va_list pArgs){    return (vfprintf(stdout, fmt, pArgs));}PRIVATE int tracer (const char * fmt, va_list pArgs){    return (vfprintf(stderr, fmt, pArgs));}int main (int argc, char ** argv){    int		status = 0;    int		arg;    BOOL	cache = NO;			     /* Use persistent cache */    BOOL	flush = NO;		       /* flush the persistent cache */    char *	cache_root = NULL;    int		cache_size = DEFAULT_CACHE_SIZE;    HTChunk *	keywords = NULL;			/* From command line */    int		keycnt = 0;    Robot *	mr = NULL;    Finger *	finger = NULL;    HTParentAnchor * startAnchor = NULL;    /* Starts Mac GUSI socket library */#ifdef GUSI    GUSISetup(GUSIwithSIOUXSockets);    GUSISetup(GUSIwithInternetSockets);#endif#ifdef __MWERKS__ /* STR */    InitGraf((Ptr) 
&qd.thePort);     InitFonts();     InitWindows();     InitMenus(); TEInit();     InitDialogs(nil);     InitCursor();    SIOUXSettings.asktosaveonclose = false;    argc=ccommand(&argv);#endif /* __MWERKS__ */#ifdef HT_MEMLOG    HTMemLog_open(DEFAULT_MEMLOG, 8192, YES);#endif    /* Initiate W3C Reference Library with a robot profile */    HTProfile_newRobot(APP_NAME, APP_VERSION);    /* Need our own trace and print functions */    HTPrint_setCallback(printer);    HTTrace_setCallback(tracer);    /* Build a new robot object */    mr = Robot_new();    /* Scan command Line for parameters */    for (arg=1; arg<argc; arg++) {	if (*argv[arg] == '-') {	    	    /* non-interactive */	    if (!strcmp(argv[arg], "-n")) {		HTAlert_setInteractive(NO);  	    /* help */	    } else if (!strcmp(argv[arg], "-h") || !strcmp(argv[arg], "-?")) {		VersionInfo();		Cleanup(mr, 0);  	    /* clf log file */	    } else if (!strcmp(argv[arg], "-l")) {		mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_LOG_FILE;		mr->flags |= MR_LOGGING;  	    /* referer log file */	    } else if (!strncmp(argv[arg], "-ref", 4)) {		mr->reffile = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_REFERER_FILE;		mr->flags |= MR_LOGGING;  	    /* Not found error log file */	    } else if (!strncmp(argv[arg], "-404", 4)) {		mr->notfoundfile = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_NOTFOUND_FILE;		mr->flags |= MR_LOGGING;  	    /* reject log file */	    } else if (!strncmp(argv[arg], "-rej", 4)) {		mr->rejectfile = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_REJECT_FILE;		mr->flags |= MR_LOGGING;  	    /* no alt tags log file */	    } else if (!strncmp(argv[arg], "-alt", 4)) {		mr->noalttagfile = (arg+1 < argc && *argv[arg+1] != '-') ?		    
argv[++arg] : DEFAULT_NOALTTAG_FILE;		mr->flags |= MR_LOGGING;  	    /* negotiated resource log file */	    } else if (!strncmp(argv[arg], "-neg", 4)) {		mr->connegfile = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_CONNEG_FILE;		mr->flags |= MR_LOGGING;  	    /* hit file log */	    } else if (!strcmp(argv[arg], "-hit")) {		mr->hitfile = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_HIT_FILE;		mr->flags |= MR_DISTRIBUTIONS;  	    /* link relations file log */	    } else if (!strcmp(argv[arg], "-rellog")) {		mr->relfile = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_REL_FILE;		mr->flags |= MR_DISTRIBUTIONS;  	    /* Specific link relation to look for (only used i also -rellog) */	    } else if (!strcmp(argv[arg], "-relation")) {		mr->relation = (arg+1 < argc && *argv[arg+1] != '-') ?		    (HTLinkType) HTAtom_caseFor(argv[++arg]) : NULL;		mr->flags |= MR_DISTRIBUTIONS;  	    /* last modified log file */	    } else if (!strcmp(argv[arg], "-lm")) {		mr->lmfile = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_LM_FILE;		mr->flags |= MR_DISTRIBUTIONS;  	    /* title log file */	    } else if (!strcmp(argv[arg], "-title")) {		mr->titlefile = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_TITLE_FILE;		mr->flags |= MR_DISTRIBUTIONS;  	    /* mediatype distribution log file */	    } else if (!strncmp(argv[arg], "-for", 4)) {		mr->mtfile = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_FORMAT_FILE;		mr->flags |= (MR_KEEP_META | MR_DISTRIBUTIONS);  	    /* charset distribution log file */	    } else if (!strncmp(argv[arg], "-char", 5)) {		mr->charsetfile = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_CHARSET_FILE;		mr->flags |= (MR_KEEP_META | MR_DISTRIBUTIONS);		            /* rule file */	    } else if (!strcmp(argv[arg], "-r")) {		mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ?		    
argv[++arg] : DEFAULT_RULE_FILE;	    /* Don't follow HTML META tags with robot information */	    } else if (!strcmp(argv[arg], "-nometatags")) {		mr->flags |= MR_NOMETATAGS;	    /* output filename */	    } else if (!strcmp(argv[arg], "-o")) { 		mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_OUTPUT_FILE;	    /* URI prefix */	    } else if (!strcmp(argv[arg], "-prefix")) {		char * prefix = NULL;		prefix = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_PREFIX;		if (*prefix && *prefix != '*') {		    StrAllocCopy(mr->prefix, prefix);		    StrAllocCat(mr->prefix, "*");		}	    /* timeout -- Change the default request timeout */	    } else if (!strcmp(argv[arg], "-timeout")) {		int timeout = (arg+1 < argc && *argv[arg+1] != '-') ?		    atoi(argv[++arg]) : DEFAULT_TIMEOUT;		if (timeout > 1) mr->timer = timeout*MILLIES;	    /* wait -- Change the default pwait time */ /* This is new */	    } else if (!strcmp(argv[arg], "-wait")) {		int waits = (arg+1 < argc && *argv[arg+1] != '-') ?		    atoi(argv[++arg]) : 0;		if (waits > 0) mr->waits = waits;	    /* Force no pipelined requests */	    } else if (!strcmp(argv[arg], "-nopipe")) {		HTTP_setConnectionMode(HTTP_11_NO_PIPELINING);	    /* Stream write flush delay in ms */	    } else if (!strcmp(argv[arg], "-delay")) {		int delay = (arg+1 < argc && *argv[arg+1] != '-') ?		    atoi(argv[++arg]) : DEFAULT_DELAY;		HTHost_setDefaultWriteDelay(delay);	    /* Start the persistent cache */	    } else if (!strcmp(argv[arg], "-cache")) {		cache = YES;	    /* Determine the cache root */	    } else if (!strcmp(argv[arg], "-cacheroot")) { 		cache_root = (arg+1 < argc && *argv[arg+1] != '-') ?		    
argv[++arg] : NULL;	    /* Persistent cache flush */	    } else if (!strcmp(argv[arg], "-flush")) {		flush = YES;	    /* Do a cache validation */	    } else if (!strcmp(argv[arg], "-validate")) {		mr->flags |= MR_VALIDATE;	    } else if (!strcmp(argv[arg], "-cache_size")) {		cache_size = (arg+1 < argc && *argv[arg+1] != '-') ?		    atoi(argv[++arg]) : DEFAULT_CACHE_SIZE;	    /* Do an end-to-end cache-validation */	    } else if (!strcmp(argv[arg], "-endvalidate")) {		mr->flags |= MR_END_VALIDATE;	    /* preemptive or non-preemptive access */	    } else if (!strcmp(argv[arg], "-single")) {		mr->flags |= MR_PREEMPTIVE;	    /* test inlined images */	    } else if (!strcmp(argv[arg], "-img")) {		mr->flags |= MR_IMG;	    /* load inlined images */	    } else if (!strcmp(argv[arg], "-saveimg")) {		mr->flags |= (MR_IMG | MR_SAVE);	    /* URI prefix for inlined images */	    } else if (!strcmp(argv[arg], "-imgprefix")) {		char * prefix = NULL;		prefix = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_IMG_PREFIX;		if (*prefix && *prefix!='*') {		    StrAllocCopy(mr->img_prefix, prefix);		    StrAllocCat(mr->img_prefix, "*");		}	    /* load anchors */	    } else if (!strcmp(argv[arg], "-link") || !strcmp(argv[arg], "-depth")) {		mr->flags |= MR_LINK;		mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ?		    atoi(argv[++arg]) : DEFAULT_DEPTH;	    /* load fixed number of anchors */	    } else if (!strcmp(argv[arg], "-ndoc")) {		mr->ndoc = (arg+1 < argc && *argv[arg+1] != '-') ?		    atoi(argv[++arg]) : -1 ;	    /* Output start and end time */	    } else if (!strcmp(argv[arg], "-ss")) {		mr->flags |= MR_TIME;	    /* print version and exit */	    } else if (!strcmp(argv[arg], "-version")) { 		VersionInfo();		Cleanup(mr, 0);			    /* run in BFS mode */	    } else if (!strcmp(argv[arg], "-bfs")) { 		mr->flags |= MR_BFS;	    /* run in quiet mode */	    } else if (!strcmp(argv[arg], "-q")) { 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -