📄 robotmain.c
字号:
/*
**  @(#) $Id: RobotMain.c,v 1.11 1999/03/14 02:21:09 frystyk Exp $
**
**  W3C Webbot can be found at "http://www.w3.org/Robot/"
**
**  Copyright (c) 1995-1998 World Wide Web Consortium, (Massachusetts
**  Institute of Technology, Institut National de Recherche en
**  Informatique et en Automatique, Keio University). All Rights
**  Reserved. This program is distributed under the W3C's Software
**  Intellectual Property License. This program is distributed in the hope
**  that it will be useful, but WITHOUT ANY WARRANTY; without even the
**  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
**  PURPOSE. See W3C License http://www.w3.org/Consortium/Legal/ for more
**  details.
**
**  Authors:
**      HFN     Henrik Frystyk Nielsen, (frystyk@w3.org)
**      BR      Bob Racko
**      JP      John Punin
**
**  History:
**      Dec 04 95   First version
**      Oct 1998    Split into separate files
*/

#include "HTRobMan.h"
#include "RobotTxt.h"

/* True when a robot object exists and its MR_QUIET flag is not set. */
#define SHOW_QUIET(mr) ((mr) && !((mr)->flags & MR_QUIET))

/* True when a robot object exists and its MR_REAL_QUIET flag is not set. */
#define SHOW_REAL_QUIET(mr) ((mr) && !((mr)->flags & MR_REAL_QUIET))

/* ------------------------------------------------------------------------- */
/*                              MAIN PROGRAM                                 */
/* ------------------------------------------------------------------------- */

/*
**  Print callback: formats the message described by `fmt` and `pArgs`
**  onto stdout. main() installs it with HTPrint_setCallback().
**
**  fmt     printf-style format string
**  pArgs   argument list matching `fmt`
**
**  Returns whatever vfprintf() returns (characters written, or a negative
**  value on an output error).
*/
PRIVATE int printer (const char * fmt, va_list pArgs)
{
    return (vfprintf(stdout, fmt, pArgs));
}

/*
**  Trace callback: formats the message described by `fmt` and `pArgs`
**  onto stderr. main() installs it with HTTrace_setCallback().
**
**  fmt     printf-style format string
**  pArgs   argument list matching `fmt`
**
**  Returns whatever vfprintf() returns (characters written, or a negative
**  value on an output error).
*/
PRIVATE int tracer (const char * fmt, va_list pArgs)
{
    return (vfprintf(stderr, fmt, pArgs));
}

int main (int argc, char ** argv)
{
    int status = 0;
    int arg;
    BOOL cache = NO;                    /* Use persistent cache */
    BOOL flush = NO;                    /* flush the persistent cache */
    char * cache_root = NULL;
    int cache_size = DEFAULT_CACHE_SIZE;
    HTChunk * keywords = NULL;          /* From command line */
    int keycnt = 0;
    Robot * mr = NULL;
    Finger * finger = NULL;
    HTParentAnchor * startAnchor = NULL;

    /* Starts Mac GUSI socket library */
#ifdef GUSI
    GUSISetup(GUSIwithSIOUXSockets);
    GUSISetup(GUSIwithInternetSockets);
#endif

#ifdef __MWERKS__ /* STR */
    InitGraf((Ptr) &qd.thePort);
    InitFonts();
    InitWindows();
    InitMenus();
    TEInit();
    InitDialogs(nil);
InitCursor(); SIOUXSettings.asktosaveonclose = false; argc=ccommand(&argv);#endif /* __MWERKS__ */#ifdef HT_MEMLOG HTMemLog_open(DEFAULT_MEMLOG, 8192, YES);#endif /* Initiate W3C Reference Library with a robot profile */ HTProfile_newRobot(APP_NAME, APP_VERSION); /* Need our own trace and print functions */ HTPrint_setCallback(printer); HTTrace_setCallback(tracer); /* Build a new robot object */ mr = Robot_new(); /* Scan command Line for parameters */ for (arg=1; arg<argc; arg++) { if (*argv[arg] == '-') { /* non-interactive */ if (!strcmp(argv[arg], "-n")) { HTAlert_setInteractive(NO); /* help */ } else if (!strcmp(argv[arg], "-h") || !strcmp(argv[arg], "-?")) { VersionInfo(); Cleanup(mr, 0); /* clf log file */ } else if (!strcmp(argv[arg], "-l")) { mr->logfile = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_LOG_FILE; mr->flags |= MR_LOGGING; /* referer log file */ } else if (!strncmp(argv[arg], "-ref", 4)) { mr->reffile = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_REFERER_FILE; mr->flags |= MR_LOGGING; /* Not found error log file */ } else if (!strncmp(argv[arg], "-404", 4)) { mr->notfoundfile = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_NOTFOUND_FILE; mr->flags |= MR_LOGGING; /* reject log file */ } else if (!strncmp(argv[arg], "-rej", 4)) { mr->rejectfile = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_REJECT_FILE; mr->flags |= MR_LOGGING; /* no alt tags log file */ } else if (!strncmp(argv[arg], "-alt", 4)) { mr->noalttagfile = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_NOALTTAG_FILE; mr->flags |= MR_LOGGING; /* negotiated resource log file */ } else if (!strncmp(argv[arg], "-neg", 4)) { mr->connegfile = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_CONNEG_FILE; mr->flags |= MR_LOGGING; /* hit file log */ } else if (!strcmp(argv[arg], "-hit")) { mr->hitfile = (arg+1 < argc && *argv[arg+1] != '-') ? 
argv[++arg] : DEFAULT_HIT_FILE; mr->flags |= MR_DISTRIBUTIONS; /* link relations file log */ } else if (!strcmp(argv[arg], "-rellog")) { mr->relfile = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_REL_FILE; mr->flags |= MR_DISTRIBUTIONS; /* Specific link relation to look for (only used i also -rellog) */ } else if (!strcmp(argv[arg], "-relation")) { mr->relation = (arg+1 < argc && *argv[arg+1] != '-') ? (HTLinkType) HTAtom_caseFor(argv[++arg]) : NULL; mr->flags |= MR_DISTRIBUTIONS; /* last modified log file */ } else if (!strcmp(argv[arg], "-lm")) { mr->lmfile = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_LM_FILE; mr->flags |= MR_DISTRIBUTIONS; /* title log file */ } else if (!strcmp(argv[arg], "-title")) { mr->titlefile = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_TITLE_FILE; mr->flags |= MR_DISTRIBUTIONS; /* mediatype distribution log file */ } else if (!strncmp(argv[arg], "-for", 4)) { mr->mtfile = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_FORMAT_FILE; mr->flags |= (MR_KEEP_META | MR_DISTRIBUTIONS); /* charset distribution log file */ } else if (!strncmp(argv[arg], "-char", 5)) { mr->charsetfile = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_CHARSET_FILE; mr->flags |= (MR_KEEP_META | MR_DISTRIBUTIONS); /* rule file */ } else if (!strcmp(argv[arg], "-r")) { mr->rules = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_RULE_FILE; /* Don't follow HTML META tags with robot information */ } else if (!strcmp(argv[arg], "-nometatags")) { mr->flags |= MR_NOMETATAGS; /* output filename */ } else if (!strcmp(argv[arg], "-o")) { mr->outputfile = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_OUTPUT_FILE; /* URI prefix */ } else if (!strcmp(argv[arg], "-prefix")) { char * prefix = NULL; prefix = (arg+1 < argc && *argv[arg+1] != '-') ? 
argv[++arg] : DEFAULT_PREFIX; if (*prefix && *prefix != '*') { StrAllocCopy(mr->prefix, prefix); StrAllocCat(mr->prefix, "*"); } /* timeout -- Change the default request timeout */ } else if (!strcmp(argv[arg], "-timeout")) { int timeout = (arg+1 < argc && *argv[arg+1] != '-') ? atoi(argv[++arg]) : DEFAULT_TIMEOUT; if (timeout > 1) mr->timer = timeout*MILLIES; /* wait -- Change the default pwait time */ /* This is new */ } else if (!strcmp(argv[arg], "-wait")) { int waits = (arg+1 < argc && *argv[arg+1] != '-') ? atoi(argv[++arg]) : 0; if (waits > 0) mr->waits = waits; /* Force no pipelined requests */ } else if (!strcmp(argv[arg], "-nopipe")) { HTTP_setConnectionMode(HTTP_11_NO_PIPELINING); /* Stream write flush delay in ms */ } else if (!strcmp(argv[arg], "-delay")) { int delay = (arg+1 < argc && *argv[arg+1] != '-') ? atoi(argv[++arg]) : DEFAULT_DELAY; HTHost_setDefaultWriteDelay(delay); /* Start the persistent cache */ } else if (!strcmp(argv[arg], "-cache")) { cache = YES; /* Determine the cache root */ } else if (!strcmp(argv[arg], "-cacheroot")) { cache_root = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : NULL; /* Persistent cache flush */ } else if (!strcmp(argv[arg], "-flush")) { flush = YES; /* Do a cache validation */ } else if (!strcmp(argv[arg], "-validate")) { mr->flags |= MR_VALIDATE; } else if (!strcmp(argv[arg], "-cache_size")) { cache_size = (arg+1 < argc && *argv[arg+1] != '-') ? 
atoi(argv[++arg]) : DEFAULT_CACHE_SIZE; /* Do an end-to-end cache-validation */ } else if (!strcmp(argv[arg], "-endvalidate")) { mr->flags |= MR_END_VALIDATE; /* preemptive or non-preemptive access */ } else if (!strcmp(argv[arg], "-single")) { mr->flags |= MR_PREEMPTIVE; /* test inlined images */ } else if (!strcmp(argv[arg], "-img")) { mr->flags |= MR_IMG; /* load inlined images */ } else if (!strcmp(argv[arg], "-saveimg")) { mr->flags |= (MR_IMG | MR_SAVE); /* URI prefix for inlined images */ } else if (!strcmp(argv[arg], "-imgprefix")) { char * prefix = NULL; prefix = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_IMG_PREFIX; if (*prefix && *prefix!='*') { StrAllocCopy(mr->img_prefix, prefix); StrAllocCat(mr->img_prefix, "*"); } /* load anchors */ } else if (!strcmp(argv[arg], "-link") || !strcmp(argv[arg], "-depth")) { mr->flags |= MR_LINK; mr->depth = (arg+1 < argc && *argv[arg+1] != '-') ? atoi(argv[++arg]) : DEFAULT_DEPTH; /* load fixed number of anchors */ } else if (!strcmp(argv[arg], "-ndoc")) { mr->ndoc = (arg+1 < argc && *argv[arg+1] != '-') ? atoi(argv[++arg]) : -1 ; /* Output start and end time */ } else if (!strcmp(argv[arg], "-ss")) { mr->flags |= MR_TIME; /* print version and exit */ } else if (!strcmp(argv[arg], "-version")) { VersionInfo(); Cleanup(mr, 0); /* run in BFS mode */ } else if (!strcmp(argv[arg], "-bfs")) { mr->flags |= MR_BFS; /* run in quiet mode */ } else if (!strcmp(argv[arg], "-q")) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -