⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 robotmain.c

📁 WWW 工具包。这是 W3C 官方支持的 WWW 支撑库，其中提供通用目的的客户端 Web API: complete HTTP/1.1 (with caching, pipelining, PUT, POST, …)
💻 C
📖 第 1 页 / 共 2 页
字号:
		mr->flags |= MR_QUIET;	    /* run in really quiet mode */	    } else if (!strcmp(argv[arg], "-Q")) { 		mr->flags |= MR_REAL_QUIET;	    /* run in redirection mode */	    } else if (!strcmp(argv[arg], "-redir")) { 		mr->flags |= MR_REDIR;		mr->redir_code = (arg+1 < argc && *argv[arg+1] != '-') ?		    atoi(argv[++arg]) : 0;#ifdef WWWTRACE	    /* trace flags */	    } else if (!strncmp(argv[arg], "-v", 2)) {		HTSetTraceMessageMask(argv[arg]+2);#endif#ifdef HT_POSIX_REGEX	    /* If we can link against a POSIX regex library */	    } else if (!strncmp(argv[arg], "-inc", 4)) {		if (arg+1 < argc && *argv[arg+1] != '-') {		    mr->include = get_regtype(mr, argv[++arg], W3C_DEFAULT_REGEX_FLAGS);		}	    } else if (!strncmp(argv[arg], "-exc", 4)) {		if (arg+1 < argc && *argv[arg+1] != '-') {		    mr->exclude = get_regtype(mr, argv[++arg], W3C_DEFAULT_REGEX_FLAGS);		}	    } else if (!strncmp(argv[arg], "-check", 6)) {		if (arg+1 < argc && *argv[arg+1] != '-') {		    mr->check = get_regtype(mr, argv[++arg], W3C_DEFAULT_REGEX_FLAGS);		}	    } else if (!strcmp(argv[arg], "-norobotstxt")) {	      mr->flags |= MR_NOROBOTSTXT;#endif#ifdef HT_MYSQL	    /* If we can link against a MYSQL database library */	    } else if (!strncmp(argv[arg], "-sqldb", 5)) {		mr->sqldb = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_SQL_DB;	    } else if (!strncmp(argv[arg], "-sqlclearlinks", 10)) {		mr->sqlflags |= HTSQLLOG_CLEAR_LINKS_TABLE;	    } else if (!strncmp(argv[arg], "-sqlclearrequests", 12)) {		mr->sqlflags |= HTSQLLOG_CLEAR_REQUESTS_TABLE;	    } else if (!strncmp(argv[arg], "-sqlclearresources", 12)) {		mr->sqlflags |= HTSQLLOG_CLEAR_RESOURCES_TABLE;	    } else if (!strncmp(argv[arg], "-sqlclearuris", 10)) {		mr->sqlflags |= HTSQLLOG_CLEAR_URIS_TABLE;	    } else if (!strncmp(argv[arg], "-sqlexternals", 5)) {		mr->sqlexternals = YES;	    } else if (!strncmp(argv[arg], "-sqlpassword", 5)) {		mr->sqlpw = (arg+1 < argc && *argv[arg+1] != '-') ?		    
argv[++arg] : DEFAULT_SQL_PW;	    } else if (!strncmp(argv[arg], "-sqlrelative", 5)) {		mr->sqlrelative = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : NULL;	    } else if (!strncmp(argv[arg], "-sqlserver", 5)) {		mr->sqlserver = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_SQL_SERVER;	    } else if (!strncmp(argv[arg], "-sqluser", 5)) {		mr->sqluser = (arg+1 < argc && *argv[arg+1] != '-') ?		    argv[++arg] : DEFAULT_SQL_USER;#endif	    } else {		if (SHOW_REAL_QUIET(mr)) HTPrint("Bad Argument (%s)\n", argv[arg]);	    }       } else {	 /* If no leading `-' then check for URL or keywords */    	    if (!keycnt) {	      HyperDoc *hd; /* This is new variable */		mr->furl = HTParse(argv[arg], mr->cwd, PARSE_ALL);		startAnchor = HTAnchor_parent(HTAnchor_findAddress(mr->furl));		hd = HyperDoc_new(mr, startAnchor, 0);		hd->method = METHOD_GET;		keycnt = 1;	    } else {		   /* Check for successive keyword arguments */		char *escaped = HTEscape(argv[arg], URL_XALPHAS);		if (keycnt++ <= 1)		    keywords = HTChunk_new(128);		else		    HTChunk_putc(keywords, ' ');		HTChunk_puts(keywords, HTStrip(escaped));		HT_FREE(escaped);	    }	}    }    if (!keycnt) {	VersionInfo();	Cleanup(mr, 0);    }    if (mr->depth != DEFAULT_DEPTH && 	(mr->prefix == NULL || *mr->prefix == '*')) {	if (SHOW_REAL_QUIET(mr))	    HTPrint("A depth of more than 0 requires that you also specify a URI prefix.\n",		    mr->depth);	Cleanup(mr, -1);    }    /* Testing that HTPrint is working */    if (mr->flags & MR_TIME) {	if (SHOW_REAL_QUIET(mr)) {	    time_t local = time(NULL);	    HTPrint("Welcome to the W3C mini Robot version %s - started on %s\n",		    APP_VERSION, HTDateTimeStr(&local, YES));	}    }    /* Rule file specified? */    if (mr->rules) {	char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL);	if (!HTLoadRulesAutomatically(rules))	    if (SHOW_REAL_QUIET(mr)) HTPrint("Can't access rules\n");	HT_FREE(rules);    }    /* Output file specified? 
*/    if (mr->outputfile) {	if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) {	    if (SHOW_REAL_QUIET(mr)) HTPrint("Can't open `%s'\n", mr->outputfile);	    mr->output = OUTPUT;	}    }    /* This is new */    if ((mr->cdepth = (int *) HT_CALLOC(mr->depth+2, sizeof(int)))==NULL)	HT_OUTOFMEM("main");    /* Should we use persistent cache? */    if (cache) {	HTCacheInit(cache_root, cache_size);	/* Should we start by flushing? */	if (flush) HTCache_flushAll();    }    /* SQL Log specified? */#ifdef HT_MYSQL    if (mr->sqlserver) {	if ((mr->sqllog =	     HTSQLLog_open(mr->sqlserver,			   mr->sqluser ? mr->sqluser : DEFAULT_SQL_USER,			   mr->sqlpw ? mr->sqlpw : DEFAULT_SQL_PW,			   mr->sqldb ? mr->sqldb : DEFAULT_SQL_DB,			   mr->sqlflags)) != NULL) {	    if (mr->sqlrelative) HTSQLLog_makeRelativeTo(mr->sqllog, mr->sqlrelative);	}    }#endif    /* CLF Log file specified? */    if (mr->logfile) {        mr->log = HTLog_open(mr->logfile, YES, YES);        if (mr->log) HTNet_addAfter(HTLogFilter, NULL, mr->log, HT_ALL, HT_FILTER_LATE);    }    /* Referer Log file specified? */    if (mr->reffile) {        mr->ref = HTLog_open(mr->reffile, YES, YES);        if (mr->ref)	    HTNet_addAfter(HTRefererFilter, NULL, mr->ref, HT_ALL, HT_FILTER_LATE);    }    /* Not found error log specified? 
*/    if (mr->notfoundfile) {        mr->notfound = HTLog_open(mr->notfoundfile, YES, YES);        if (mr->notfound)	    HTNet_addAfter(HTRefererFilter, NULL, mr->notfound, -404, HT_FILTER_LATE);    }    /* Check that the redirection code is valid */    if (mr->flags & MR_REDIR) {	BOOL isredir = NO;	if (mr->redir_code == HT_PERM_REDIRECT || mr->redir_code == 0) {	    HTNet_addAfter(redirection_handler, "http://*" , NULL, HT_PERM_REDIRECT, HT_FILTER_LATE);	    isredir = YES;	}	if (mr->redir_code == HT_TEMP_REDIRECT || mr->redir_code == 0) {	    HTNet_addAfter(redirection_handler, "http://*", NULL, HT_TEMP_REDIRECT, HT_FILTER_LATE);	    isredir = YES;	}	if (mr->redir_code == HT_FOUND || mr->redir_code == 0) {	    HTNet_addAfter(redirection_handler, "http://*", NULL, HT_FOUND, HT_FILTER_LATE);	    isredir = YES;	}	if (mr->redir_code == HT_SEE_OTHER || mr->redir_code == 0) {	    HTNet_addAfter(redirection_handler, "http://*", NULL, HT_SEE_OTHER, HT_FILTER_LATE);	    isredir = YES;	}	if (!isredir) {	    if (SHOW_REAL_QUIET(mr))		HTPrint("%d is not a valid redirection code\n", mr->redir_code);	    Cleanup(mr, -1);	}    }    /* Negotiated resource log specified? */    if (mr->connegfile) mr->conneg = HTLog_open(mr->connegfile, YES, YES);    /* No alt tags log file specified? */    if (mr->noalttagfile) mr->noalttag = HTLog_open(mr->noalttagfile, YES, YES);    /* Reject Log file specified? */    if (mr->rejectfile) mr->reject = HTLog_open(mr->rejectfile, YES, YES);#ifdef HT_POSIX_REGEX    if(!(mr->flags & MR_NOROBOTSTXT))      {      char *ruri = HTParse(ROBOTS_TXT, mr->furl, PARSE_ALL);      char *robot_str = get_robots_txt(ruri);      char *reg_exp_robot = robot_str ? 	
scan_robots_txt(robot_str,APP_NAME) : NULL;      if (SHOW_REAL_QUIET(mr)) HTPrint("robots.txt uri is `%s'\n", ruri);      if(robot_str)	  HT_FREE(robot_str);      if(reg_exp_robot)	{	  mr->exc_robot = get_regtype(mr, reg_exp_robot, W3C_DEFAULT_REGEX_FLAGS);	  HT_FREE(reg_exp_robot);	}      HT_FREE(ruri);    }#endif    /* Add our own HTML HText functions */    Robot_registerHTMLParser();    /* Register our own terminate filter */    HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);    /* If doing breath first search */    if (mr->flags & MR_BFS)	HTNet_addAfter(bfs_terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST);    /* Setting event timeout */    HTHost_setEventTimeout(mr->timer);    mr->time = HTGetTimeInMillis();    /* Start the request */    finger = Finger_new(mr, startAnchor, METHOD_GET);    /*    ** Make sure that the first request is flushed immediately and not    ** buffered in the output buffer    */    HTRequest_setFlush(finger->request, YES);    /*    ** Check whether we should do some kind of cache validation on    ** the load    */    if (mr->flags & MR_VALIDATE)	HTRequest_setReloadMode(finger->request, HT_CACHE_VALIDATE);    if (mr->flags & MR_END_VALIDATE)	HTRequest_setReloadMode(finger->request, HT_CACHE_END_VALIDATE);    /*    **  Now do the load    */    if (mr->flags & MR_PREEMPTIVE)	HTRequest_setPreemptive(finger->request, YES);    if (keywords)						   /* Search */	status = HTSearchAnchor(keywords, (HTAnchor *)startAnchor, finger->request);    else	status = HTLoadAnchor((HTAnchor *)startAnchor, finger->request);    if (keywords) HTChunk_delete(keywords);    if (status != YES) {	if (SHOW_REAL_QUIET(mr)) HTPrint("Can't access resource\n");	Cleanup(mr, -1);    }    /* Go into the event loop... */    if((mr->flags & MR_PREEMPTIVE) && (mr->flags & MR_BFS))      Serving_queue(mr);    else      HTEventList_loop(finger->request);    /* Only gets here if event loop fails */    Cleanup(mr, 0);    return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -