📄 robotmain.c
字号:
mr->flags |= MR_QUIET; /* run in really quiet mode */ } else if (!strcmp(argv[arg], "-Q")) { mr->flags |= MR_REAL_QUIET; /* run in redirection mode */ } else if (!strcmp(argv[arg], "-redir")) { mr->flags |= MR_REDIR; mr->redir_code = (arg+1 < argc && *argv[arg+1] != '-') ? atoi(argv[++arg]) : 0;#ifdef WWWTRACE /* trace flags */ } else if (!strncmp(argv[arg], "-v", 2)) { HTSetTraceMessageMask(argv[arg]+2);#endif#ifdef HT_POSIX_REGEX /* If we can link against a POSIX regex library */ } else if (!strncmp(argv[arg], "-inc", 4)) { if (arg+1 < argc && *argv[arg+1] != '-') { mr->include = get_regtype(mr, argv[++arg], W3C_DEFAULT_REGEX_FLAGS); } } else if (!strncmp(argv[arg], "-exc", 4)) { if (arg+1 < argc && *argv[arg+1] != '-') { mr->exclude = get_regtype(mr, argv[++arg], W3C_DEFAULT_REGEX_FLAGS); } } else if (!strncmp(argv[arg], "-check", 6)) { if (arg+1 < argc && *argv[arg+1] != '-') { mr->check = get_regtype(mr, argv[++arg], W3C_DEFAULT_REGEX_FLAGS); } } else if (!strcmp(argv[arg], "-norobotstxt")) { mr->flags |= MR_NOROBOTSTXT;#endif#ifdef HT_MYSQL /* If we can link against a MYSQL database library */ } else if (!strncmp(argv[arg], "-sqldb", 5)) { mr->sqldb = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_SQL_DB; } else if (!strncmp(argv[arg], "-sqlclearlinks", 10)) { mr->sqlflags |= HTSQLLOG_CLEAR_LINKS_TABLE; } else if (!strncmp(argv[arg], "-sqlclearrequests", 12)) { mr->sqlflags |= HTSQLLOG_CLEAR_REQUESTS_TABLE; } else if (!strncmp(argv[arg], "-sqlclearresources", 12)) { mr->sqlflags |= HTSQLLOG_CLEAR_RESOURCES_TABLE; } else if (!strncmp(argv[arg], "-sqlclearuris", 10)) { mr->sqlflags |= HTSQLLOG_CLEAR_URIS_TABLE; } else if (!strncmp(argv[arg], "-sqlexternals", 5)) { mr->sqlexternals = YES; } else if (!strncmp(argv[arg], "-sqlpassword", 5)) { mr->sqlpw = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_SQL_PW; } else if (!strncmp(argv[arg], "-sqlrelative", 5)) { mr->sqlrelative = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : NULL; } else if (!strncmp(argv[arg], "-sqlserver", 5)) { mr->sqlserver = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_SQL_SERVER; } else if (!strncmp(argv[arg], "-sqluser", 5)) { mr->sqluser = (arg+1 < argc && *argv[arg+1] != '-') ? argv[++arg] : DEFAULT_SQL_USER;#endif } else { if (SHOW_REAL_QUIET(mr)) HTPrint("Bad Argument (%s)\n", argv[arg]); } } else { /* If no leading `-' then check for URL or keywords */ if (!keycnt) { HyperDoc *hd; /* This is new variable */ mr->furl = HTParse(argv[arg], mr->cwd, PARSE_ALL); startAnchor = HTAnchor_parent(HTAnchor_findAddress(mr->furl)); hd = HyperDoc_new(mr, startAnchor, 0); hd->method = METHOD_GET; keycnt = 1; } else { /* Check for successive keyword arguments */ char *escaped = HTEscape(argv[arg], URL_XALPHAS); if (keycnt++ <= 1) keywords = HTChunk_new(128); else HTChunk_putc(keywords, ' '); HTChunk_puts(keywords, HTStrip(escaped)); HT_FREE(escaped); } } } if (!keycnt) { VersionInfo(); Cleanup(mr, 0); } if (mr->depth != DEFAULT_DEPTH && (mr->prefix == NULL || *mr->prefix == '*')) { if (SHOW_REAL_QUIET(mr)) HTPrint("A depth of more than 0 requires that you also specify a URI prefix.\n", mr->depth); Cleanup(mr, -1); } /* Testing that HTPrint is working */ if (mr->flags & MR_TIME) { if (SHOW_REAL_QUIET(mr)) { time_t local = time(NULL); HTPrint("Welcome to the W3C mini Robot version %s - started on %s\n", APP_VERSION, HTDateTimeStr(&local, YES)); } } /* Rule file specified? */ if (mr->rules) { char * rules = HTParse(mr->rules, mr->cwd, PARSE_ALL); if (!HTLoadRulesAutomatically(rules)) if (SHOW_REAL_QUIET(mr)) HTPrint("Can't access rules\n"); HT_FREE(rules); } /* Output file specified? */ if (mr->outputfile) { if ((mr->output = fopen(mr->outputfile, "wb")) == NULL) { if (SHOW_REAL_QUIET(mr)) HTPrint("Can't open `%s'\n", mr->outputfile); mr->output = OUTPUT; } } /* This is new */ if ((mr->cdepth = (int *) HT_CALLOC(mr->depth+2, sizeof(int)))==NULL) HT_OUTOFMEM("main"); /* Should we use persistent cache? */ if (cache) { HTCacheInit(cache_root, cache_size); /* Should we start by flushing? */ if (flush) HTCache_flushAll(); } /* SQL Log specified? */#ifdef HT_MYSQL if (mr->sqlserver) { if ((mr->sqllog = HTSQLLog_open(mr->sqlserver, mr->sqluser ? mr->sqluser : DEFAULT_SQL_USER, mr->sqlpw ? mr->sqlpw : DEFAULT_SQL_PW, mr->sqldb ? mr->sqldb : DEFAULT_SQL_DB, mr->sqlflags)) != NULL) { if (mr->sqlrelative) HTSQLLog_makeRelativeTo(mr->sqllog, mr->sqlrelative); } }#endif /* CLF Log file specified? */ if (mr->logfile) { mr->log = HTLog_open(mr->logfile, YES, YES); if (mr->log) HTNet_addAfter(HTLogFilter, NULL, mr->log, HT_ALL, HT_FILTER_LATE); } /* Referer Log file specified? */ if (mr->reffile) { mr->ref = HTLog_open(mr->reffile, YES, YES); if (mr->ref) HTNet_addAfter(HTRefererFilter, NULL, mr->ref, HT_ALL, HT_FILTER_LATE); } /* Not found error log specified? */ if (mr->notfoundfile) { mr->notfound = HTLog_open(mr->notfoundfile, YES, YES); if (mr->notfound) HTNet_addAfter(HTRefererFilter, NULL, mr->notfound, -404, HT_FILTER_LATE); } /* Check that the redirection code is valid */ if (mr->flags & MR_REDIR) { BOOL isredir = NO; if (mr->redir_code == HT_PERM_REDIRECT || mr->redir_code == 0) { HTNet_addAfter(redirection_handler, "http://*" , NULL, HT_PERM_REDIRECT, HT_FILTER_LATE); isredir = YES; } if (mr->redir_code == HT_TEMP_REDIRECT || mr->redir_code == 0) { HTNet_addAfter(redirection_handler, "http://*", NULL, HT_TEMP_REDIRECT, HT_FILTER_LATE); isredir = YES; } if (mr->redir_code == HT_FOUND || mr->redir_code == 0) { HTNet_addAfter(redirection_handler, "http://*", NULL, HT_FOUND, HT_FILTER_LATE); isredir = YES; } if (mr->redir_code == HT_SEE_OTHER || mr->redir_code == 0) { HTNet_addAfter(redirection_handler, "http://*", NULL, HT_SEE_OTHER, HT_FILTER_LATE); isredir = YES; } if (!isredir) { if (SHOW_REAL_QUIET(mr)) HTPrint("%d is not a valid redirection code\n", mr->redir_code); Cleanup(mr, -1); } } /* Negotiated resource log specified? */ if (mr->connegfile) mr->conneg = HTLog_open(mr->connegfile, YES, YES); /* No alt tags log file specified? */ if (mr->noalttagfile) mr->noalttag = HTLog_open(mr->noalttagfile, YES, YES); /* Reject Log file specified? */ if (mr->rejectfile) mr->reject = HTLog_open(mr->rejectfile, YES, YES);#ifdef HT_POSIX_REGEX if(!(mr->flags & MR_NOROBOTSTXT)) { char *ruri = HTParse(ROBOTS_TXT, mr->furl, PARSE_ALL); char *robot_str = get_robots_txt(ruri); char *reg_exp_robot = robot_str ? scan_robots_txt(robot_str,APP_NAME) : NULL; if (SHOW_REAL_QUIET(mr)) HTPrint("robots.txt uri is `%s'\n", ruri); if(robot_str) HT_FREE(robot_str); if(reg_exp_robot) { mr->exc_robot = get_regtype(mr, reg_exp_robot, W3C_DEFAULT_REGEX_FLAGS); HT_FREE(reg_exp_robot); } HT_FREE(ruri); }#endif /* Add our own HTML HText functions */ Robot_registerHTMLParser(); /* Register our own terminate filter */ HTNet_addAfter(terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST); /* If doing breath first search */ if (mr->flags & MR_BFS) HTNet_addAfter(bfs_terminate_handler, NULL, NULL, HT_ALL, HT_FILTER_LAST); /* Setting event timeout */ HTHost_setEventTimeout(mr->timer); mr->time = HTGetTimeInMillis(); /* Start the request */ finger = Finger_new(mr, startAnchor, METHOD_GET); /* ** Make sure that the first request is flushed immediately and not ** buffered in the output buffer */ HTRequest_setFlush(finger->request, YES); /* ** Check whether we should do some kind of cache validation on ** the load */ if (mr->flags & MR_VALIDATE) HTRequest_setReloadMode(finger->request, HT_CACHE_VALIDATE); if (mr->flags & MR_END_VALIDATE) HTRequest_setReloadMode(finger->request, HT_CACHE_END_VALIDATE); /* ** Now do the load */ if (mr->flags & MR_PREEMPTIVE) HTRequest_setPreemptive(finger->request, YES); if (keywords) /* Search */ status = HTSearchAnchor(keywords, (HTAnchor *)startAnchor, finger->request); else status = HTLoadAnchor((HTAnchor *)startAnchor, finger->request); if (keywords) HTChunk_delete(keywords); if (status != YES) { if (SHOW_REAL_QUIET(mr)) HTPrint("Can't access resource\n"); Cleanup(mr, -1); } /* Go into the event loop... */ if((mr->flags & MR_PREEMPTIVE) && (mr->flags & MR_BFS)) Serving_queue(mr); else HTEventList_loop(finger->request); /* Only gets here if event loop fails */ Cleanup(mr, 0); return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -