⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htrobot.c

📁 www工具包
💻 C
📖 第 1 页 / 共 3 页
字号:
    /* Calculate efficiency */    if (mr->time > 0) {	ms_t t = HTGetTimeInMillis() - mr->time;	if (t > 0) {	    double loadfactor = (mr->get_bytes / (t * 0.001));	    double reqprsec = (total_docs / (t * 0.001));	    double secs = t / 1000.0;            char bytes[50];	    if (SHOW_REAL_QUIET(mr))		HTPrint("\nAccessed %ld documents in %.2f seconds (%.2f requests pr sec)\n",			total_docs, secs, reqprsec);            HTNumToStr(mr->get_bytes, bytes, 50);	    if (SHOW_REAL_QUIET(mr))		HTPrint("\tDid a GET on %ld document(s) and downloaded %s bytes of document bodies (%2.1f bytes/sec)\n",			mr->get_docs, bytes, loadfactor);            HTNumToStr(mr->head_bytes, bytes, 50);	    if (SHOW_REAL_QUIET(mr))		HTPrint("\tDid a HEAD on %ld document(s) with a total of %s bytes\n",			mr->head_docs, bytes);	}    }    /* Create an array of existing anchors */    if (total_docs > 1) {	HTArray * array = HTAnchor_getArray(total_docs);        if (array) {	    /* Distributions */	    if (mr->flags & MR_DISTRIBUTIONS) {		if (SHOW_REAL_QUIET(mr)) HTPrint("\nDistributions:\n");	    }            /* Sort after hit counts */            if (mr->hitfile) {		if (SHOW_REAL_QUIET(mr))		    HTPrint("\tLogged hit count distribution in file `%s\'\n",			    mr->hitfile);		calculate_hits(mr, array);	    }            /* Sort after link relations */#ifdef HT_MYSQL            if (mr->relfile || mr->sqllog) {#else            if (mr->relfile) {#endif		if (mr->relfile && SHOW_REAL_QUIET(mr))		    HTPrint("\tLogged link relationship distribution in file `%s\'\n",			    mr->relfile);		calculate_linkRelations(mr, array);	    }            /* Sort after modified date */            if (mr->lmfile) {		if (SHOW_REAL_QUIET(mr))		    HTPrint("\tLogged last modified distribution in file `%s\'\n",			    mr->lmfile);		calculate_lm(mr, array);	    }            /* Sort after title */            if (mr->titlefile) {		if (SHOW_REAL_QUIET(mr))		    HTPrint("\tLogged title distribution in file `%s\'\n",			    mr->titlefile);		calculate_title(mr, array);	    }            /* Find mediatype distribution */	    if (mr->mtfile) {		HTList * mtdist = mediatype_distribution(array);		if (mtdist) {		    if (SHOW_REAL_QUIET(mr))			HTPrint("\tLogged media type distribution in file `%s\'\n",				mr->mtfile);		    log_meta_distribution(mr->mtfile, mtdist);		    delete_meta_distribution(mtdist);		}	    }            /* Find charset distribution */	    if (mr->charsetfile) {		HTList * charsetdist = charset_distribution(array);		if (charsetdist) {		    if (SHOW_REAL_QUIET(mr))			HTPrint("\tLogged charset distribution in file `%s\'\n",				mr->charsetfile);		    log_meta_distribution(mr->charsetfile, charsetdist);		    delete_meta_distribution(charsetdist);		}	    }            /* Add as may other stats here as you like */	    /* ... */	    	    /* Delete the array */            HTArray_delete(array);        }    }    return YES;}PRIVATE HTParentAnchor *get_last_parent(HTParentAnchor *anchor){  HTAnchor *anc;  HTList *sources = anchor->sources;  while((anc = (HTAnchor *) HTList_nextObject(sources)) != NULL)    {      HTParentAnchor *panchor = HTAnchor_parent(anc);      return panchor;    }  return NULL;}PRIVATE voidset_error_state_hyperdoc(HyperDoc * hd, HTRequest *request){  HTList * cur = HTRequest_error(request);  HTError *pres;  while((pres = (HTError *) HTList_nextObject(cur)) != NULL)    {      int code =HTErrors[HTError_index(pres)].code;      hd->code = code;    }}PRIVATE inttest_for_blank_spaces(char *uri){  char *ptr = uri;  for(;*ptr!='\0';ptr++)    if(*ptr == ' ')      return 1;  return 0;}/*	Create a Command Line Object**	----------------------------*/PUBLIC Robot * Robot_new (void){    Robot * me;    if ((me = (Robot *) HT_CALLOC(1, sizeof(Robot))) == NULL)	HT_OUTOFMEM("Robot_new");    me->hyperdoc = HTList_new();    me->htext = HTList_new();    me->timer = DEFAULT_TIMEOUT*MILLIES;    me->waits = 0;    me->cwd = HTGetCurrentDirectoryURL();    me->output = OUTPUT;    me->cnt = 0;    me->ndoc = -1;    me->fingers = HTList_new();    /* This is new */    me->queue = HTQueue_new();    me->cq = 0;    me->furl = NULL;    return me;}/*	Delete a Command Line Object**	----------------------------*/PRIVATE BOOL Robot_delete (Robot * mr){    if (mr) {	HTList_delete(mr->fingers);       	/* Calculate statistics */	calculate_statistics(mr);        if (mr->hyperdoc) {	    HTList * cur = mr->hyperdoc;	    HyperDoc * pres;	    while ((pres = (HyperDoc *) HTList_nextObject(cur)))		HyperDoc_delete(pres);	    HTList_delete(mr->hyperdoc);	}	if (mr->htext) {	    HTList * cur = mr->htext;	    HText * pres;	    while ((pres = (HText *) HTList_nextObject(cur)))		RHText_delete(pres);	    HTList_delete(mr->htext);	}	/* Close all the log files */	if (mr->flags & MR_LOGGING) {	    if (SHOW_REAL_QUIET(mr)) HTPrint("\nRaw Log files:\n");	}	if (mr->log) {	    if (SHOW_REAL_QUIET(mr))		HTPrint("\tLogged %5d entries in general log file `%s\'\n",			HTLog_accessCount(mr->log), mr->logfile);	    HTLog_close(mr->log);	}	if (mr->ref) {	    if (SHOW_REAL_QUIET(mr))		HTPrint("\tLogged %5d entries in referer log file `%s\'\n",			HTLog_accessCount(mr->ref), mr->reffile);	    HTLog_close(mr->ref);	}	if (mr->reject) {	    if (SHOW_REAL_QUIET(mr))		HTPrint("\tLogged %5d entries in rejected log file `%s\'\n",			HTLog_accessCount(mr->reject), mr->rejectfile);	    HTLog_close(mr->reject);	}	if (mr->notfound) {	    if (SHOW_REAL_QUIET(mr))		HTPrint("\tLogged %5d entries in not found log file `%s\'\n",			HTLog_accessCount(mr->notfound), mr->notfoundfile);	    HTLog_close(mr->notfound);	}	if (mr->conneg) {	    if (SHOW_REAL_QUIET(mr))		HTPrint("\tLogged %5d entries in content negotiation log file `%s\'\n",			HTLog_accessCount(mr->conneg), mr->connegfile);	    HTLog_close(mr->conneg);	}	if (mr->noalttag) {	    if (SHOW_REAL_QUIET(mr))		HTPrint("\tLogged %5d entries in missing alt tag log file `%s\'\n",			HTLog_accessCount(mr->noalttag), mr->noalttagfile);	    HTLog_close(mr->noalttag);	}	if (mr->output && mr->output != STDOUT) fclose(mr->output);	if (mr->flags & MR_TIME) {	    time_t local = time(NULL);	    if (SHOW_REAL_QUIET(mr))		HTPrint("\nRobot terminated %s\n", HTDateTimeStr(&local, YES));	}	/* This is new */#if 0	if (mr->cdepth) FT_FREE(mr->cdepth);#endif	if(mr->furl) HT_FREE(mr->furl);#ifdef HT_POSIX_REGEX	if (mr->include) {	    regfree(mr->include);	    HT_FREE(mr->include);	}	if (mr->exclude) {	    regfree(mr->exclude);	    HT_FREE(mr->exclude);	}	if (mr->exc_robot) {	    regfree(mr->exc_robot);	    HT_FREE(mr->exc_robot);	}	if (mr->check) {	    regfree(mr->check);	    HT_FREE(mr->check);	}#endif#ifdef HT_MYSQL	if (mr->sqllog) {	    HTSQLLog_close(mr->sqllog);	    mr->sqllog = NULL;	}#endif	if (mr->queue) HTQueue_delete(mr->queue);	HT_FREE(mr->cwd);	HT_FREE(mr->prefix);	HT_FREE(mr->img_prefix);	HT_FREE(mr);	return YES;    }    return NO;}/***  This function creates a new finger object and initializes it with a new request*/PUBLIC Finger * Finger_new (Robot * robot, HTParentAnchor * dest, HTMethod method){    Finger * me;    HTRequest * request = HTRequest_new();    if ((me = (Finger *) HT_CALLOC(1, sizeof(Finger))) == NULL)	HT_OUTOFMEM("Finger_new");    me->robot = robot;    me->request = request;    me->dest = dest;    HTList_addObject(robot->fingers, (void *)me);    /* Set the context for this request */    HTRequest_setContext (request, me);    /* Check the various flags to customize the request */    if (robot->flags & MR_PREEMPTIVE)	HTRequest_setPreemptive(request, YES);    if (robot->flags & MR_VALIDATE)	HTRequest_setReloadMode(request, HT_CACHE_VALIDATE);    if (robot->flags & MR_END_VALIDATE)	HTRequest_setReloadMode(request, HT_CACHE_END_VALIDATE);    /* We wanna make sure that we are sending a Host header (default) */    HTRequest_addRqHd(request, HT_C_HOST);    /* Set the method for this request */    HTRequest_setMethod(request, method);    robot->cnt++;    return me;}PRIVATE int Finger_delete (Finger * me){    HTList_removeObject(me->robot->fingers, (void *)me);    me->robot->cnt--;    /*    **  If we are down at one request then flush the output buffer    */    if (me->request) {	if (me->robot->cnt == 1) HTRequest_forceFlush(me->request);	HTRequest_delete(me->request);    }    /*    **  Delete the request and free myself    */    HT_FREE(me);    return YES;}/***  Cleanup and make sure we close all connections including the persistent**  ones*/PUBLIC void Cleanup (Robot * me, int status){    HTProfile_delete();    Robot_delete(me);#ifdef HT_MEMLOG    HTMemLog_close();#endif#ifdef VMS    exit(status ? status : 1);#else    exit(status ? status : 0);#endif}#ifdef HT_POSIX_REGEXPRIVATE char * get_regerror (int errcode, regex_t * compiled){    size_t length = regerror (errcode, compiled, NULL, 0);    char * str = NULL;    if ((str = (char *) HT_MALLOC(length+1)) == NULL)	HT_OUTOFMEM("get_regerror");    (void) regerror (errcode, compiled, str, length);    return str;}PUBLIC regex_t * get_regtype (Robot * mr, const char * regex_str, int cflags){    regex_t * regex = NULL;    if (regex_str && *regex_str) {	int status;	if ((regex = (regex_t *) HT_CALLOC(1, sizeof(regex_t))) == NULL)	    HT_OUTOFMEM("get_regtype");	if ((status = regcomp(regex, regex_str, cflags))) {	    char * err_msg = get_regerror(status, regex);	    if (SHOW_REAL_QUIET(mr))		HTPrint("Regular expression error: %s\n", err_msg);	    HT_FREE(err_msg);	    Cleanup(mr, -1);	}    }    return regex;}#endifPUBLIC void VersionInfo (void){    HTPrint("\nW3C OpenSource Software");    HTPrint("\n-----------------------\n\n");    HTPrint("\tWebbot version %s\n", APP_VERSION);    HTPrint("\tusing the W3C libwww library version %s.\n\n",HTLib_version());    HTPrint("\tSee \"%s\" for help\n", COMMAND_LINE);    HTPrint("\tSee \"http://www.w3.org/Robot/User/\" for user information\n");    HTPrint("\tSee \"http://www.w3.org/Robot/\" for general information\n\n");    HTPrint("\tPlease send feedback to the <www-lib@w3.org> mailing list,\n");    HTPrint("\tsee \"http://www.w3.org/Library/#Forums\" for details\n\n");}/*	terminate_handler**	-----------------**	This function is registered to handle the result of the request.**	If no more requests are pending then terminate program*/PUBLIC int terminate_handler (HTRequest * request, HTResponse * response,			       void * param, int status) {    Finger * finger = (Finger *) HTRequest_context(request);    Robot * mr = finger->robot;    if (SHOW_QUIET(mr)) HTPrint("Robot....... done with %s\n", HTAnchor_physical(finger->dest));#ifdef HT_MYSQL    if (mr->sqllog) HTSQLLog_addEntry(mr->sqllog, request, status);#endif    /* Check if negotiated resource and whether we should log that*/    if (mr->conneg) {	HTAssocList * cur = HTResponse_variant(response);	if (cur) {	    BOOL first = YES;	    HTChunk * buffer = HTChunk_new(128);	    char * uri = HTAnchor_address((HTAnchor *) finger->dest);	    HTAssoc * pres;	    HTChunk_puts(buffer, uri);	    while ((pres = (HTAssoc *) HTAssocList_nextObject(cur))) {		char * value = HTAssoc_value(pres);		if (first) {		    HTChunk_puts(buffer, "\t(");		    first = NO;		} else		    HTChunk_puts(buffer, ", ");		/* Output the name */		HTChunk_puts(buffer, HTAssoc_name(pres));		/* Only output the value if not empty string */		if (value && *value) {		    HTChunk_puts(buffer, "=");

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -