⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 collect.c

📁 harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
	sprintf(buf, UPDATE_INVALID);	send_msg_to_client(s, buf, 0);	return (0);    }    for (q = p; *q != '\0'; q++) {	if (!isdigit(*q)) {	    sprintf(buf, UPDATE_INVALID);	    send_msg_to_client(s, buf, 0);	    return (0);	}    }    timestamp = atoi(p);    return (send_update(s, timestamp));}/* *  send_update() - Sends all objects that have changed since timestamp to s */static int send_update(s, timestamp)     int s;     time_t timestamp;{    static char buf[BUFSIZ];    int r;    int oldxmit = nxmit;    int ofd = s;    nobjs = 0;    sprintf(buf, UPDATE_OK, timestamp);    send_msg_to_client(ofd, buf, 1);    if (allzipped != NULL && timestamp == 0 && do_compress &&	access(allzipped, R_OK) == 0) {	send_allzipped(ofd);	Log(GOODBYE_OK, remote_host, nxmit);	return (1);		/* terminate */    }    if (do_compress) {	init_compression(ofd);	ofd = topipe[1];    }    sprintf(buf, "@DELETE { }\n@REFRESH { }\n");    send_msg_to_client(ofd, buf, 0);    sprintf(buf, "@UPDATE {\n");    send_msg_to_client(ofd, buf, 0);    if (timestamp == 0)	r = send_all(ofd);    else	r = send_selected_index(ofd, timestamp);    sprintf(buf, "}\n");    send_msg_to_client(ofd, buf, 0);    sprintf(buf, UPDATE_DONE, nobjs, nxmit - oldxmit);    send_msg_to_client(ofd, buf, 1);    return (do_compress ? finish_compression(s) : r);}/* *  send_allzipped() - Sends the pre-gzip'ed object database to the client. *  This is a short-cut for servers that send process lots of SEND-UPDATE 0 *  commands in compressed mode. 
*/static void send_allzipped(s)     int s;{    FILE *fp = NULL;    static char buf[BUFSIZ];    int n;    if ((fp = fopen(allzipped, "r")) != NULL) {	while ((n = fread(buf, 1, BUFSIZ - 1, fp)) > 0) {	    send_data_to_client(s, buf, n);#ifdef HAVE_RANDOM	    if (random() % 257 == 0) {#else	    if (rand() % 257 == 0) {#endif		Log("Written %d bytes so far to %s\n", nxmit,		    remote_host);	    }	}	fclose(fp);    }}/* *  send_all() - Sends all objects in the database to s */static int send_all(s)     int s;{    char newline = '\n';    GDBM_FILE dbf;    datum k;    datum nextkey;    datum d;    dbf = gdbm_open(dbfile, 0, GDBM_READER, 0644, NULL);    if (dbf == NULL) {	errorlog("GDBM ERROR: %s: %s\n", dbfile,	    gdbm_strerror(gdbm_errno));	return (1);    }    k = gdbm_firstkey(dbf);    while (k.dptr) {	d = gdbm_fetch(dbf, k);	if (d.dptr == NULL)	    fatal("GDBM ERROR: No fetch?: %s: %s\n", dbfile,		gdbm_strerror(gdbm_errno));	nobjs++;	send_data_to_client(s, d.dptr, d.dsize);	send_data_to_client(s, &newline, 1);	if (nobjs % 257 == 0) {	/* print status every so often */	    Log("Written %d bytes so far to %s\n", nxmit,		remote_host);	}	nextkey = gdbm_nextkey(dbf, k);	free(k.dptr);	free(d.dptr);	k = nextkey;    }    gdbm_close(dbf);    return (0);}/* *  send_object() - Sends an object in the database to s * *  Contributed by: djk@chbi.co.uk */static int send_object(s, key)     int s;     char *key;{    GDBM_FILE dbf;    char *p = NULL;    static char buf[512];    datum k;    datum d;    dbf = gdbm_open(dbfile, 0, GDBM_READER, 0644, NULL);    if (dbf == NULL) {	errorlog("GDBM ERROR: %s: %s\n", dbfile,	    gdbm_strerror(gdbm_errno));	return (1);    }    /* skip over comand word */    p = strtok(key, " \t\n");    p = strtok(NULL, " \t\n");    if (p == NULL) {	p = "<NULL>";	sprintf(buf, OBJ_INVALID, p);	send_msg_to_client(s, buf, 1);	goto lend;    }    k.dptr = strdup(p);    k.dsize = strlen(k.dptr) + 1;	/* include the \0 on the end!!! 
*/    d = gdbm_fetch(dbf, k);    if (d.dptr == NULL) {	free(k.dptr);	sprintf(buf, OBJ_INVALID, p);	send_msg_to_client(s, buf, 1);	goto lend;    }    sprintf(buf, OBJ_OK, p);    send_msg_to_client(s, buf, 1);    send_data_to_client(s, d.dptr, d.dsize);    sprintf(buf, OBJ_DONE, p, d.dsize);    send_msg_to_client(s, buf, 1);    free(k.dptr);    free(d.dptr);  lend:    gdbm_close(dbf);    return (0);}/* *  send_selected_index() - Uses an index of timestamps to quickly *  determine which object to send to s. */static int send_selected_index(s, timestamp)     int s;     time_t timestamp;{    char newline = '\n';    GDBM_FILE dbf;    GDBM_FILE indexdbf;    datum k;    datum nextkey;    datum d;    datum td;    time_t t;    static char buf[BUFSIZ];    dbf = gdbm_open(dbfile, 0, GDBM_READER, 0644, NULL);    if (dbf == NULL) {	errorlog("GDBM ERROR: %s: %s\n", dbfile,	    gdbm_strerror(gdbm_errno));	return (1);    }    indexdbf = gdbm_open(indexfile, 0, GDBM_READER, 0644, NULL);    if (indexdbf == NULL) {	errorlog("GDBM ERROR: %s: %s\n", indexfile,	    gdbm_strerror(gdbm_errno));	return (1);    }    /*     *  For each URL in the indexfile, grab the timestamp from the     *  indexfile, then compare to see if the object has changed.     *  Return the template for the URL if it has.     *     *  This code depends on INDEX.gdbm to be in sync with     *  PRODUCTION.gdbm, but it makes the code much faster.     
*/    k = gdbm_firstkey(indexdbf);    while (k.dptr) {	d = gdbm_fetch(indexdbf, k);	if (d.dptr == NULL) {	    errorlog("Empty index value for %s: %s\n", k.dptr,		gdbm_strerror(gdbm_errno));	    free(k.dptr);	    gdbm_close(indexdbf);	    gdbm_close(dbf);	    return (1);	}	memcpy(buf, d.dptr, d.dsize);	buf[d.dsize] = '\0';	t = atoi(buf);	free(d.dptr);	if (t >= timestamp) {	    nobjs++;	    td = gdbm_fetch(dbf, k);	    if (td.dptr == NULL)		fatal("GDBM: No fetch?: %s: %s\n", dbfile,		    gdbm_strerror(gdbm_errno));	    send_data_to_client(s, td.dptr, td.dsize);	    send_data_to_client(s, &newline, 1);	    if (nobjs % 257 == 0) {		Log("Written %d bytes so far to %s\n", nxmit,		    remote_host);	    }	    free(td.dptr);	}	nextkey = gdbm_nextkey(indexdbf, k);	free(k.dptr);	k = nextkey;    }    gdbm_close(dbf);    gdbm_close(indexdbf);    return (0);}void Tolower(s)     char *s;{    char *p = NULL;    for (p = s; *p; p++)	if (isupper(*p))	    *p = tolower(*p);}/* *  send_msg_to_client() - Sends the string msg to s; logs msg if logit is set */static void send_msg_to_client(s, msg, logit)     int s;     char *msg;     int logit;{    if (logit)	Log("%s", msg);    send_data_to_client(s, msg, strlen(msg));}/* *  send_data_to_client() - sends msgsz bytes of msg to s */static void send_data_to_client(s, msg, msgsz)     int s;     char *msg;     int msgsz;{    int n;    if ((n = write(s, msg, msgsz)) < 0) {	log_errno("write");	exit(1);    }    if (n != msgsz) {	errorlog("Only wrote %d of %d bytes, exiting.\n", n, msgsz);	exit(1);    }    nxmit += msgsz;}/* *  process_set() - Processes the SET command */static int process_set(s, cmd)     int s;     char *cmd;{    static char buf[BUFSIZ];    char *p = NULL;    (void) strtok(cmd, " \t\n");	/* ignore set */    p = strtok(NULL, " \t\n");    if ((p == NULL)) {	sprintf(buf, SET_INVALID);	send_msg_to_client(s, buf, 0);	return (0);    }    if (!strcasecmp(p, "compression")) {	do_compress = 1;	sprintf(buf, SET_OK, "GNU zip compression");	
send_msg_to_client(s, buf, 1);	return (0);    }    if (!strcasecmp(p, "nocompression")) {	do_compress = 0;	sprintf(buf, SET_OK, "Turned off GNU zip compression");	send_msg_to_client(s, buf, 1);	return (0);    }    sprintf(buf, SET_INVALID);    send_msg_to_client(s, buf, 0);    return (0);}static int gzip_pid = 0;/* *  init_compression() - Builds a pipe to a compression command, that *  reads from stdin, and writes to stdout. */static void init_compression(fd)     int fd;			/* place to write compressed data */{    if (pipe(topipe) < 0) {	log_errno("pipe");	exit(1);    }    if ((gzip_pid = fork()) < 0) {	log_errno("fork");	exit(1);    }    if (gzip_pid == 0) {	/* child */	/* Run gzip with a low priority to be nice to the server */#ifdef HAVE_NICE	nice(5);#else	int prio = getpriority(PRIO_PROCESS, 0);	if (prio != -1) {	    prio = (prio + 5) > 20 ? 20 : (prio + 5);	    if (setpriority(PRIO_PROCESS, 0, prio) < 0) {		log_errno("setpriority");	    }	}#endif	close(topipe[1]);	dup2(topipe[0], 0);	/* read:topipe -> stdin */	if (fd != 0)		/* nothing if we're inetd born */	    dup2(fd, 1);	/* fd -> stdout */	execlp(cmd_gzip, "gzip", "-cf", NULL);	log_errno("execlp: gzip");	_exit(1);    }    /* parent */    close(topipe[0]);}/* *  finish_compression() - Flushes and closes the pipe to gzip and *  closes the connection to the client. */static int finish_compression(s)     int s;{    close(topipe[1]);		/* end input to gzip */    if (gzip_pid > 0)	waitpid(gzip_pid, NULL, 0);	/* sit and wait for gzip to die */    gzip_pid = 0;    Log(GOODBYE_OK, remote_host, nxmit);    return (1);			/* terminate */}/* *  access_denied() - Returns non-zero if host isn't allowed access. *  If allow_all is set, then only the deny_hosts are checked. *  If deny_all is set, then only the allow_hosts are checked. *  If both allow_all and deny_all are set, then gatherd exits. *  If neither is set, then both deny_hosts and allow_hosts are checked. * *  All matches *must* occur at the end of the hostname.  
If you allow *  any strstr() then someone could circumvent the security by prepending *  a domainname to theirs (eg. ftp.cs.colorado.edu.evil.com would be *  allowed for a cs.colorado.edu domain). */static int access_denied(host)     char *host;{    int i;    if (strend_match(host, "localhost"))	return (0);    if (allow_all && deny_all) {	errorlog("Illegal config: Both allow and deny all set\n");	return (1);    }    if (allow_all) {	for (i = 0; deny_hosts[i] != NULL; i++) {	    if (strend_match(host, deny_hosts[i]))		return (1);	}	return (0);    }    if (deny_all) {	for (i = 0; allow_hosts[i] != NULL; i++) {	    if (strend_match(host, allow_hosts[i]))		return (0);	}	return (1);    }    for (i = 0; deny_hosts[i] != NULL; i++) {	if (strend_match(host, deny_hosts[i]))	    return (1);    }    for (i = 0; allow_hosts[i] != NULL; i++) {	if (strend_match(host, allow_hosts[i]))	    return (0);    }    return (1);}/* *  strend_match() - returns non-zero if the string q is present as the *  last nbytes of string s. */static int strend_match(s, q)     char *s, *q;{    int ssz = strlen(s);    int qsz = strlen(q);    if (ssz < qsz)		/* easy case */	return (0);    if (ssz == qsz)	return (!strcmp(s, q));    return (!strcmp(s + ssz - qsz, q));}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -