⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 db.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
			    template->url);			xfree(k.dptr);			free_template(ct);			return;		}		appending = 1;	}	b = init_print_template(NULL);	print_template(appending ? ct : template);	d.dptr = b->data;	d.dsize = b->length;	Debug(61, 1, ("Adding to GDBM file: key{%d}, data{%d}: %s\n",		k.dsize, d.dsize, (appending) ? "REPLACING" : "INSERTING"));	/* Store the data into the database */	if (gdbm_store(dbf, k, d, (appending) ? GDBM_REPLACE : GDBM_INSERT)) {		errorlog("gdbm_store: %s: %s: %s\n", k.dptr, dbfile, gdbm_strerror(gdbm_errno));	}	/* Clean up */	finish_print_template();	/* frees datum d */	xfree(k.dptr);	if (ct != NULL)		free_template(ct);	/* Make sure that there haven't been too many deletions */	if (appending)		ndeletions++;	/* gdbm_store will cause a deletion */	if ((max_deletions > 0) && (ndeletions >= max_deletions)) {		Debug(61, 1, ("Reorganizing database after %d deletions\n", ndeletions));		ndeletions = 0;		gdbm_sync(dbf);	/* sync to disk */		if (gdbm_reorganize(dbf))			errorlog("gdbm_reorganize: %s: %s\n", dbfile, gdbm_strerror(gdbm_errno));	}}/* *  dbcheck_timestamp() - Checks to see if the given URL has changed *  since the given timestamp.  If it has not changed, then it will try *  to refresh the object.  Returns non-zero if the URL is unchanged; *  otherwise, returns 0. */int dbcheck_timestamp(url, timestamp)     char *url;     int timestamp;{	datum d, k;	int t;	if (null_filter || url == NULL || timestamp < 1)		return (0);	k.dptr = url;	k.dsize = strlen(k.dptr) + 1;	d = gdbm_fetch(idbf, k);	if (d.dptr == NULL)		return (0);	/* not in the INDEX database */	/* INDEX data includes the terminating \0 */	t = atoi(d.dptr);	xfree(d.dptr);	/* Compare the timestamps */	if ((t < 0) || (timestamp > t))		/* changed */		return (0);	dbcheck_refresh(url);	/* unchanged */	return (1);}/* *  dbcheck_md5() - Checks to see if the given URL has changed *  since the given md5.  If it has not changed, then it will try *  to refresh the object.  
Returns non-zero if the URL is unchanged; *  otherwise, returns 0. */int dbcheck_md5(url, md5)     char *url;     char *md5;{	datum d, k;	if (null_filter || md5 == NULL || url == NULL)		return (0);	k.dptr = url;	k.dsize = strlen(k.dptr) + 1;	d = gdbm_fetch(mdbf, k);	if (d.dptr == NULL)		return (0);	/* not in the MD5 database */	/* MD5 from GDBM includes the terminating \0 */	if (strcmp(md5, d.dptr) != 0) {		xfree(d.dptr);		return (0);	/* it has changed */	}	xfree(d.dptr);	dbcheck_refresh(url);	/* it has not changed */	return (1);}/* *  dbcheck_refresh() - Update the timestamp for URL.  Logs either *  "Unchanged" or "Refreshed". */static void dbcheck_refresh(url)     char *url;{	static int ndone = 0;	datum d, k;	Template *t;	AVPair *avp;	int refresh_rate, update_time, ttl;	time_t current_time;	char tbuf[BUFSIZ];	Buffer *b;	if (null_filter || rdbf == NULL) {		Log("Unchanged: %s\n", url);		return;	}	if (ndone == -1 || (max_refresh > 0 && ndone >= max_refresh)) {		if (ndone != -1) {			Log("WARNING: Reached refresh maximum: %d objects.\n",			    ndone);		}		ndone = -1;		Log("Unchanged (with no refresh): %s\n", url);		return;	}	/* Grab the PRODUCTION copy of the object */	k.dptr = url;	k.dsize = strlen(k.dptr) + 1;	d = gdbm_fetch(pdbf, k);	if (d.dptr == NULL) {		errorlog("dbcheck_refresh: refreshing non-existant object?: %s\n", url);		Log("Unchanged: %s\n", url);		return;	}	/* Parse the object to find it's update time */	init_parse_template_string(d.dptr, d.dsize);	if ((t = parse_template()) == NULL) {		errorlog("dbcheck_refresh: Corrupt SOIF object: %s\n", url);		xfree(d.dptr);		Log("Unchanged: %s\n", url);		return;	}	finish_parse_template();	xfree(d.dptr);	/* Grab the refresh rate */	refresh_rate = DEFAULT_REFRESH;		/* default */	if ((avp = extract_AVPair(t->list, T_REFRESH)) != NULL) {		refresh_rate = atoi(avp->value);	}	/* Grab the TTL */	ttl = DEFAULT_TTL;	/* default */	if ((avp = extract_AVPair(t->list, T_TTL)) != NULL) {		ttl = atoi(avp->value);	}	/*      Make 
sure the refresh rate is never less than   **	 * **   the TTL.                                        */	if (ttl < refresh_rate)		refresh_rate = ttl;	/* Grab the update time */	if ((avp = extract_AVPair(t->list, T_UPDATE)) == NULL) {		free_template(t);		errorlog("dbcheck_refresh: Illegal SOIF: %s: No %s attribute.\n", url, T_UPDATE);		Log("Unchanged: %s\n", url);		return;	}	update_time = atoi(avp->value);	/*	 *  See if the object is ready for a refresh.  If it isn't, then	 *  ignore the request; otherwise, update the timestamp.	 *	 *  An object should be refreshed if the time it was created	 *  (Update-Time) plus the Refresh-Rate has expired.	 */	current_time = time(NULL);	if ((update_time + refresh_rate) > current_time) {		free_template(t);		Log("Unchanged: %s\n", url);		return;	}	Log("Refreshing: %s\n", url);	/* Directly replace the update time */	xfree(avp->value);	sprintf(tbuf, "%u", (unsigned int) current_time);	avp->value = strdup(tbuf);	avp->vsize = strlen(avp->value);	/* Write the new object to the refresh database */	if (((b = soif_to_buffer(t)) == NULL) || (b->length < 1)) {		errorlog("dbcheck_refresh: Internal error!\n");		return;	}	d.dptr = b->data;	d.dsize = b->length;	k.dptr = url;	k.dsize = strlen(k.dptr) + 1;	if (!gdbm_exists(rdbf, k) && gdbm_store(rdbf, k, d, GDBM_INSERT)) {		Log("WARNING: Cannot refresh: %s: %s\n", url,		    gdbm_strerror(gdbm_errno));	} else {		ndone++;	}	/* Clean up */	free_buffer(b);	b = NULL;}/* *  soif_to_buffer() - Prints the in-memory SOIF template to an *  in-memory buffer.  The given Template is no longer valid on exit. *  Must call free_buffer() after this call. 
*/static Buffer *soif_to_buffer(t)     Template *t;{	static Buffer *b;	FILE *fp;	char *tfile, buf[BUFSIZ];	int n;	/* For large SOIF objects you need 2x the size of the object in mem */	if (!memefficient) {		/* Just print the string and return the buffer */		b = init_print_template(NULL);		print_template(t);		free_template(t);		return (b);	}	/*	 *  This would not be very memory efficient since we keep the	 *  current template and the new version both in memory.	 *  The more memory efficient solution is below.  We write the	 *  template to a file, free the template, then read the file	 *  into a buffer, then write the data to the database.	 */	tfile = tempnam(NULL, "stb");	if ((fp = fopen(tfile, "w")) == NULL) {		/* fallback */		b = init_print_template(NULL);		print_template(t);		free_template(t);		xfree(tfile);		return (b);	}	(void) init_print_template(fp);	print_template(t);	finish_print_template();	free_template(t);	fclose(fp);	if ((fp = fopen(tfile, "r")) == NULL) {		/* fatal error! */		(void) unlink(tfile);		xfree(tfile);		return (NULL);	}	b = create_buffer(BUFSIZ);	while ((n = fread(buf, 1, BUFSIZ, fp)) > 0) {		add_buffer(b, buf, n);	}	fclose(fp);	(void) unlink(tfile);	xfree(tfile);	return (b);}void db_delete_byurl(url)     char *url;{	datum k;	k.dptr = url;	k.dsize = strlen(url) + 1;	(void) gdbm_delete(dbf, k);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -