⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cache.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
	/* See if we have room, if not delete an object */	while (1) {		if (ndeletes > 3) {	/* Try 3 times to reduce cache size */			release_access();			return;	/* RELEASE and give up */		}		current_size = get_cachesize();		Debug(22, 1, ("add_cache: Current Cache Size is %d\n",			current_size));		if (sb.st_size + current_size > max_cache_size) {			delete_cache_entry();			ndeletes++;		} else {			break;	/* We're ready to add */		}	}	/* Cache the file; by link'ing it */	cfile = get_cache_filename();	Debug(22, 1, ("add_cache: Linking %s to %s\n", cfile, filename));	if (link(filename, cfile) < 0) {		/* must copy the file */		int n;		char *buf = NULL;		int rfd, wfd;		if ((wfd = open(cfile, O_WRONLY | O_CREAT, 0666)) < 0) {			log_errno2(__FILE__, __LINE__, cfile);			release_access();	/* RELEASE  and give up */			xfree(cfile);			return;		}		if ((rfd = open(filename, O_RDONLY)) < 0) {			log_errno2(__FILE__, __LINE__, filename);			release_access();	/* RELEASE  and give up */			xfree(cfile);			close(wfd);             /* Close the file handle */			return;		}		buf = (char *) xmalloc(4096);		while ((n = read(rfd, buf, 4096)) > 0)			write(wfd, buf, n);		close(rfd);		close(wfd);		xfree(buf);	}	/* Add the file to the cache */	k.dptr = xstrdup(url);	k.dsize = strlen(k.dptr) + 1;	d.dptr = xstrdup(cfile);	d.dsize = strlen(d.dptr) + 1;	Debug(22, 1, ("add_cache: Adding %s -> %s\n", k.dptr, d.dptr));	status = gdbm_store(dbf, k, d, GDBM_INSERT);	if (status) {		/* already an entry for URL */		Debug(22, 1, ("add_cache: GDBM_ERROR: gdbm_store: key (%s): %s: %s\n", k.dptr, cfile, gdbm_strerror(gdbm_errno)));		(void) unlink(cfile);		xfree(k.dptr);		xfree(d.dptr);		xfree(cfile);		release_access();	/* RELEASE */		return;	}	xfree(k.dptr);	xfree(d.dptr);	xfree(cfile);	change_cachesize(sb.st_size);	/* Add the LMT */	k.dptr = xstrdup(url);	k.dsize = strlen(k.dptr) + 1;	d.dptr = (char *) &lmt;	d.dsize = sizeof(lmt);	Debug(22, 5, ("Storing LMT=%d for %s\n", lmt, url));	status = gdbm_store(lm_dbf, k, d, 
GDBM_INSERT);	if (status) {		/* already an entry for URL */		Debug(22, 1, ("add_cache: GDBM_ERROR: gdbm_store: url=%s LMT=%ld: %s\n", k.dptr, lmt, gdbm_strerror(gdbm_errno)));#if 0		/* don't need this kjl/24feb2002 */		release_access();	/* RELEASE */		return;#endif	}	xfree(k.dptr);	release_access();	/* RELEASE */}/* *  lookup_cache() - Checks to see if the URL is already cached on the *  local disk.  If it is, then it returns a pointer to a copy of the file. */char *lookup_cache(url)     char *url;{	static char *filecopy;	datum k, d;	struct stat sb;	Debug(22, 1, ("lookup_cache: %s\n", url));	k.dptr = xstrdup(url);	k.dsize = strlen(url) + 1;	get_access(GDBM_READER);	/* LOCK */	d = gdbm_fetch(dbf, k);	xfree(k.dptr);	if (d.dptr == NULL) {		release_access();	/* RELEASE */		xfree(d.dptr);		return (NULL);	}	/* make sure its really there */	if (stat(d.dptr, &sb) < 0) {		release_access();	/* RELEASE */		delete_cache_url(d.dptr);		xfree(d.dptr);		return NULL;	}	filecopy = tempnam(cachedir, "cache");	Debug(22, 1, ("lookup_cache: CACHE HIT: Linking %s to %s\n",		filecopy, d.dptr));	if (link(d.dptr, filecopy) < 0) {		if (symlink(d.dptr, filecopy) < 0) {	/* try symlink(2) */			log_errno(filecopy);			xfree(filecopy);			filecopy = NULL;		}	}	release_access();	/* RELEASE */	xfree(d.dptr);	return (filecopy);}/* *  delete_cache_url() - deletes the cache entry for the URL. 
*/static void delete_cache_url(url)     char *url;{	datum k, d;	struct stat sb;	Debug(22, 1, ("delete_cache_url: %s\n", url));	k.dptr = xstrdup(url);	k.dsize = strlen(k.dptr) + 1;	d = gdbm_fetch(dbf, k);	if (d.dptr == NULL) {		errorlog("URL %s is not in the cache.\n", url);		return;	}	if (stat(d.dptr, &sb) < 0) {		log_errno(d.dptr);		sb.st_size = 0;	}	(void) unlink(d.dptr);	xfree(d.dptr);	change_cachesize(-sb.st_size);	(void) gdbm_delete(dbf, k);	xfree(k.dptr);	Debug(22, 1, ("delete_cache_url (LMT): %s\n", url));	k.dptr = xstrdup(url);	k.dsize = strlen(k.dptr) + 1;	d = gdbm_fetch(lm_dbf, k);	if (d.dptr == NULL) {		errorlog("not in LMT.gdbm: %s\n", url);		return;	}	(void) gdbm_delete(dbf, k);	xfree(d.dptr);	xfree(k.dptr);}/* *  delete_cache_entry() - remove the file that was accessed last. *  ASSUMES that get_access() has already been called. */static void delete_cache_entry(){	datum k, nk;	Debug(22, 1, ("delete_cache_entry: deleting...\n"));	/* Randomly select an entry to delete */	k = gdbm_firstkey(dbf);	while (k.dptr) {		nk = gdbm_nextkey(dbf, k);#if   defined(HAVE_SRAND48)		if (lrand48() % 13 == 0) {	/* delete a random entry */#elif defined(HAVE_SRANDOM)			if (random() % 13 == 0) {	/* delete a random entry */#else		if (rand() % 13 == 0) {		/* delete a random entry */#endif			delete_cache_url(k.dptr);		}		xfree(k.dptr);		k = nk;	}	/*	 *  Oops, we didn't select any during the random traversal,	 *  so just delete the first one	 */	k = gdbm_firstkey(dbf);	if (k.dptr) {		delete_cache_url(k.dptr);		xfree(k.dptr);	}}/* *  get_cachesize() - Returns the number of bytes in the cache */static int get_cachesize(){	FILE *fp;	int sz = 0;	if ((fp = fopen(cachesize, "r")) == NULL) {		return (0);	}	if (fscanf(fp, "%d", &sz) != 1) {		fclose(fp);		return (0);	}	fclose(fp);	return (sz < 0 ? 
0 : sz);}/* *  change_cachesize() - Changes the cache size by n bytes; */static void change_cachesize(n)     int n;{	FILE *fp;	int sz = get_cachesize();	if ((fp = fopen(cachesize, "w+")) == NULL) {		log_errno(cachesize);		die();	}	fprintf(fp, "%d", sz + n);	fclose(fp);}#define myabs(a)	((a) < 0 ? -(a) : (a))/* *  expire_cache() - Removes any cached files that are older than CACHE_TTL. */void expire_cache(){	datum k, nk, d;	struct stat sb;	time_t now = time(0);	Debug(22, 1, ("expire_cache: Deleting expired entries...\n"));	Debug(22, 1, ("expire_cache: local-disk cache in %s...\n", cachedir));	get_access(GDBM_WRITER);	/* LOCK */	/* Walk each cache file and delete if necessary */	k = gdbm_firstkey(dbf);	while (k.dptr) {		nk = gdbm_nextkey(dbf, k);		d = gdbm_fetch(dbf, k);		if (d.dptr == NULL) {			errorlog("Internal Error: %s not in cache table\n",			    k.dptr);			die();		}		if (stat(d.dptr, &sb) < 0) {			log_errno(d.dptr);			delete_cache_url(k.dptr);		} else if (myabs(now - sb.st_mtime) > cache_ttl) {			delete_cache_url(k.dptr);		}		xfree(d.dptr);		xfree(k.dptr);		k = nk;	}	release_access(GDBM_WRITER);	/* RELEASE */}#undef myabsstatic char *topdir = NULL;static void init_next_filename(dirname)     char *dirname;		/* name of top level directory to store files */{	if (dirname == NULL)		return;	topdir = xstrdup(dirname);#if   defined(HAVE_SRAND48)	(void) srand48((long) time(NULL));#elif defined(HAVE_SRANDOM)	(void) srandom((unsigned) time(NULL));#else	(void) srand(time(NULL));#endif}static char *next_filename(){	static char p[BUFSIZ];#if   defined(HAVE_SRAND48)	unsigned n = lrand48() % 100000000;	/* 8 digits */#elif defined(HAVE_SRANDOM)	unsigned n = random() % 100000000;	/* 8 digits */#else	unsigned n = rand() % 100000000;	/* 8 digits */#endif	sprintf(p, "%s/%02d", topdir, (int) n / 1000000);	/* first 2 digits */	if (mkdir(p, 0755) < 0) {		if (errno != EEXIST) {			log_errno(p);			return (NULL);		}	}	/* Entire filename uses directory (2 digits) and 6 digits for name */	
sprintf(p, "%s/%02d/%06d", topdir, (int) n / 1000000, (int) n % 1000000);	return (xstrdup(p));}/* *  lmt_cache() - Returns the Last-Modification-Time for the cache hit. */time_t lmt_cache(url)     char *url;{	datum k, d;	char *filename;	struct stat sb;	time_t t;	Debug(22, 1, ("lmt_cache: %s\n", url));	k.dptr = xstrdup(url);	k.dsize = strlen(url) + 1;	get_access(GDBM_READER);	/* LOCK */	Debug(22, 5, ("Looking up LMT: %s\n", url));	d = gdbm_fetch(lm_dbf, k);	xfree(k.dptr);	if (d.dptr != NULL) {		t = *((time_t *) d.dptr);		Debug(22, 5, ("--> Found LMT=%d in LMT.gdbm.\n", t));		release_access();	/* RELEASE */		xfree(d.dptr);		return t;	}	d = gdbm_fetch(dbf, k);	xfree(k.dptr);	if (d.dptr == NULL) {		release_access();	/* RELEASE */		xfree(d.dptr);		return ((time_t) NULL);	}	filename = xstrdup(d.dptr);	xfree(d.dptr);	release_access();	/* RELEASE */	t = (stat(filename, &sb) < 0) ? 0 : sb.st_mtime;	xfree(filename);	return (t);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -