📄 cache.c
字号:
static char rcsid[] = "$Id: cache.c,v 2.1 1997/03/21 18:01:13 sxw Exp $";/* * cache.c - Simple, local disk cache for liburl. * Uses a GDBM file to map URLs to the cached files. Uses links to copy * files. Locks out other processes that might make modifications to the * cache by using the mutual exclusion protection of GDBM. Maintains a * Cache.size file that has the number of bytes in the cache. * * DEBUG: section 22, level 1 Common liburl disk cache routines * AUTHOR: Harvest derived * * Harvest Indexer http://harvest.sourceforge.net/ * ----------------------------------------------- * * The Harvest Indexer is a continued development of code developed by * the Harvest Project. Development is carried out by numerous individuals * in the Internet community, and is not officially connected with the * original Harvest Project or its funding sources. * * Please mail harvest@tardis.ed.ac.uk if you are interested in participating * in the development effort. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * ---------------------------------------------------------------------- * Copyright (c) 1994, 1995. All rights reserved. * * The Harvest software was developed by the Internet Research Task * Force Research Group on Resource Discovery (IRTF-RD): * * Mic Bowman of Transarc Corporation. * Peter Danzig of the University of Southern California. * Darren R. Hardy of the University of Colorado at Boulder. * Udi Manber of the University of Arizona. * Michael F. Schwartz of the University of Colorado at Boulder. * Duane Wessels of the University of Colorado at Boulder. * * This copyright notice applies to software in the Harvest * ``src/'' directory only. Users should consult the individual * copyright notices in the ``components/'' subdirectories for * copyright information about other software bundled with the * Harvest source code distribution. * * TERMS OF USE * * The Harvest software may be used and re-distributed without * charge, provided that the software origin and research team are * cited in any use of the system. Most commonly this is * accomplished by including a link to the Harvest Home Page * (http://harvest.cs.colorado.edu/) from the query page of any * Broker you deploy, as well as in the query result pages. These * links are generated automatically by the standard Broker * software distribution. * * The Harvest software is provided ``as is'', without express or * implied warranty, and with no support nor obligation to assist * in its use, correction, modification or enhancement. We assume * no liability with respect to the infringement of copyrights, * trade secrets, or any patents, and are not responsible for * consequential damages. Proper use of the Harvest software is * entirely the responsibility of the user. * * DERIVATIVE WORKS * * Users may make derivative works from the Harvest software, subject * to the following constraints: * * - You must include the above copyright notice and these * accompanying paragraphs in all forms of derivative works, * and any documentation and other materials related to such * distribution and use acknowledge that the software was * developed at the above institutions. * * - You must notify IRTF-RD regarding your distribution of * the derivative work. * * - You must clearly notify users that your are distributing * a modified version and not the original Harvest software. * * - Any derivative product is also subject to these copyright * and use restrictions. * * Note that the Harvest software is NOT in the public domain. We * retain copyright, as specified above. * * HISTORY OF FREE SOFTWARE STATUS * * Originally we required sites to license the software in cases * where they were going to build commercial products/services * around Harvest. In June 1995 we changed this policy. We now * allow people to use the core Harvest software (the code found in * the Harvest ``src/'' directory) for free. We made this change * in the interest of encouraging the widest possible deployment of * the technology. The Harvest software is really a reference * implementation of a set of protocols and formats, some of which * we intend to standardize. We encourage commercial * re-implementations of code complying to this set of standards. * */#include <stdio.h>#include <string.h>#include <stdlib.h>#include <unistd.h>#include <errno.h>#include <fcntl.h>#include <time.h>#include <sys/time.h>#include <sys/types.h>#include <sys/stat.h>#include <gdbm.h>#include "util.h"#include "url.h"/* * Try HAVE_SRAND48, then try HAVE_SRANDOM, otherwise assume HAVE_SRAND *//* * CACHE_TTL - number of seconds that makes cached files invalid */#ifndef CACHE_TTL#define CACHE_TTL (1 * 7 * 24 * 60 * 60) /* 1 week */#endif/* * USE_CACHE_TMPDIR is the default temporary directory of where to * place the cache, or the environment variable TMPDIR is used. * This directory MUST be on the same partition as TMPDIR, since we * use link(2) for copying. */#ifndef USE_CACHE_TMPDIR#define USE_CACHE_TMPDIR "/tmp"#endif/* Local variables */static char cachedir[BUFSIZ];static char cachetable[BUFSIZ];static char cachesize[BUFSIZ];static time_t watermark;static GDBM_FILE dbf = NULL;static int max_cache_size = (32 * 1024 * 1024); /* 32 MBs */static int cache_ttl = CACHE_TTL;static GDBM_FILE lm_dbf = NULL;static char lmttable[BUFSIZ];/* Local functions */static void delete_cache_entry();static void get_access();static void release_access();static void die();static int get_cachesize();static void change_cachesize();static void delete_cache_url();static char *next_filename();static void init_next_filename();static void die(){ if (lm_dbf != NULL) gdbm_close(lm_dbf); lm_dbf = NULL; if (dbf != NULL) gdbm_close(dbf); dbf = NULL; exit(1);}/* * finish_cache() - Cleanup the cache. */void finish_cache(){ if (lm_dbf != NULL) gdbm_close(lm_dbf); lm_dbf = NULL; if (dbf != NULL) gdbm_close(dbf); dbf = NULL;}/* * init_cache() - Startup the cache */void init_cache(){ char *s = getenv("TMPDIR"); struct stat sb; /* Create a directory in which to cache the files */ sprintf(cachedir, "%s/cache-liburl", s ? s : USE_CACHE_TMPDIR); (void) mkdir(cachedir, 0755); if (access(cachedir, W_OK)) { errorlog("Cannot use %s\n", cachedir); die(); } init_next_filename(cachedir); sprintf(cachetable, "%s/Cache.gdbm", cachedir); sprintf(cachesize, "%s/Cache.size", cachedir); if (access(cachetable, F_OK)) { dbf = gdbm_open(cachetable, 0, GDBM_NEWDB, 0664, NULL); if (dbf == NULL) { if ((gdbm_errno != GDBM_CANT_BE_WRITER) && (gdbm_errno != GDBM_CANT_BE_READER)) { errorlog("GDBM ERROR: gdbm_open: %s: %s\n", cachetable, gdbm_strerror(gdbm_errno)); die(); } } else gdbm_close(dbf); } dbf = NULL; /* * watermark was used for comparing cached object time with time on * GDBM file. Now we use the the current time instead so this could * go away -DW */ if (stat(cachetable, &sb) < 0) { log_errno(cachetable); watermark = 0; } else { watermark = sb.st_mtime; } watermark = watermark > 0 ? watermark : 0; sprintf(lmttable, "%s/LMT.gdbm", cachedir); if (access(lmttable, F_OK)) { lm_dbf = gdbm_open(lmttable, 0, GDBM_NEWDB, 0664, NULL); if (lm_dbf == NULL) { if ((gdbm_errno != GDBM_CANT_BE_WRITER) && (gdbm_errno != GDBM_CANT_BE_READER)) { errorlog("GDBM ERROR: gdbm_open: %s: %s\n", lmttable, gdbm_strerror(gdbm_errno)); die(); } } else gdbm_close(lm_dbf); } lm_dbf = NULL;#if defined(HAVE_SRAND48) (void) srand48((long) time(NULL));#elif defined(HAVE_SRANDOM) (void) srandom((unsigned) time(NULL));#else (void) srand(time(NULL));#endif max_cache_size = 32; if ((s = getenv("HARVEST_MAX_LOCAL_CACHE")) != NULL) max_cache_size = atoi(s); if (max_cache_size < 0) max_cache_size = 32; max_cache_size *= 1024 * 1024; cache_ttl = CACHE_TTL; if ((s = getenv("GATHERER_CACHE_TTL")) != NULL) cache_ttl = atoi(s); if (cache_ttl < 0) cache_ttl = CACHE_TTL;}/* * get_access() - Obtains access to GDBM database table. Blocks until * it can obtain access. Locks all other liburl's from the cache table. */static void get_access(flag) int flag;{ while (1) { dbf = gdbm_open(cachetable, 0, flag, 0664, NULL); if (dbf != NULL) break; if ((gdbm_errno != GDBM_CANT_BE_WRITER) && (gdbm_errno != GDBM_CANT_BE_READER)) { errorlog("GDBM ERROR: gdbm_open: %s: %s\n", cachetable, gdbm_strerror(gdbm_errno)); die(); }#ifdef HAVE_USLEEP#if defined(HAVE_SRAND48) (void) usleep((lrand48() % 200) + 10); /* wait a random amount */#elif defined(HAVE_SRANDOM) (void) usleep((random() % 200) + 10); /* wait a random amount */#else (void) usleep((rand() % 200) + 10); /* wait a random amount */#endif#else { struct timeval sleep; sleep.tv_sec = 0;#if defined(HAVE_SRAND48) sleep.tv_usec = (lrand48() % 200) + 10;#elif defined(HAVE_SRANDOM) sleep.tv_usec = (random() % 200) + 10;#else sleep.tv_usec = (rand() % 200) + 10;#endif#ifndef _HARVEST_HPUX_ select(0, (fd_set *) 0, (fd_set *) 0, (fd_set *) 0, &sleep);#else /* _HARVEST_HPUX_ */ select(0, (int *) 0, (int *) 0, (int *) 0, &sleep);#endif /* _HARVEST_HPUX_ */ }#endif } /* * this should be safe. Only open this DB after the other has * been opened. */ lm_dbf = gdbm_open(lmttable, 0, flag, 0664, NULL); if (lm_dbf == NULL) { errorlog("GDBM ERROR: gdbm_open: %s: %s\n", lmttable, gdbm_strerror(gdbm_errno)); die(); }}/* * release_access() - Releases access to the GDBM database. */static void release_access(){ if (lm_dbf != NULL) gdbm_close(lm_dbf); lm_dbf = NULL; if (dbf != NULL) gdbm_close(dbf); dbf = NULL;}/* * get_cache_filename() - Generates a unique filename to store in the cache */static char *get_cache_filename(){ static char *s; while (1) { if ((s = next_filename()) == NULL) return (NULL); if (access(s, F_OK)) return (s); xfree(s); } return (NULL);}/* * add_cache() - Add the URL,filename to the cache. */void add_cache(url, filename, lmt) char *url; char *filename; time_t lmt;{ datum k, d; char *cfile; struct stat sb; int ndeletes = 0, current_size; int status; /* Find out some more about the file */ if (lstat(filename, &sb) < 0) { log_errno(filename); return; } if (!S_ISREG(sb.st_mode)) return; get_access(GDBM_WRCREAT); /* LOCK */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -