dbz.c

来自「gcc-2.95.3 Linux下最常用的C编译器」· C语言代码 · 共 1,769 行 · 第 1/3 页
1,769 行
/*dbz.c  V3.2Copyright 1988 Jon Zeeff (zeeff@b-tech.ann-arbor.mi.us)You can use this code in any manner, as long as you leave my name on itand don't hold me responsible for any problems with it.Hacked on by gdb@ninja.UUCP (David Butler); Sun Jun  5 00:27:08 CDT 1988Various improvments + INCORE by moraes@ai.toronto.edu (Mark Moraes)Major reworking by Henry Spencer as part of the C News project.These routines replace dbm as used by the usenet news software(it's not a full dbm replacement by any means).  It's fast andsimple.  It contains no AT&T code.In general, dbz's files are 1/20 the size of dbm's.  Lookup performanceis somewhat better, while file creation is spectacularly faster, especiallyif the incore facility is used.*/#include <stdio.h>#include <sys/types.h>#include <string.h>#include <ctype.h>#include <errno.h>#ifndef __STDC__extern int errno;#endif#include <dbz.h>/* * #ifdef index.  "LIA" = "leave it alone unless you know what you're doing". * * FUNNYSEEKS	SEEK_SET is not 0, get it from <unistd.h> * INDEX_SIZE	backward compatibility with old dbz; avoid using this * NMEMORY	number of days of memory for use in sizing new table (LIA) * INCORE	backward compatibility with old dbz; use dbzincore() instead * DBZDEBUG	enable debugging * DEFSIZE	default table size (not as critical as in old dbz) * OLDBNEWS	default case mapping as in old B News; set NOBUFFER * BNEWS	default case mapping as in current B News; set NOBUFFER * DEFCASE	default case-map algorithm selector * NOTAGS	fseek offsets are strange, do not do tagging (see below) * NPAGBUF	size of .pag buffer, in longs (LIA) * SHISTBUF	size of ASCII-file buffer, in bytes (LIA) * MAXRUN	length of run which shifts to next table (see below) (LIA) * OVERFLOW	long-int arithmetic overflow must be avoided, will trap * NOBUFFER	do not buffer hash-table i/o, B News locking is defective */#ifdef FUNNYSEEKS#include <unistd.h>#else#define	SEEK_SET	0#endif#ifdef OVERFLOW#include <limits.h>#endifstatic int dbzversion = 3;	/* for validating .dir file format *//* * The dbz database exploits the fact that when news stores a <key,value> * tuple, the `value' part is a seek offset into a text file, pointing to * a copy of the `key' part.  This avoids the need to store a copy of * the key in the dbz files.  However, the text file *must* exist and be * consistent with the dbz files, or things will fail. * * The basic format of the database is a simple hash table containing the * values.  A value is stored by indexing into the table using a hash value * computed from the key; collisions are resolved by linear probing (just * search forward for an empty slot, wrapping around to the beginning of * the table if necessary).  Linear probing is a performance disaster when * the table starts to get full, so a complication is introduced.  The * database is actually one *or more* tables, stored sequentially in the * .pag file, and the length of linear-probe sequences is limited.  The * search (for an existing item or an empty slot) always starts in the * first table, and whenever MAXRUN probes have been done in table N, * probing continues in table N+1.  This behaves reasonably well even in * cases of massive overflow.  There are some other small complications * added, see comments below. * * The table size is fixed for any particular database, but is determined * dynamically when a database is rebuilt.  The strategy is to try to pick * the size so the first table will be no more than 2/3 full, that being * slightly before the point where performance starts to degrade.  (It is * desirable to be a bit conservative because the overflow strategy tends * to produce files with holes in them, which is a nuisance.) *//* * The following is for backward compatibility. */#ifdef INDEX_SIZE#define	DEFSIZE	INDEX_SIZE#endif/* * ANSI C says the offset argument to fseek is a long, not an off_t, for some * reason.  Let's use off_t anyway. */#define	SOF	(sizeof(off_t))/* * We assume that unused areas of a binary file are zeros, and that the * bit pattern of `(off_t)0' is all zeros.  The alternative is rather * painful file initialization.  Note that okayvalue(), if OVERFLOW is * defined, knows what value of an offset would cause overflow. */#define	VACANT		((off_t)0)#define	BIAS(o)		((o)+1)		/* make any valid off_t non-VACANT */#define	UNBIAS(o)	((o)-1)		/* reverse BIAS() effect *//* * In a Unix implementation, or indeed any in which an off_t is a byte * count, there are a bunch of high bits free in an off_t.  There is a * use for them.  Checking a possible hit by looking it up in the base * file is relatively expensive, and the cost can be dramatically reduced * by using some of those high bits to tag the value with a few more bits * of the key's hash.  This detects most false hits without the overhead of * seek+read+strcmp.  We use the top bit to indicate whether the value is * tagged or not, and don't tag a value which is using the tag bits itself. * We're in trouble if the off_t representation wants to use the top bit. * The actual bitmasks and offset come from the configuration stuff, * which permits fiddling with them as necessary, and also suppressing * them completely (by defining the masks to 0).  We build pre-shifted * versions of the masks for efficiency. */static off_t tagbits;		/* pre-shifted tag mask */static off_t taghere;		/* pre-shifted tag-enable bit */static off_t tagboth;		/* tagbits|taghere */#define	HASTAG(o)	((o)&taghere)#define	TAG(o)		((o)&tagbits)#define	NOTAG(o)	((o)&~tagboth)#define	CANTAG(o)	(((o)&tagboth) == 0)#define	MKTAG(v)	(((v)<<conf.tagshift)&tagbits)/* * A new, from-scratch database, not built as a rebuild of an old one, * needs to know table size, casemap algorithm, and tagging.  Normally * the user supplies this info, but there have to be defaults. */#ifndef DEFSIZE#define	DEFSIZE	120011		/* 300007 might be better */#endif#ifdef OLDBNEWS#define	DEFCASE	'0'		/* B2.10 -- no mapping */#define	NOBUFFER		/* B News locking is defective */#endif#ifdef BNEWS#define	DEFCASE	'='		/* B2.11 -- all mapped */#define	NOBUFFER		/* B News locking is defective */#endif#ifndef DEFCASE			/* C News compatibility is the default */#define	DEFCASE	'C'		/* C News -- RFC822 mapping */#endif#ifndef NOTAGS#define	TAGENB	0x80		/* tag enable is top bit, tag is next 7 */#define	TAGMASK	0x7f#define	TAGSHIFT	24#else#define	TAGENB	0		/* no tags */#define	TAGMASK	0#define	TAGSHIFT	0#endif/* * We read configuration info from the .dir file into this structure, * so we can avoid wired-in assumptions for an existing database. * * Among the info is a record of recent peak usages, so that a new table * size can be chosen intelligently when rebuilding.  10 is a good * number of usages to keep, since news displays marked fluctuations * in volume on a 7-day cycle. */struct dbzconfig {	int olddbz;		/* .dir file empty but .pag not? */	off_t tsize;		/* table size */#	ifndef NMEMORY#	define	NMEMORY	10	/* # days of use info to remember */#	endif#	define	NUSEDS	(1+NMEMORY)	off_t used[NUSEDS];	/* entries used today, yesterday, ... */	int valuesize;		/* size of table values, == SOF */	int bytemap[SOF];	/* byte-order map */	char casemap;		/* case-mapping algorithm (see cipoint()) */	char fieldsep;		/* field separator in base file, if any */	off_t tagenb;		/* unshifted tag-enable bit */	off_t tagmask;		/* unshifted tag mask */	int tagshift;		/* shift count for tagmask and tagenb */};static struct dbzconfig conf;static int getconf();static long getno();static int putconf();static void mybytemap();static off_t bytemap();/*  * For a program that makes many, many references to the database, it * is a large performance win to keep the table in core, if it will fit. * Note that this does hurt robustness in the event of crashes, and * dbmclose() *must* be called to flush the in-core database to disk. * The code is prepared to deal with the possibility that there isn't * enough memory.  There *is* an assumption that a size_t is big enough * to hold the size (in bytes) of one table, so dbminit() tries to figure * out whether this is possible first. * * The preferred way to ask for an in-core table is to do dbzincore(1) * before dbminit().  The default is not to do it, although -DINCORE * overrides this for backward compatibility with old dbz. * * We keep only the first table in core.  This greatly simplifies the * code, and bounds memory demand.  Furthermore, doing this is a large * performance win even in the event of massive overflow. */#ifdef INCOREstatic int incore = 1;#elsestatic int incore = 0;#endif/* * Stdio buffer for .pag reads.  Buffering more than about 16 does not help * significantly at the densities we try to maintain, and the much larger * buffers that most stdios default to are much more expensive to fill. * With small buffers, stdio is performance-competitive with raw read(), * and it's much more portable. */#ifndef NPAGBUF#define	NPAGBUF	16#endif#ifndef NOBUFFER#ifdef _IOFBFstatic off_t pagbuf[NPAGBUF];	/* only needed if !NOBUFFER && _IOFBF */#endif#endif/* * Stdio buffer for base-file reads.  Message-IDs (all news ever needs to * read) are essentially never longer than 64 bytes, and the typical stdio * buffer is so much larger that it is much more expensive to fill. */#ifndef SHISTBUF#define	SHISTBUF	64#endif#ifdef _IOFBFstatic char basebuf[SHISTBUF];		/* only needed if _IOFBF exists */#endif/* * Data structure for recording info about searches. */struct searcher {	off_t place;		/* current location in file */	int tabno;		/* which table we're in */	int run;		/* how long we'll stay in this table */#		ifndef MAXRUN#		define	MAXRUN	100#		endif	long hash;		/* the key's hash code (for optimization) */	off_t tag;		/* tag we are looking for */	int seen;		/* have we examined current location? */	int aborted;		/* has i/o error aborted search? */};static void start();#define	FRESH	((struct searcher *)NULL)static off_t search();#define	NOTFOUND	((off_t)-1)static int okayvalue();static int set();/* * Arguably the searcher struct for a given routine ought to be local to * it, but a fetch() is very often immediately followed by a store(), and * in some circumstances it is a useful performance win to remember where * the fetch() completed.  So we use a global struct and remember whether * it is current. */static struct searcher srch;static struct searcher *prevp;	/* &srch or FRESH *//* byte-ordering stuff */static int mybmap[SOF];			/* my byte order (see mybytemap()) */static int bytesame;			/* is database order same as mine? */#define	MAPIN(o)	((bytesame) ? (o) : bytemap((o), conf.bytemap, mybmap))#define	MAPOUT(o)	((bytesame) ? (o) : bytemap((o), mybmap, conf.bytemap))/* * The double parentheses needed to make this work are ugly, but the * alternative (under most compilers) is to pack around 2K of unused * strings -- there's just no way to get rid of them. */static int debug;			/* controlled by dbzdebug() */#ifdef DBZDEBUG#define DEBUG(args) if (debug) { (void) printf args ; }#else#define	DEBUG(args)	;#endif/* externals used */extern char *malloc();extern char *calloc();extern void free();		/* ANSI C; some old implementations say int */extern int atoi();extern long atol();/* misc. forwards */static long hash();static void crcinit();static char *cipoint();static char *mapcase();static int isprime();static FILE *latebase();/* file-naming stuff */static char dir[] = ".dir";static char pag[] = ".pag";static char *enstring();/* central data structures */static FILE *basef;		/* descriptor for base file */static char *basefname;		/* name for not-yet-opened base file */static FILE *dirf;		/* descriptor for .dir file */static int dirronly;		/* dirf open read-only? */static FILE *pagf = NULL;	/* descriptor for .pag file */static off_t pagpos;		/* posn in pagf; only search may set != -1 */static int pagronly;		/* pagf open read-only? */static off_t *corepag;		/* incore version of .pag file, if any */static FILE *bufpagf;		/* well-buffered pagf, for incore rewrite */static off_t *getcore();static int putcore();static int written;		/* has a store() been done? *//* - dbzfresh - set up a new database, no historical info */int				/* 0 success, -1 failure */dbzfresh(name, size, fs, cmap, tagmask)char *name;			/* base name; .dir and .pag must exist */long size;			/* table size (0 means default) */int fs;				/* field-separator character in base file */int cmap;			/* case-map algorithm (0 means default) */off_t tagmask;			/* 0 default, 1 no tags */{	register char *fn;	struct dbzconfig c;	register off_t m;	register FILE *f;	if (pagf != NULL) {		DEBUG(("dbzfresh: database already open\n"));		return(-1);	}	if (size != 0 && size < 2) {		DEBUG(("dbzfresh: preposterous size (%ld)\n", size));		return(-1);	}	/* get default configuration */	if (getconf((FILE *)NULL, (FILE *)NULL, &c) < 0)		return(-1);	/* "can't happen" */	/* and mess with it as specified */	if (size != 0)		c.tsize = size;	c.fieldsep = fs;	switch (cmap) {	case 0:	case '0':	case 'B':		/* 2.10 compat */		c.casemap = '0';	/* '\0' nicer, but '0' printable! */		break;	case '=':	case 'b':		/* 2.11 compat */		c.casemap = '=';		break;	case 'C':		c.casemap = 'C';		break;	case '?':		c.casemap = DEFCASE;		break;	default:		DEBUG(("dbzfresh case map `%c' unknown\n", cmap));		return(-1);		break;	}	switch (tagmask) {	case 0:			/* default */		break;	case 1:			/* no tags */		c.tagshift = 0;		c.tagmask = 0;		c.tagenb = 0;		break;	default:		m = tagmask;		c.tagshift = 0;		while (!(m&01)) {			m >>= 1;			c.tagshift++;		}		c.tagmask = m;		c.tagenb = (m << 1) & ~m;		break;	}	/* write it out */	fn = enstring(name, dir);	if (fn == NULL)		return(-1);	f = fopen(fn, "w");	free(fn);	if (f == NULL) {		DEBUG(("dbzfresh: unable to write config\n"));		return(-1);	}	if (putconf(f, &c) < 0) {		(void) fclose(f);		return(-1);	}	if (fclose(f) == EOF) {		DEBUG(("dbzfresh: fclose failure\n"));		return(-1);	}	/* create/truncate .pag */	fn = enstring(name, pag);	if (fn == NULL)		return(-1);	f = fopen(fn, "w");	free(fn);	if (f == NULL) {		DEBUG(("dbzfresh: unable to create/truncate .pag file\n"));		return(-1);	} else		(void) fclose(f);	/* and punt to dbminit for the hard work */	return(dbminit(name));}/* - dbzsize - what's a good table size to hold this many entries? */longdbzsize(contents)long contents;			/* 0 means what's the default */{	register long n;	if (contents <= 0) {	/* foulup or default inquiry */		DEBUG(("dbzsize: preposterous input (%ld)\n", contents));		return(DEFSIZE);	}	n = (contents/2)*3;	/* try to keep table at most 2/3 full */	if (!(n&01))		/* make it odd */		n++;	DEBUG(("dbzsize: tentative size %ld\n", n));	while (!isprime(n))	/* and look for a prime */		n += 2;	DEBUG(("dbzsize: final size %ld\n", n));	return(n);}/* - isprime - is a number prime? * * This is not a terribly efficient approach. */static int			/* predicate */isprime(x)register long x;{	static int quick[] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 0 };	register int *ip;	register long div;	register long stop;	/* hit the first few primes quickly to eliminate easy ones */	/* this incidentally prevents ridiculously small tables */	for (ip = quick; (div = *ip) != 0; ip++)		if (x%div == 0) {			DEBUG(("isprime: quick result on %ld\n", (long)x));			return(0);		}	/* approximate square root of x */	for (stop = x; x/stop < stop; stop >>= 1)		continue;	stop <<= 1;	/* try odd numbers up to stop */	for (div = *--ip; div < stop; div += 2)		if (x%div == 0)			return(0);	return(1);}/* - dbzagain - set up a new database to be a rebuild of an old one */int				/* 0 success, -1 failure */dbzagain(name, oldname)char *name;			/* base name; .dir and .pag must exist */char *oldname;			/* base name; all must exist */{	register char *fn;	struct dbzconfig c;	register int i;	register long top;	register FILE *f;	register int newtable;	register off_t newsize;	if (pagf != NULL) {		DEBUG(("dbzagain: database already open\n"));		return(-1);	}	/* pick up the old configuration */	fn = enstring(oldname, dir);	if (fn == NULL)		return(-1);	f = fopen(fn, "r");	free(fn);	if (f == NULL) {		DEBUG(("dbzagain: cannot open old .dir file\n"));		return(-1);	}	i = getconf(f, (FILE *)NULL, &c);	(void) fclose(f);	if (i < 0) {		DEBUG(("dbzagain: getconf failed\n"));		return(-1);	}	/* tinker with it */	top = 0;	newtable = 0;	for (i = 0; i < NUSEDS; i++) {		if (top < c.used[i])			top = c.used[i];		if (c.used[i] == 0)			newtable = 1;	/* hasn't got full usage history yet */	}	if (top == 0) {		DEBUG(("dbzagain: old table has no contents!\n"));		newtable = 1;	}	for (i = NUSEDS-1; i > 0; i--)		c.used[i] = c.used[i-1];	c.used[0] = 0;	newsize = dbzsize(top);	if (!newtable || newsize > c.tsize)	/* don't shrink new table */		c.tsize = newsize;	/* write it out */	fn = enstring(name, dir);	if (fn == NULL)		return(-1);	f = fopen(fn, "w");	free(fn);	if (f == NULL) {		DEBUG(("dbzagain: unable to write new .dir\n"));		return(-1);	}	i = putconf(f, &c);	(void) fclose(f);	if (i < 0) {		DEBUG(("dbzagain: putconf failed\n"));		return(-1);	}	/* create/truncate .pag */	fn = enstring(name, pag);	if (fn == NULL)		return(-1);	f = fopen(fn, "w");	free(fn);	if (f == NULL) {		DEBUG(("dbzagain: unable to create/truncate .pag file\n"));		return(-1);
dbz.c - 源码说明

本页面展示了「gcc-2.95.3 Linux下最常用的C编译器」中的 dbz.c 源码文件，采用 C语言编程语言编写，共 1,769 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Linux相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?