📄 dbz.c
字号:
/*dbz.c V3.2Copyright 1988 Jon Zeeff (zeeff@b-tech.ann-arbor.mi.us)You can use this code in any manner, as long as you leave my name on itand don't hold me responsible for any problems with it.Hacked on by gdb@ninja.UUCP (David Butler); Sun Jun 5 00:27:08 CDT 1988Various improvments + INCORE by moraes@ai.toronto.edu (Mark Moraes)Major reworking by Henry Spencer as part of the C News project.These routines replace dbm as used by the usenet news software(it's not a full dbm replacement by any means). It's fast andsimple. It contains no AT&T code.In general, dbz's files are 1/20 the size of dbm's. Lookup performanceis somewhat better, while file creation is spectacularly faster, especiallyif the incore facility is used.*/#include <stdio.h>#include <sys/types.h>#include <string.h>#include <ctype.h>#include <errno.h>#ifndef __STDC__extern int errno;#endif#include <dbz.h>/* * #ifdef index. "LIA" = "leave it alone unless you know what you're doing". * * FUNNYSEEKS SEEK_SET is not 0, get it from <unistd.h> * INDEX_SIZE backward compatibility with old dbz; avoid using this * NMEMORY number of days of memory for use in sizing new table (LIA) * INCORE backward compatibility with old dbz; use dbzincore() instead * DBZDEBUG enable debugging * DEFSIZE default table size (not as critical as in old dbz) * OLDBNEWS default case mapping as in old B News; set NOBUFFER * BNEWS default case mapping as in current B News; set NOBUFFER * DEFCASE default case-map algorithm selector * NOTAGS fseek offsets are strange, do not do tagging (see below) * NPAGBUF size of .pag buffer, in longs (LIA) * SHISTBUF size of ASCII-file buffer, in bytes (LIA) * MAXRUN length of run which shifts to next table (see below) (LIA) * OVERFLOW long-int arithmetic overflow must be avoided, will trap * NOBUFFER do not buffer hash-table i/o, B News locking is defective */#ifdef FUNNYSEEKS#include <unistd.h>#else#define SEEK_SET 0#endif#ifdef OVERFLOW#include <limits.h>#endifstatic int dbzversion = 3; /* for validating .dir file format *//* * The dbz database exploits the fact that when news stores a <key,value> * tuple, the `value' part is a seek offset into a text file, pointing to * a copy of the `key' part. This avoids the need to store a copy of * the key in the dbz files. However, the text file *must* exist and be * consistent with the dbz files, or things will fail. * * The basic format of the database is a simple hash table containing the * values. A value is stored by indexing into the table using a hash value * computed from the key; collisions are resolved by linear probing (just * search forward for an empty slot, wrapping around to the beginning of * the table if necessary). Linear probing is a performance disaster when * the table starts to get full, so a complication is introduced. The * database is actually one *or more* tables, stored sequentially in the * .pag file, and the length of linear-probe sequences is limited. The * search (for an existing item or an empty slot) always starts in the * first table, and whenever MAXRUN probes have been done in table N, * probing continues in table N+1. This behaves reasonably well even in * cases of massive overflow. There are some other small complications * added, see comments below. * * The table size is fixed for any particular database, but is determined * dynamically when a database is rebuilt. The strategy is to try to pick * the size so the first table will be no more than 2/3 full, that being * slightly before the point where performance starts to degrade. (It is * desirable to be a bit conservative because the overflow strategy tends * to produce files with holes in them, which is a nuisance.) *//* * The following is for backward compatibility. */#ifdef INDEX_SIZE#define DEFSIZE INDEX_SIZE#endif/* * ANSI C says the offset argument to fseek is a long, not an off_t, for some * reason. Let's use off_t anyway. */#define SOF (sizeof(off_t))/* * We assume that unused areas of a binary file are zeros, and that the * bit pattern of `(off_t)0' is all zeros. The alternative is rather * painful file initialization. Note that okayvalue(), if OVERFLOW is * defined, knows what value of an offset would cause overflow. */#define VACANT ((off_t)0)#define BIAS(o) ((o)+1) /* make any valid off_t non-VACANT */#define UNBIAS(o) ((o)-1) /* reverse BIAS() effect *//* * In a Unix implementation, or indeed any in which an off_t is a byte * count, there are a bunch of high bits free in an off_t. There is a * use for them. Checking a possible hit by looking it up in the base * file is relatively expensive, and the cost can be dramatically reduced * by using some of those high bits to tag the value with a few more bits * of the key's hash. This detects most false hits without the overhead of * seek+read+strcmp. We use the top bit to indicate whether the value is * tagged or not, and don't tag a value which is using the tag bits itself. * We're in trouble if the off_t representation wants to use the top bit. * The actual bitmasks and offset come from the configuration stuff, * which permits fiddling with them as necessary, and also suppressing * them completely (by defining the masks to 0). We build pre-shifted * versions of the masks for efficiency. */static off_t tagbits; /* pre-shifted tag mask */static off_t taghere; /* pre-shifted tag-enable bit */static off_t tagboth; /* tagbits|taghere */#define HASTAG(o) ((o)&taghere)#define TAG(o) ((o)&tagbits)#define NOTAG(o) ((o)&~tagboth)#define CANTAG(o) (((o)&tagboth) == 0)#define MKTAG(v) (((v)<<conf.tagshift)&tagbits)/* * A new, from-scratch database, not built as a rebuild of an old one, * needs to know table size, casemap algorithm, and tagging. Normally * the user supplies this info, but there have to be defaults. */#ifndef DEFSIZE#define DEFSIZE 120011 /* 300007 might be better */#endif#ifdef OLDBNEWS#define DEFCASE '0' /* B2.10 -- no mapping */#define NOBUFFER /* B News locking is defective */#endif#ifdef BNEWS#define DEFCASE '=' /* B2.11 -- all mapped */#define NOBUFFER /* B News locking is defective */#endif#ifndef DEFCASE /* C News compatibility is the default */#define DEFCASE 'C' /* C News -- RFC822 mapping */#endif#ifndef NOTAGS#define TAGENB 0x80 /* tag enable is top bit, tag is next 7 */#define TAGMASK 0x7f#define TAGSHIFT 24#else#define TAGENB 0 /* no tags */#define TAGMASK 0#define TAGSHIFT 0#endif/* * We read configuration info from the .dir file into this structure, * so we can avoid wired-in assumptions for an existing database. * * Among the info is a record of recent peak usages, so that a new table * size can be chosen intelligently when rebuilding. 10 is a good * number of usages to keep, since news displays marked fluctuations * in volume on a 7-day cycle. */struct dbzconfig { int olddbz; /* .dir file empty but .pag not? */ off_t tsize; /* table size */# ifndef NMEMORY# define NMEMORY 10 /* # days of use info to remember */# endif# define NUSEDS (1+NMEMORY) off_t used[NUSEDS]; /* entries used today, yesterday, ... */ int valuesize; /* size of table values, == SOF */ int bytemap[SOF]; /* byte-order map */ char casemap; /* case-mapping algorithm (see cipoint()) */ char fieldsep; /* field separator in base file, if any */ off_t tagenb; /* unshifted tag-enable bit */ off_t tagmask; /* unshifted tag mask */ int tagshift; /* shift count for tagmask and tagenb */};static struct dbzconfig conf;static int getconf();static long getno();static int putconf();static void mybytemap();static off_t bytemap();/* * For a program that makes many, many references to the database, it * is a large performance win to keep the table in core, if it will fit. * Note that this does hurt robustness in the event of crashes, and * dbmclose() *must* be called to flush the in-core database to disk. * The code is prepared to deal with the possibility that there isn't * enough memory. There *is* an assumption that a size_t is big enough * to hold the size (in bytes) of one table, so dbminit() tries to figure * out whether this is possible first. * * The preferred way to ask for an in-core table is to do dbzincore(1) * before dbminit(). The default is not to do it, although -DINCORE * overrides this for backward compatibility with old dbz. * * We keep only the first table in core. This greatly simplifies the * code, and bounds memory demand. Furthermore, doing this is a large * performance win even in the event of massive overflow. */#ifdef INCOREstatic int incore = 1;#elsestatic int incore = 0;#endif/* * Stdio buffer for .pag reads. Buffering more than about 16 does not help * significantly at the densities we try to maintain, and the much larger * buffers that most stdios default to are much more expensive to fill. * With small buffers, stdio is performance-competitive with raw read(), * and it's much more portable. */#ifndef NPAGBUF#define NPAGBUF 16#endif#ifndef NOBUFFER#ifdef _IOFBFstatic off_t pagbuf[NPAGBUF]; /* only needed if !NOBUFFER && _IOFBF */#endif#endif/* * Stdio buffer for base-file reads. Message-IDs (all news ever needs to * read) are essentially never longer than 64 bytes, and the typical stdio * buffer is so much larger that it is much more expensive to fill. */#ifndef SHISTBUF#define SHISTBUF 64#endif#ifdef _IOFBFstatic char basebuf[SHISTBUF]; /* only needed if _IOFBF exists */#endif/* * Data structure for recording info about searches. */struct searcher { off_t place; /* current location in file */ int tabno; /* which table we're in */ int run; /* how long we'll stay in this table */# ifndef MAXRUN# define MAXRUN 100# endif long hash; /* the key's hash code (for optimization) */ off_t tag; /* tag we are looking for */ int seen; /* have we examined current location? */ int aborted; /* has i/o error aborted search? */};static void start();#define FRESH ((struct searcher *)NULL)static off_t search();#define NOTFOUND ((off_t)-1)static int okayvalue();static int set();/* * Arguably the searcher struct for a given routine ought to be local to * it, but a fetch() is very often immediately followed by a store(), and * in some circumstances it is a useful performance win to remember where * the fetch() completed. So we use a global struct and remember whether * it is current. */static struct searcher srch;static struct searcher *prevp; /* &srch or FRESH *//* byte-ordering stuff */static int mybmap[SOF]; /* my byte order (see mybytemap()) */static int bytesame; /* is database order same as mine? */#define MAPIN(o) ((bytesame) ? (o) : bytemap((o), conf.bytemap, mybmap))#define MAPOUT(o) ((bytesame) ? (o) : bytemap((o), mybmap, conf.bytemap))/* * The double parentheses needed to make this work are ugly, but the * alternative (under most compilers) is to pack around 2K of unused * strings -- there's just no way to get rid of them. */static int debug; /* controlled by dbzdebug() */#ifdef DBZDEBUG#define DEBUG(args) if (debug) { (void) printf args ; }#else#define DEBUG(args) ;#endif/* externals used */extern char *malloc();extern char *calloc();extern void free(); /* ANSI C; some old implementations say int */extern int atoi();extern long atol();/* misc. forwards */static long hash();static void crcinit();static char *cipoint();static char *mapcase();static int isprime();static FILE *latebase();/* file-naming stuff */static char dir[] = ".dir";static char pag[] = ".pag";static char *enstring();/* central data structures */static FILE *basef; /* descriptor for base file */static char *basefname; /* name for not-yet-opened base file */static FILE *dirf; /* descriptor for .dir file */static int dirronly; /* dirf open read-only? */static FILE *pagf = NULL; /* descriptor for .pag file */static off_t pagpos; /* posn in pagf; only search may set != -1 */static int pagronly; /* pagf open read-only? */static off_t *corepag; /* incore version of .pag file, if any */static FILE *bufpagf; /* well-buffered pagf, for incore rewrite */static off_t *getcore();static int putcore();static int written; /* has a store() been done? *//* - dbzfresh - set up a new database, no historical info */int /* 0 success, -1 failure */dbzfresh(name, size, fs, cmap, tagmask)char *name; /* base name; .dir and .pag must exist */long size; /* table size (0 means default) */int fs; /* field-separator character in base file */int cmap; /* case-map algorithm (0 means default) */off_t tagmask; /* 0 default, 1 no tags */{ register char *fn; struct dbzconfig c; register off_t m; register FILE *f; if (pagf != NULL) { DEBUG(("dbzfresh: database already open\n")); return(-1); } if (size != 0 && size < 2) { DEBUG(("dbzfresh: preposterous size (%ld)\n", size)); return(-1); } /* get default configuration */ if (getconf((FILE *)NULL, (FILE *)NULL, &c) < 0) return(-1); /* "can't happen" */ /* and mess with it as specified */ if (size != 0) c.tsize = size; c.fieldsep = fs; switch (cmap) { case 0: case '0': case 'B': /* 2.10 compat */ c.casemap = '0'; /* '\0' nicer, but '0' printable! */ break; case '=': case 'b': /* 2.11 compat */ c.casemap = '='; break; case 'C': c.casemap = 'C'; break; case '?': c.casemap = DEFCASE; break; default: DEBUG(("dbzfresh case map `%c' unknown\n", cmap)); return(-1); break; } switch (tagmask) { case 0: /* default */ break; case 1: /* no tags */ c.tagshift = 0; c.tagmask = 0; c.tagenb = 0; break; default: m = tagmask; c.tagshift = 0; while (!(m&01)) { m >>= 1; c.tagshift++; } c.tagmask = m; c.tagenb = (m << 1) & ~m; break; } /* write it out */ fn = enstring(name, dir); if (fn == NULL) return(-1); f = fopen(fn, "w"); free(fn); if (f == NULL) { DEBUG(("dbzfresh: unable to write config\n")); return(-1); } if (putconf(f, &c) < 0) { (void) fclose(f); return(-1); } if (fclose(f) == EOF) { DEBUG(("dbzfresh: fclose failure\n")); return(-1); } /* create/truncate .pag */ fn = enstring(name, pag); if (fn == NULL) return(-1); f = fopen(fn, "w"); free(fn); if (f == NULL) { DEBUG(("dbzfresh: unable to create/truncate .pag file\n")); return(-1); } else (void) fclose(f); /* and punt to dbminit for the hard work */ return(dbminit(name));}/* - dbzsize - what's a good table size to hold this many entries? */longdbzsize(contents)long contents; /* 0 means what's the default */{ register long n; if (contents <= 0) { /* foulup or default inquiry */ DEBUG(("dbzsize: preposterous input (%ld)\n", contents)); return(DEFSIZE); } n = (contents/2)*3; /* try to keep table at most 2/3 full */ if (!(n&01)) /* make it odd */ n++; DEBUG(("dbzsize: tentative size %ld\n", n)); while (!isprime(n)) /* and look for a prime */ n += 2; DEBUG(("dbzsize: final size %ld\n", n)); return(n);}/* - isprime - is a number prime? * * This is not a terribly efficient approach. */static int /* predicate */isprime(x)register long x;{ static int quick[] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 0 }; register int *ip; register long div; register long stop; /* hit the first few primes quickly to eliminate easy ones */ /* this incidentally prevents ridiculously small tables */ for (ip = quick; (div = *ip) != 0; ip++) if (x%div == 0) { DEBUG(("isprime: quick result on %ld\n", (long)x)); return(0); } /* approximate square root of x */ for (stop = x; x/stop < stop; stop >>= 1) continue; stop <<= 1; /* try odd numbers up to stop */ for (div = *--ip; div < stop; div += 2) if (x%div == 0) return(0); return(1);}/* - dbzagain - set up a new database to be a rebuild of an old one */int /* 0 success, -1 failure */dbzagain(name, oldname)char *name; /* base name; .dir and .pag must exist */char *oldname; /* base name; all must exist */{ register char *fn; struct dbzconfig c; register int i; register long top; register FILE *f; register int newtable; register off_t newsize; if (pagf != NULL) { DEBUG(("dbzagain: database already open\n")); return(-1); } /* pick up the old configuration */ fn = enstring(oldname, dir); if (fn == NULL) return(-1); f = fopen(fn, "r"); free(fn); if (f == NULL) { DEBUG(("dbzagain: cannot open old .dir file\n")); return(-1); } i = getconf(f, (FILE *)NULL, &c); (void) fclose(f); if (i < 0) { DEBUG(("dbzagain: getconf failed\n")); return(-1); } /* tinker with it */ top = 0; newtable = 0; for (i = 0; i < NUSEDS; i++) { if (top < c.used[i]) top = c.used[i]; if (c.used[i] == 0) newtable = 1; /* hasn't got full usage history yet */ } if (top == 0) { DEBUG(("dbzagain: old table has no contents!\n")); newtable = 1; } for (i = NUSEDS-1; i > 0; i--) c.used[i] = c.used[i-1]; c.used[0] = 0; newsize = dbzsize(top); if (!newtable || newsize > c.tsize) /* don't shrink new table */ c.tsize = newsize; /* write it out */ fn = enstring(name, dir); if (fn == NULL) return(-1); f = fopen(fn, "w"); free(fn); if (f == NULL) { DEBUG(("dbzagain: unable to write new .dir\n")); return(-1); } i = putconf(f, &c); (void) fclose(f); if (i < 0) { DEBUG(("dbzagain: putconf failed\n")); return(-1); } /* create/truncate .pag */ fn = enstring(name, pag); if (fn == NULL) return(-1); f = fopen(fn, "w"); free(fn); if (f == NULL) { DEBUG(("dbzagain: unable to create/truncate .pag file\n")); return(-1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -