📄 tdb.c

📁 Linux下的一个关系数据库源代码
💻 C
📖 第 1 页 / 共 3 页
字号:
				rec->rec_len += leftsize;			}		}	}update:	if (update_tailer(tdb, offset, rec) == -1) {		TDB_LOG((tdb, 0, "tdb_free: update_tailer failed at %u\n", offset));		goto fail;	}	/* Now, prepend to free list */	rec->magic = TDB_FREE_MAGIC;	if (ofs_read(tdb, FREELIST_TOP, &rec->next) == -1 ||	    rec_write(tdb, offset, rec) == -1 ||	    ofs_write(tdb, FREELIST_TOP, &offset) == -1) {		TDB_LOG((tdb, 0, "tdb_free record write failed at offset=%d\n", offset));		goto fail;	}	/* And we're done. */	tdb_unlock(tdb, -1, F_WRLCK);	return 0; fail:	tdb_unlock(tdb, -1, F_WRLCK);	return -1;}/* expand a file.  we prefer to use ftruncate, as that is what posix  says to use for mmap expansion */static int expand_file(TDB_CONTEXT *tdb, tdb_off size, tdb_off addition){	char buf[1024];#if HAVE_FTRUNCATE_EXTEND	if (ftruncate(tdb->fd, size+addition) != 0) {		TDB_LOG((tdb, 0, "expand_file ftruncate to %d failed (%s)\n", 			   size+addition, strerror(errno)));		return -1;	}#else	char b = 0;#ifdef HAVE_PWRITE	if (pwrite(tdb->fd,  &b, 1, (size+addition) - 1) != 1) {#else	if (lseek(tdb->fd, (size+addition) - 1, SEEK_SET) != (size+addition) - 1 || 	    write(tdb->fd, &b, 1) != 1) {#endif		TDB_LOG((tdb, 0, "expand_file to %d failed (%s)\n", 			   size+addition, strerror(errno)));		return -1;	}#endif	/* now fill the file with something. This ensures that the file isn't sparse, which would be	   very bad if we ran out of disk. This must be done with write, not via mmap */	memset(buf, 0x42, sizeof(buf));	while (addition) {		int n = addition>sizeof(buf)?sizeof(buf):addition;#ifdef HAVE_PWRITE		int ret = pwrite(tdb->fd, buf, n, size);#else		int ret;		if (lseek(tdb->fd, size, SEEK_SET) != size)			return -1;		ret = write(tdb->fd, buf, n);#endif		if (ret != n) {			TDB_LOG((tdb, 0, "expand_file write of %d failed (%s)\n", 				   n, strerror(errno)));			return -1;		}		addition -= n;		size += n;	}	return 0;}/* expand the database at least size bytes by expanding the underlying   file and doing the mmap again if necessary */static int tdb_expand(TDB_CONTEXT *tdb, tdb_off size){	struct list_struct rec;	tdb_off offset;	if (tdb_lock(tdb, -1, F_WRLCK) == -1) {		TDB_LOG((tdb, 0, "lock failed in tdb_expand\n"));		return -1;	}	/* must know about any previous expansions by another process */	tdb_oob(tdb, tdb->map_size + 1, 1);	/* always make room for at least 10 more records, and round           the database up to a multiple of TDB_PAGE_SIZE */	size = TDB_ALIGN(tdb->map_size + size*10, TDB_PAGE_SIZE) - tdb->map_size;	if (!(tdb->flags & TDB_INTERNAL))		tdb_munmap(tdb);	/*	 * We must ensure the file is unmapped before doing this	 * to ensure consistency with systems like OpenBSD where	 * writes and mmaps are not consistent.	 */	/* expand the file itself */	if (!(tdb->flags & TDB_INTERNAL)) {		if (expand_file(tdb, tdb->map_size, size) != 0)			goto fail;	}	tdb->map_size += size;	if (tdb->flags & TDB_INTERNAL)		tdb->map_ptr = realloc(tdb->map_ptr, tdb->map_size);	else {		/*		 * We must ensure the file is remapped before adding the space		 * to ensure consistency with systems like OpenBSD where		 * writes and mmaps are not consistent.		 */		/* We're ok if the mmap fails as we'll fallback to read/write */		tdb_mmap(tdb);	}	/* form a new freelist record */	memset(&rec,'\0',sizeof(rec));	rec.rec_len = size - sizeof(rec);	/* link it into the free list */	offset = tdb->map_size - size;	if (tdb_free(tdb, offset, &rec) == -1)		goto fail;	tdb_unlock(tdb, -1, F_WRLCK);	return 0; fail:	tdb_unlock(tdb, -1, F_WRLCK);	return -1;}/* allocate some space from the free list. The offset returned points   to a unconnected list_struct within the database with room for at   least length bytes of total data   0 is returned if the space could not be allocated */static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length,			    struct list_struct *rec){	tdb_off rec_ptr, last_ptr, newrec_ptr;	struct list_struct newrec;	if (tdb_lock(tdb, -1, F_WRLCK) == -1)		return 0;	/* Extra bytes required for tailer */	length += sizeof(tdb_off); again:	last_ptr = FREELIST_TOP;	/* read in the freelist top */	if (ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1)		goto fail;	/* keep looking until we find a freelist record big enough */	while (rec_ptr) {		if (rec_free_read(tdb, rec_ptr, rec) == -1)			goto fail;		if (rec->rec_len >= length) {			/* found it - now possibly split it up  */			if (rec->rec_len > length + MIN_REC_SIZE) {				/* Length of left piece */				length = TDB_ALIGN(length, TDB_ALIGNMENT);				/* Right piece to go on free list */				newrec.rec_len = rec->rec_len					- (sizeof(*rec) + length);				newrec_ptr = rec_ptr + sizeof(*rec) + length;				/* And left record is shortened */				rec->rec_len = length;			} else				newrec_ptr = 0;			/* Remove allocated record from the free list */			if (ofs_write(tdb, last_ptr, &rec->next) == -1)				goto fail;			/* Update header: do this before we drop alloc                           lock, otherwise tdb_free() might try to                           merge with us, thinking we're free.                           (Thanks Jeremy Allison). */			rec->magic = TDB_MAGIC;			if (rec_write(tdb, rec_ptr, rec) == -1)				goto fail;			/* Did we create new block? */			if (newrec_ptr) {				/* Update allocated record tailer (we                                   shortened it). */				if (update_tailer(tdb, rec_ptr, rec) == -1)					goto fail;				/* Free new record */				if (tdb_free(tdb, newrec_ptr, &newrec) == -1)					goto fail;			}			/* all done - return the new record offset */			tdb_unlock(tdb, -1, F_WRLCK);			return rec_ptr;		}		/* move to the next record */		last_ptr = rec_ptr;		rec_ptr = rec->next;	}	/* we didn't find enough space. See if we can expand the	   database and if we can then try again */	if (tdb_expand(tdb, length + sizeof(*rec)) == 0)		goto again; fail:	tdb_unlock(tdb, -1, F_WRLCK);	return 0;}/* initialise a new database with a specified hash size */static int tdb_new_database(TDB_CONTEXT *tdb, int hash_size){	struct tdb_header *newdb;	int size, ret = -1;	/* We make it up in memory, then write it out if not internal */	size = sizeof(struct tdb_header) + (hash_size+1)*sizeof(tdb_off);	if (!(newdb = calloc(size, 1)))		return TDB_ERRCODE(TDB_ERR_OOM, -1);	/* Fill in the header */	newdb->version = TDB_VERSION;	newdb->hash_size = hash_size;#ifdef USE_SPINLOCKS	newdb->rwlocks = size;#endif	if (tdb->flags & TDB_INTERNAL) {		tdb->map_size = size;		tdb->map_ptr = (char *)newdb;		memcpy(&tdb->header, newdb, sizeof(tdb->header));		/* Convert the `ondisk' version if asked. */		CONVERT(*newdb);		return 0;	}	if (lseek(tdb->fd, 0, SEEK_SET) == -1)		goto fail;	if (ftruncate(tdb->fd, 0) == -1)		goto fail;	/* This creates an endian-converted header, as if read from disk */	CONVERT(*newdb);	memcpy(&tdb->header, newdb, sizeof(tdb->header));	/* Don't endian-convert the magic food! */	memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1);	if (write(tdb->fd, newdb, size) != size)		ret = -1;	else		ret = tdb_create_rwlocks(tdb->fd, hash_size);  fail:	free(newdb);	return ret;}/* Returns 0 on fail.  On success, return offset of record, and fills   in rec */static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, u32 hash,			struct list_struct *r){	tdb_off rec_ptr;		/* read in the hash top */	if (ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)		return 0;	/* keep looking until we find the right record */	while (rec_ptr) {		if (rec_read(tdb, rec_ptr, r) == -1)			return 0;		if (!TDB_DEAD(r) && hash==r->full_hash && key.dsize==r->key_len) {			char *k;			/* a very likely hit - read the key */			k = tdb_alloc_read(tdb, rec_ptr + sizeof(*r), 					   r->key_len);			if (!k)				return 0;			if (memcmp(key.dptr, k, key.dsize) == 0) {				free(k);				return rec_ptr;			}			free(k);		}		rec_ptr = r->next;	}	return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);}/* If they do lockkeys, check that this hash is one they locked */static int tdb_keylocked(TDB_CONTEXT *tdb, u32 hash){	u32 i;	if (!tdb->lockedkeys)		return 1;	for (i = 0; i < tdb->lockedkeys[0]; i++)		if (tdb->lockedkeys[i+1] == hash)			return 1;	return TDB_ERRCODE(TDB_ERR_NOLOCK, 0);}/* As tdb_find, but if you succeed, keep the lock */static tdb_off tdb_find_lock(TDB_CONTEXT *tdb, TDB_DATA key, int locktype,			     struct list_struct *rec){	u32 hash, rec_ptr;	hash = tdb_hash(&key);	if (!tdb_keylocked(tdb, hash))		return 0;	if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)		return 0;	if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))		tdb_unlock(tdb, BUCKET(hash), locktype);	return rec_ptr;}enum TDB_ERROR tdb_error(TDB_CONTEXT *tdb){	return tdb->ecode;}static struct tdb_errname {	enum TDB_ERROR ecode; const char *estring;} emap[] = { {TDB_SUCCESS, "Success"},	     {TDB_ERR_CORRUPT, "Corrupt database"},	     {TDB_ERR_IO, "IO Error"},	     {TDB_ERR_LOCK, "Locking error"},	     {TDB_ERR_OOM, "Out of memory"},	     {TDB_ERR_EXISTS, "Record exists"},	     {TDB_ERR_NOLOCK, "Lock exists on other keys"},	     {TDB_ERR_NOEXIST, "Record does not exist"} };/* Error string for the last tdb error */const char *tdb_errorstr(TDB_CONTEXT *tdb){	u32 i;	for (i = 0; i < sizeof(emap) / sizeof(struct tdb_errname); i++)		if (tdb->ecode == emap[i].ecode)			return emap[i].estring;	return "Invalid error code";}/* update an entry in place - this only works if the new data size   is <= the old data size and the key exists.   on failure return -1*/static int tdb_update(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf){	struct list_struct rec;	tdb_off rec_ptr;	int ret = -1;	/* find entry */	if (!(rec_ptr = tdb_find_lock(tdb, key, F_WRLCK, &rec)))		return -1;	/* must be long enough key, data and tailer */	if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off)) {		tdb->ecode = TDB_SUCCESS; /* Not really an error */		goto out;	}	if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,		      dbuf.dptr, dbuf.dsize) == -1)		goto out;	if (dbuf.dsize != rec.data_len) {		/* update size */		rec.data_len = dbuf.dsize;		ret = rec_write(tdb, rec_ptr, &rec);	} else		ret = 0; out:	tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK);	return ret;}/* find an entry in the database given a key */TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key){	tdb_off rec_ptr;	struct list_struct rec;	TDB_DATA ret;	/* find which hash bucket it is in */	if (!(rec_ptr = tdb_find_lock(tdb,key,F_RDLCK,&rec)))		return tdb_null;	ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,				  rec.data_len);	ret.dsize = rec.data_len;	tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);	return ret;}/* check if an entry in the database exists    note that 1 is returned if the key is found and 0 is returned if not found   this doesn't match the conventions in the rest of this module, but is   compatible with gdbm*/int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key){	struct list_struct rec;		if (tdb_find_lock(tdb, key, F_RDLCK, &rec) == 0)		return 0;	tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);	return 1;}/* record lock stops delete underneath */static int lock_record(TDB_CONTEXT *tdb, tdb_off off){	return off ? tdb_brlock(tdb, off, F_RDLCK, F_SETLKW, 0) : 0;}/*  Write locks override our own fcntl readlocks, so check it here.  Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not  an error to fail to get the lock here.*/ static int write_lock_record(TDB_CONTEXT *tdb, tdb_off off){	struct tdb_traverse_lock *i;	for (i = &tdb->travlocks; i; i = i->next)		if (i->off == off)			return -1;	return tdb_brlock(tdb, off, F_WRLCK, F_SETLK, 1);}/*  Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not  an error to fail to get the lock here.*/static int write_unlock_record(TDB_CONTEXT *tdb, tdb_off off){	return tdb_brlock(tdb, off, F_UNLCK, F_SETLK, 0);}/* fcntl locks don't stack: avoid unlocking someone else's */static int unlock_record(TDB_CONTEXT *tdb, tdb_off off){	struct tdb_traverse_lock *i;	u32 count = 0;	if (off == 0)		return 0;	for (i = &tdb->travlocks; i; i = i->next)		if (i->off == off)			count++;	return (count == 1 ? tdb_brlock(tdb, off, F_UNLCK, F_SETLKW, 0) : 0);}/* actually delete an entry in the database given the offset */static int do_delete(TDB_CONTEXT *tdb, tdb_off rec_ptr, struct list_struct*rec){	tdb_off last_ptr, i;	struct list_struct lastrec;	if (tdb->read_only) return -1;	if (write_lock_record(tdb, rec_ptr) == -1) {		/* Someone traversing here: mark it as dead */		rec->magic = TDB_DEAD_MAGIC;		return rec_write(tdb, rec_ptr, rec);	}	write_unlock_record(tdb, rec_ptr);	/* find previous record in hash chain */	if (ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)		return -1;	for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)		if (rec_read(tdb, i, &lastrec) == -1)			return -1;	/* unlink it: next ptr is at start of record. */	if (last_ptr == 0)		last_ptr = TDB_HASH_TOP(rec->full_hash);	if (ofs_write(tdb, last_ptr, &rec->next) == -1)		return -1;	/* recover the space */	if (tdb_free(tdb, rec_ptr, rec) == -1)		return -1;	return 0;}/* Uses traverse lock: 0 = finish, -1 = error, other = record offset */static int tdb_next_lock(TDB_CONTEXT *tdb, struct tdb_traverse_lock *tlock,			 struct list_struct *rec){	int want_next = (tlock->off != 0);	/* No traversal allows if you've called tdb_lockkeys() */	if (tdb->lockedkeys)		return TDB_ERRCODE(TDB_ERR_NOLOCK, -1);	/* Lock each chain from the start one. */	for (; tlock->hash < tdb->header.hash_size; tlock->hash++) {		if (tdb_lock(tdb, tlock->hash, F_WRLCK) == -1)			return -1;		/* No previous record?  Start at top of chain. */		if (!tlock->off) {			if (ofs_read(tdb, TDB_HASH_TOP(tlock->hash),				     &tlock->off) == -1)				goto fail;		} else {			/* Otherwise unlock the previous record. */			unlock_record(tdb, tlock->off);		}		if (want_next) {			/* We have offset of old record: grab next */			if (rec_read(tdb, tlock->off, rec) == -1)				goto fail;			tlock->off = rec->next;		}		/* Iterate through chain */		while( tlock->off) {			tdb_off current;			if (rec_read(tdb, tlock->off, rec) == -1)				goto fail;			if (!TDB_DEAD(rec)) {				/* Woohoo: we found one! */				lock_record(tdb, tlock->off);				return tlock->off;			}			/* Try to clean dead ones from old traverses */			current = tlock->off;			tlock->off = rec->next;			do_delete(tdb, current, rec);		}		tdb_unlock(tdb, tlock->hash, F_WRLCK);		want_next = 0;	}	/* We finished iteration without finding anything */	return TDB_ERRCODE(TDB_SUCCESS, 0); fail:	tlock->off = 0;	tdb_unlock(tdb, tlock->hash, F_WRLCK);	return -1;}/* traverse the entire database - calling fn(tdb, key, data) on each element.   return -1 on error or the record count traversed   if fn is NULL then it is not called   a non-zero return value from fn() indicates that the traversal should stop  */int tdb_traverse(TDB_CONTEXT *tdb, tdb_traverse_func fn, void *state){
💿 文件大小 137 K
👤 上传用户 junglili
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#Linux #数据库 #源代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -