📄 tdb.c
字号:
/* Unix SMB/CIFS implementation. trivial database library Copyright (C) Andrew Tridgell 1999-2004 Copyright (C) Paul `Rusty' Russell 2000 Copyright (C) Jeremy Allison 2000-2003 ** NOTE! The following LGPL license applies to the tdb ** library. This does NOT imply that all of Samba is released ** under the LGPL This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*//* NOTE: If you use tdbs under valgrind, and in particular if you run * tdbtorture, you may get spurious "uninitialized value" warnings. I * think this is because valgrind doesn't understand that the mmap'd * area may be written to by other processes. Memory can, from the * point of view of the grinded process, spontaneously become * initialized. * * I can think of a few solutions. [mbp 20030311] * * 1 - Write suppressions for Valgrind so that it doesn't complain * about this. Probably the most reasonable but people need to * remember to use them. * * 2 - Use IO not mmap when running under valgrind. Not so nice. * * 3 - Use the special valgrind macros to mark memory as valid at the * right time. Probably too hard -- the process just doesn't know. */ #include <stdlib.h>#include <stdio.h>#include <fcntl.h>#include <unistd.h>#include <string.h>#include <fcntl.h>#include <errno.h>#include <sys/mman.h>#include <sys/stat.h>#include <signal.h>#include "tdb.h"#include "spinlock.h"#define TDB_MAGIC_FOOD "TDB file\n"#define TDB_VERSION (0x26011967 + 6)#define TDB_MAGIC (0x26011999U)#define TDB_FREE_MAGIC (~TDB_MAGIC)#define TDB_DEAD_MAGIC (0xFEE1DEAD)#define TDB_ALIGNMENT 4#define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGNMENT)#define DEFAULT_HASH_SIZE 131#define TDB_PAGE_SIZE 0x2000#define FREELIST_TOP (sizeof(struct tdb_header))#define TDB_ALIGN(x,a) (((x) + (a)-1) & ~((a)-1))#define TDB_BYTEREV(x) (((((x)&0xff)<<24)|((x)&0xFF00)<<8)|(((x)>>8)&0xFF00)|((x)>>24))#define TDB_DEAD(r) ((r)->magic == TDB_DEAD_MAGIC)#define TDB_BAD_MAGIC(r) ((r)->magic != TDB_MAGIC && !TDB_DEAD(r))#define TDB_HASH_TOP(hash) (FREELIST_TOP + (BUCKET(hash)+1)*sizeof(tdb_off))#define TDB_DATA_START(hash_size) (TDB_HASH_TOP(hash_size-1) + TDB_SPINLOCK_SIZE(hash_size))/* NB assumes there is a local variable called "tdb" that is the * current context, also takes doubly-parenthesized print-style * argument. */#define TDB_LOG(x) (tdb->log_fn?((tdb->log_fn x),0) : 0)/* lock offsets */#define GLOBAL_LOCK 0#define ACTIVE_LOCK 4#ifndef MAP_FILE#define MAP_FILE 0#endif#ifndef MAP_FAILED#define MAP_FAILED ((void *)-1)#endif/* free memory if the pointer is valid and zero the pointer */#ifndef SAFE_FREE#define SAFE_FREE(x) do { if ((x) != NULL) {free((x)); (x)=NULL;} } while(0)#endif#define BUCKET(hash) ((hash) % tdb->header.hash_size)TDB_DATA tdb_null;/* all contexts, to ensure no double-opens (fcntl locks don't nest!) */static TDB_CONTEXT *tdbs = NULL;static int tdb_munmap(TDB_CONTEXT *tdb){ if (tdb->flags & TDB_INTERNAL) return 0;#ifdef HAVE_MMAP if (tdb->map_ptr) { int ret = munmap(tdb->map_ptr, tdb->map_size); if (ret != 0) return ret; }#endif tdb->map_ptr = NULL; return 0;}static void tdb_mmap(TDB_CONTEXT *tdb){ if (tdb->flags & TDB_INTERNAL) return;#ifdef HAVE_MMAP if (!(tdb->flags & TDB_NOMMAP)) { tdb->map_ptr = mmap(NULL, tdb->map_size, PROT_READ|(tdb->read_only? 0:PROT_WRITE), MAP_SHARED|MAP_FILE, tdb->fd, 0); /* * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!! */ if (tdb->map_ptr == MAP_FAILED) { tdb->map_ptr = NULL; TDB_LOG((tdb, 2, "tdb_mmap failed for size %d (%s)\n", tdb->map_size, strerror(errno))); } } else { tdb->map_ptr = NULL; }#else tdb->map_ptr = NULL;#endif}/* Endian conversion: we only ever deal with 4 byte quantities */static void *convert(void *buf, u32 size){ u32 i, *p = buf; for (i = 0; i < size / 4; i++) p[i] = TDB_BYTEREV(p[i]); return buf;}#define DOCONV() (tdb->flags & TDB_CONVERT)#define CONVERT(x) (DOCONV() ? convert(&x, sizeof(x)) : &x)/* the body of the database is made of one list_struct for the free space plus a separate data list for each hash value */struct list_struct { tdb_off next; /* offset of the next record in the list */ tdb_len rec_len; /* total byte length of record */ tdb_len key_len; /* byte length of key */ tdb_len data_len; /* byte length of data */ u32 full_hash; /* the full 32 bit hash of the key */ u32 magic; /* try to catch errors */ /* the following union is implied: union { char record[rec_len]; struct { char key[key_len]; char data[data_len]; } u32 totalsize; (tailer) } */};/*************************************************************** Allow a caller to set a "alarm" flag that tdb can check to abort a blocking lock on SIGALRM.***************************************************************/static sig_atomic_t *palarm_fired;void tdb_set_lock_alarm(sig_atomic_t *palarm){ palarm_fired = palarm;}/* a byte range locking function - return 0 on success this functions locks/unlocks 1 byte at the specified offset. On error, errno is also set so that errors are passed back properly through tdb_open(). */static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset, int rw_type, int lck_type, int probe){ struct flock fl; int ret; if (tdb->flags & TDB_NOLOCK) return 0; if ((rw_type == F_WRLCK) && (tdb->read_only)) { errno = EACCES; return -1; } fl.l_type = rw_type; fl.l_whence = SEEK_SET; fl.l_start = offset; fl.l_len = 1; fl.l_pid = 0; do { ret = fcntl(tdb->fd,lck_type,&fl); if (ret == -1 && errno == EINTR && palarm_fired && *palarm_fired) break; } while (ret == -1 && errno == EINTR); if (ret == -1) { if (!probe && lck_type != F_SETLK) { /* Ensure error code is set for log fun to examine. */ if (errno == EINTR && palarm_fired && *palarm_fired) tdb->ecode = TDB_ERR_LOCK_TIMEOUT; else tdb->ecode = TDB_ERR_LOCK; TDB_LOG((tdb, 5,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d\n", tdb->fd, offset, rw_type, lck_type)); } /* Was it an alarm timeout ? */ if (errno == EINTR && palarm_fired && *palarm_fired) { TDB_LOG((tdb, 5, "tdb_brlock timed out (fd=%d) at offset %d rw_type=%d lck_type=%d\n", tdb->fd, offset, rw_type, lck_type)); return TDB_ERRCODE(TDB_ERR_LOCK_TIMEOUT, -1); } /* Otherwise - generic lock error. errno set by fcntl. * EAGAIN is an expected return from non-blocking * locks. */ if (errno != EAGAIN) { TDB_LOG((tdb, 5, "tdb_brlock failed (fd=%d) at offset %d rw_type=%d lck_type=%d: %s\n", tdb->fd, offset, rw_type, lck_type, strerror(errno))); } return TDB_ERRCODE(TDB_ERR_LOCK, -1); } return 0;}/* lock a list in the database. list -1 is the alloc list */static int tdb_lock(TDB_CONTEXT *tdb, int list, int ltype){ if (list < -1 || list >= (int)tdb->header.hash_size) { TDB_LOG((tdb, 0,"tdb_lock: invalid list %d for ltype=%d\n", list, ltype)); return -1; } if (tdb->flags & TDB_NOLOCK) return 0; /* Since fcntl locks don't nest, we do a lock for the first one, and simply bump the count for future ones */ if (tdb->locked[list+1].count == 0) { if (!tdb->read_only && tdb->header.rwlocks) { if (tdb_spinlock(tdb, list, ltype)) { TDB_LOG((tdb, 0, "tdb_lock spinlock failed on list %d ltype=%d\n", list, ltype)); return -1; } } else if (tdb_brlock(tdb,FREELIST_TOP+4*list,ltype,F_SETLKW, 0)) { TDB_LOG((tdb, 0,"tdb_lock failed on list %d ltype=%d (%s)\n", list, ltype, strerror(errno))); return -1; } tdb->locked[list+1].ltype = ltype; } tdb->locked[list+1].count++; return 0;}/* unlock the database: returns void because it's too late for errors. */ /* changed to return int it may be interesting to know there has been an error --simo */static int tdb_unlock(TDB_CONTEXT *tdb, int list, int ltype){ int ret = -1; if (tdb->flags & TDB_NOLOCK) return 0; /* Sanity checks */ if (list < -1 || list >= (int)tdb->header.hash_size) { TDB_LOG((tdb, 0, "tdb_unlock: list %d invalid (%d)\n", list, tdb->header.hash_size)); return ret; } if (tdb->locked[list+1].count==0) { TDB_LOG((tdb, 0, "tdb_unlock: count is 0\n")); return ret; } if (tdb->locked[list+1].count == 1) { /* Down to last nested lock: unlock underneath */ if (!tdb->read_only && tdb->header.rwlocks) { ret = tdb_spinunlock(tdb, list, ltype); } else { ret = tdb_brlock(tdb, FREELIST_TOP+4*list, F_UNLCK, F_SETLKW, 0); } } else { ret = 0; } tdb->locked[list+1].count--; if (ret) TDB_LOG((tdb, 0,"tdb_unlock: An error occurred unlocking!\n")); return ret;}/* check for an out of bounds access - if it is out of bounds then see if the database has been expanded by someone else and expand if necessary note that "len" is the minimum length needed for the db*/static int tdb_oob(TDB_CONTEXT *tdb, tdb_off len, int probe){ struct stat st; if (len <= tdb->map_size) return 0; if (tdb->flags & TDB_INTERNAL) { if (!probe) { /* Ensure ecode is set for log fn. */ tdb->ecode = TDB_ERR_IO; TDB_LOG((tdb, 0,"tdb_oob len %d beyond internal malloc size %d\n", (int)len, (int)tdb->map_size)); } return TDB_ERRCODE(TDB_ERR_IO, -1); } if (fstat(tdb->fd, &st) == -1) return TDB_ERRCODE(TDB_ERR_IO, -1); if (st.st_size < (size_t)len) { if (!probe) { /* Ensure ecode is set for log fn. */ tdb->ecode = TDB_ERR_IO; TDB_LOG((tdb, 0,"tdb_oob len %d beyond eof at %d\n", (int)len, (int)st.st_size)); } return TDB_ERRCODE(TDB_ERR_IO, -1); } /* Unmap, update size, remap */ if (tdb_munmap(tdb) == -1) return TDB_ERRCODE(TDB_ERR_IO, -1); tdb->map_size = st.st_size; tdb_mmap(tdb); return 0;}/* write a lump of data at a specified offset */static int tdb_write(TDB_CONTEXT *tdb, tdb_off off, void *buf, tdb_len len){ if (tdb_oob(tdb, off + len, 0) != 0) return -1; if (tdb->map_ptr) memcpy(off + (char *)tdb->map_ptr, buf, len);#ifdef HAVE_PWRITE else if (pwrite(tdb->fd, buf, len, off) != (ssize_t)len) {#else else if (lseek(tdb->fd, off, SEEK_SET) != off || write(tdb->fd, buf, len) != (ssize_t)len) {#endif /* Ensure ecode is set for log fn. */ tdb->ecode = TDB_ERR_IO; TDB_LOG((tdb, 0,"tdb_write failed at %d len=%d (%s)\n", off, len, strerror(errno))); return TDB_ERRCODE(TDB_ERR_IO, -1); } return 0;}/* read a lump of data at a specified offset, maybe convert */static int tdb_read(TDB_CONTEXT *tdb,tdb_off off,void *buf,tdb_len len,int cv){ if (tdb_oob(tdb, off + len, 0) != 0) return -1; if (tdb->map_ptr) memcpy(buf, off + (char *)tdb->map_ptr, len);#ifdef HAVE_PREAD else if (pread(tdb->fd, buf, len, off) != (ssize_t)len) {#else else if (lseek(tdb->fd, off, SEEK_SET) != off || read(tdb->fd, buf, len) != (ssize_t)len) {#endif /* Ensure ecode is set for log fn. */ tdb->ecode = TDB_ERR_IO; TDB_LOG((tdb, 0,"tdb_read failed at %d len=%d (%s)\n", off, len, strerror(errno))); return TDB_ERRCODE(TDB_ERR_IO, -1); } if (cv) convert(buf, len); return 0;}/* read a lump of data, allocating the space for it */static char *tdb_alloc_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_len len){ char *buf; if (!(buf = malloc(len))) { /* Ensure ecode is set for log fn. */ tdb->ecode = TDB_ERR_OOM; TDB_LOG((tdb, 0,"tdb_alloc_read malloc failed len=%d (%s)\n", len, strerror(errno))); return TDB_ERRCODE(TDB_ERR_OOM, buf); } if (tdb_read(tdb, offset, buf, len, 0) == -1) { SAFE_FREE(buf); return NULL; } return buf;}/* read/write a tdb_off */static int ofs_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d){ return tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());}static int ofs_write(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d){ tdb_off off = *d; return tdb_write(tdb, offset, CONVERT(off), sizeof(*d));}/* read/write a record */static int rec_read(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec){ if (tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1) return -1; if (TDB_BAD_MAGIC(rec)) { /* Ensure ecode is set for log fn. */ tdb->ecode = TDB_ERR_CORRUPT; TDB_LOG((tdb, 0,"rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset)); return TDB_ERRCODE(TDB_ERR_CORRUPT, -1); } return tdb_oob(tdb, rec->next+sizeof(*rec), 0);}static int rec_write(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec){ struct list_struct r = *rec; return tdb_write(tdb, offset, CONVERT(r), sizeof(r));}/* read a freelist record and check for simple errors */static int rec_free_read(TDB_CONTEXT *tdb, tdb_off off, struct list_struct *rec){ if (tdb_read(tdb, off, rec, sizeof(*rec),DOCONV()) == -1) return -1; if (rec->magic == TDB_MAGIC) { /* this happens when a app is showdown while deleting a record - we should not completely fail when this happens */ TDB_LOG((tdb, 0,"rec_free_read non-free magic 0x%x at offset=%d - fixing\n", rec->magic, off)); rec->magic = TDB_FREE_MAGIC; if (tdb_write(tdb, off, rec, sizeof(*rec)) == -1) return -1; } if (rec->magic != TDB_FREE_MAGIC) { /* Ensure ecode is set for log fn. */ tdb->ecode = TDB_ERR_CORRUPT; TDB_LOG((tdb, 0,"rec_free_read bad magic 0x%x at offset=%d\n", rec->magic, off)); return TDB_ERRCODE(TDB_ERR_CORRUPT, -1); } if (tdb_oob(tdb, rec->next+sizeof(*rec), 0) != 0) return -1; return 0;}/* update a record tailer (must hold allocation lock) */static int update_tailer(TDB_CONTEXT *tdb, tdb_off offset, const struct list_struct *rec){ tdb_off totalsize; /* Offset of tailer from record header */ totalsize = sizeof(*rec) + rec->rec_len; return ofs_write(tdb, offset + totalsize - sizeof(tdb_off), &totalsize);}static tdb_off tdb_dump_record(TDB_CONTEXT *tdb, tdb_off offset){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -