📄 tdb.c
字号:
/* Unix SMB/Netbios implementation. Version 3.0 Samba database functions Copyright (C) Andrew Tridgell 1999-2000 Copyright (C) Luke Kenneth Casson Leighton 2000 Copyright (C) Paul `Rusty' Russell 2000 Copyright (C) Jeremy Allison 2000 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.*/#if HAVE_CONFIG_H#include <config.h>#endif#ifdef STANDALONE#include <stdlib.h>#include <stdio.h>#include <fcntl.h>#include <unistd.h>#include <string.h>#include <fcntl.h>#include <errno.h>#include <sys/mman.h>#include <sys/stat.h>#include "tdb.h"#include "spinlock.h"#else#include "includes.h"#endif#define TDB_MAGIC_FOOD "TDB file\n"#define TDB_VERSION (0x26011967 + 6)#define TDB_MAGIC (0x26011999U)#define TDB_FREE_MAGIC (~TDB_MAGIC)#define TDB_DEAD_MAGIC (0xFEE1DEAD)#define TDB_ALIGNMENT 4#define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGNMENT)#define DEFAULT_HASH_SIZE 131#define TDB_PAGE_SIZE 0x2000#define FREELIST_TOP (sizeof(struct tdb_header))#define TDB_ALIGN(x,a) (((x) + (a)-1) & ~((a)-1))#define TDB_BYTEREV(x) (((((x)&0xff)<<24)|((x)&0xFF00)<<8)|(((x)>>8)&0xFF00)|((x)>>24))#define TDB_DEAD(r) ((r)->magic == TDB_DEAD_MAGIC)#define TDB_BAD_MAGIC(r) ((r)->magic != TDB_MAGIC && !TDB_DEAD(r))#define TDB_HASH_TOP(hash) (FREELIST_TOP + (BUCKET(hash)+1)*sizeof(tdb_off))/* NB assumes there is a local variable called "tdb" that is the * current context, also takes doubly-parenthesized print-style * argument. */#define TDB_LOG(x) (tdb->log_fn?((tdb->log_fn x),0) : 0)/* lock offsets */#define GLOBAL_LOCK 0#define ACTIVE_LOCK 4#ifndef MAP_FILE#define MAP_FILE 0#endif#ifndef MAP_FAILED#define MAP_FAILED ((void *)-1)#endif#define BUCKET(hash) ((hash) % tdb->header.hash_size)TDB_DATA tdb_null;/* all contexts, to ensure no double-opens (fcntl locks don't nest!) */static TDB_CONTEXT *tdbs = NULL;static void tdb_munmap(TDB_CONTEXT *tdb){ if (tdb->flags & TDB_INTERNAL) return;#ifdef HAVE_MMAP if (tdb->map_ptr) munmap(tdb->map_ptr, tdb->map_size);#endif tdb->map_ptr = NULL;}static void tdb_mmap(TDB_CONTEXT *tdb){ if (tdb->flags & TDB_INTERNAL) return;#ifdef HAVE_MMAP if (!(tdb->flags & TDB_NOMMAP)) { tdb->map_ptr = mmap(NULL, tdb->map_size, PROT_READ|(tdb->read_only? 0:PROT_WRITE), MAP_SHARED|MAP_FILE, tdb->fd, 0); /* * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!! */ if (tdb->map_ptr == MAP_FAILED) { tdb->map_ptr = NULL; TDB_LOG((tdb, 2, "tdb_mmap failed for size %d (%s)\n", tdb->map_size, strerror(errno))); } } else { tdb->map_ptr = NULL; }#else tdb->map_ptr = NULL;#endif}/* Endian conversion: we only ever deal with 4 byte quantities */static void *convert(void *buf, u32 size){ u32 i, *p = buf; for (i = 0; i < size / 4; i++) p[i] = TDB_BYTEREV(p[i]); return buf;}#define DOCONV() (tdb->flags & TDB_CONVERT)#define CONVERT(x) (DOCONV() ? convert(&x, sizeof(x)) : &x)/* the body of the database is made of one list_struct for the free space plus a separate data list for each hash value */struct list_struct { tdb_off next; /* offset of the next record in the list */ tdb_len rec_len; /* total byte length of record */ tdb_len key_len; /* byte length of key */ tdb_len data_len; /* byte length of data */ u32 full_hash; /* the full 32 bit hash of the key */ u32 magic; /* try to catch errors */ /* the following union is implied: union { char record[rec_len]; struct { char key[key_len]; char data[data_len]; } u32 totalsize; (tailer) } */};/* a byte range locking function - return 0 on success this functions locks/unlocks 1 byte at the specified offset. On error, errno is also set so that errors are passed back properly through tdb_open(). */static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset, int rw_type, int lck_type, int probe){ struct flock fl; if (tdb->flags & TDB_NOLOCK) return 0; if (tdb->read_only) { errno = EACCES; return -1; } fl.l_type = rw_type; fl.l_whence = SEEK_SET; fl.l_start = offset; fl.l_len = 1; fl.l_pid = 0; if (fcntl(tdb->fd,lck_type,&fl)) { if (!probe) { TDB_LOG((tdb, 5,"tdb_brlock failed at offset %d rw_type=%d lck_type=%d\n", offset, rw_type, lck_type)); } /* errno set by fcntl */ return TDB_ERRCODE(TDB_ERR_LOCK, -1); } return 0;}/* lock a list in the database. list -1 is the alloc list */static int tdb_lock(TDB_CONTEXT *tdb, int list, int ltype){ if (list < -1 || list >= (int)tdb->header.hash_size) { TDB_LOG((tdb, 0,"tdb_lock: invalid list %d for ltype=%d\n", list, ltype)); return -1; } if (tdb->flags & TDB_NOLOCK) return 0; /* Since fcntl locks don't nest, we do a lock for the first one, and simply bump the count for future ones */ if (tdb->locked[list+1].count == 0) { if (!tdb->read_only && tdb->header.rwlocks) { if (tdb_spinlock(tdb, list, ltype)) { TDB_LOG((tdb, 0, "tdb_lock spinlock on list ltype=%d\n", list, ltype)); return -1; } } else if (tdb_brlock(tdb,FREELIST_TOP+4*list,ltype,F_SETLKW, 0)) { TDB_LOG((tdb, 0,"tdb_lock failed on list %d ltype=%d (%s)\n", list, ltype, strerror(errno))); return -1; } tdb->locked[list+1].ltype = ltype; } tdb->locked[list+1].count++; return 0;}/* unlock the database: returns void because it's too late for errors. */static void tdb_unlock(TDB_CONTEXT *tdb, int list, int ltype){ if (tdb->flags & TDB_NOLOCK) return; /* Sanity checks */ if (list < -1 || list >= (int)tdb->header.hash_size) return; if (tdb->locked[list+1].count==0) return; if (tdb->locked[list+1].count == 1) { /* Down to last nested lock: unlock underneath */ if (!tdb->read_only && tdb->header.rwlocks) tdb_spinunlock(tdb, list, ltype); else tdb_brlock(tdb, FREELIST_TOP+4*list, F_UNLCK, F_SETLKW, 0); } tdb->locked[list+1].count--;}/* This is based on the hash agorithm from gdbm */static u32 tdb_hash(TDB_DATA *key){ u32 value; /* Used to compute the hash value. */ u32 i; /* Used to cycle through random values. */ /* Set the initial value from the key size. */ for (value = 0x238F13AF * key->dsize, i=0; i < key->dsize; i++) value = (value + (key->dptr[i] << (i*5 % 24))); return (1103515243 * value + 12345); }/* check for an out of bounds access - if it is out of bounds then see if the database has been expanded by someone else and expand if necessary note that "len" is the minimum length needed for the db*/static int tdb_oob(TDB_CONTEXT *tdb, tdb_off len, int probe){ struct stat st; if (len <= tdb->map_size) return 0; if (tdb->flags & TDB_INTERNAL) { if (!probe) { TDB_LOG((tdb, 0,"tdb_oob len %d beyond internal malloc size %d\n", (int)len, (int)tdb->map_size)); } return TDB_ERRCODE(TDB_ERR_IO, -1); } if (fstat(tdb->fd, &st) == -1) return TDB_ERRCODE(TDB_ERR_IO, -1); if (st.st_size < (size_t)len) { if (!probe) { TDB_LOG((tdb, 0,"tdb_oob len %d beyond eof at %d\n", (int)len, (int)st.st_size)); } return TDB_ERRCODE(TDB_ERR_IO, -1); } /* Unmap, update size, remap */ tdb_munmap(tdb); tdb->map_size = st.st_size; tdb_mmap(tdb); return 0;}/* write a lump of data at a specified offset */static int tdb_write(TDB_CONTEXT *tdb, tdb_off off, void *buf, tdb_len len){ if (tdb_oob(tdb, off + len, 0) != 0) return -1; if (tdb->map_ptr) memcpy(off + (char *)tdb->map_ptr, buf, len);#ifdef HAVE_PWRITE else if (pwrite(tdb->fd, buf, len, off) != (ssize_t)len) {#else else if (lseek(tdb->fd, off, SEEK_SET) != off || write(tdb->fd, buf, len) != (ssize_t)len) {#endif TDB_LOG((tdb, 0,"tdb_write failed at %d len=%d (%s)\n", off, len, strerror(errno))); return TDB_ERRCODE(TDB_ERR_IO, -1); } return 0;}/* read a lump of data at a specified offset, maybe convert */static int tdb_read(TDB_CONTEXT *tdb,tdb_off off,void *buf,tdb_len len,int cv){ if (tdb_oob(tdb, off + len, 0) != 0) return -1; if (tdb->map_ptr) memcpy(buf, off + (char *)tdb->map_ptr, len);#ifdef HAVE_PREAD else if (pread(tdb->fd, buf, len, off) != (ssize_t)len) {#else else if (lseek(tdb->fd, off, SEEK_SET) != off || read(tdb->fd, buf, len) != (ssize_t)len) {#endif TDB_LOG((tdb, 0,"tdb_read failed at %d len=%d (%s)\n", off, len, strerror(errno))); return TDB_ERRCODE(TDB_ERR_IO, -1); } if (cv) convert(buf, len); return 0;}/* read a lump of data, allocating the space for it */static char *tdb_alloc_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_len len){ char *buf; if (!(buf = malloc(len))) { TDB_LOG((tdb, 0,"tdb_alloc_read malloc failed len=%d (%s)\n", len, strerror(errno))); return TDB_ERRCODE(TDB_ERR_OOM, buf); } if (tdb_read(tdb, offset, buf, len, 0) == -1) { free(buf); return NULL; } return buf;}/* read/write a tdb_off */static int ofs_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d){ return tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());}static int ofs_write(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d){ tdb_off off = *d; return tdb_write(tdb, offset, CONVERT(off), sizeof(*d));}/* read/write a record */static int rec_read(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec){ if (tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1) return -1; if (TDB_BAD_MAGIC(rec)) { TDB_LOG((tdb, 0,"rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset)); return TDB_ERRCODE(TDB_ERR_CORRUPT, -1); } return tdb_oob(tdb, rec->next+sizeof(*rec), 0);}static int rec_write(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec){ struct list_struct r = *rec; return tdb_write(tdb, offset, CONVERT(r), sizeof(r));}/* read a freelist record and check for simple errors */static int rec_free_read(TDB_CONTEXT *tdb, tdb_off off, struct list_struct *rec){ if (tdb_read(tdb, off, rec, sizeof(*rec),DOCONV()) == -1) return -1; if (rec->magic != TDB_FREE_MAGIC) { TDB_LOG((tdb, 0,"rec_free_read bad magic 0x%x at offset=%d\n", rec->magic, off)); return TDB_ERRCODE(TDB_ERR_CORRUPT, -1); } if (tdb_oob(tdb, rec->next+sizeof(*rec), 0) != 0) return -1; return 0;}/* update a record tailer (must hold allocation lock) */static int update_tailer(TDB_CONTEXT *tdb, tdb_off offset, const struct list_struct *rec){ tdb_off totalsize; /* Offset of tailer from record header */ totalsize = sizeof(*rec) + rec->rec_len; return ofs_write(tdb, offset + totalsize - sizeof(tdb_off), &totalsize);}static tdb_off tdb_dump_record(TDB_CONTEXT *tdb, tdb_off offset){ struct list_struct rec; tdb_off tailer_ofs, tailer; if (tdb_read(tdb, offset, (char *)&rec, sizeof(rec), DOCONV()) == -1) { printf("ERROR: failed to read record at %u\n", offset); return 0; } printf(" rec: offset=%u next=%d rec_len=%d key_len=%d data_len=%d full_hash=0x%x magic=0x%x\n", offset, rec.next, rec.rec_len, rec.key_len, rec.data_len, rec.full_hash, rec.magic); tailer_ofs = offset + sizeof(rec) + rec.rec_len - sizeof(tdb_off); if (ofs_read(tdb, tailer_ofs, &tailer) == -1) { printf("ERROR: failed to read tailer at %u\n", tailer_ofs); return rec.next; } if (tailer != rec.rec_len + sizeof(rec)) { printf("ERROR: tailer does not match record! tailer=%u totalsize=%u\n", tailer, rec.rec_len + sizeof(rec)); } return rec.next;}static void tdb_dump_chain(TDB_CONTEXT *tdb, int i){ tdb_off rec_ptr, top; top = TDB_HASH_TOP(i); tdb_lock(tdb, i, F_WRLCK); if (ofs_read(tdb, top, &rec_ptr) == -1) { tdb_unlock(tdb, i, F_WRLCK); return; } if (rec_ptr) printf("hash=%d\n", i); while (rec_ptr) { rec_ptr = tdb_dump_record(tdb, rec_ptr); } tdb_unlock(tdb, i, F_WRLCK);}void tdb_dump_all(TDB_CONTEXT *tdb){ int i; for (i=0;i<tdb->header.hash_size;i++) { tdb_dump_chain(tdb, i); } printf("freelist:\n"); tdb_dump_chain(tdb, -1);}void tdb_printfreelist(TDB_CONTEXT *tdb){ long total_free = 0; tdb_off offset, rec_ptr, last_ptr; struct list_struct rec; tdb_lock(tdb, -1, F_WRLCK); last_ptr = 0; offset = FREELIST_TOP; /* read in the freelist top */ if (ofs_read(tdb, offset, &rec_ptr) == -1) { return; } printf("freelist top=[0x%08x]\n", rec_ptr ); while (rec_ptr) { if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec), DOCONV()) == -1) { return; } if (rec.magic != TDB_FREE_MAGIC) { printf("bad magic 0x%08x in free list\n", rec.magic); return; } printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (%d)]\n", rec.next, rec.rec_len, rec.rec_len ); total_free += rec.rec_len; /* move to the next record */ rec_ptr = rec.next; } printf("total rec_len = [0x%08x (%d)]\n", (int)total_free, (int)total_free); tdb_unlock(tdb, -1, F_WRLCK);}/* Remove an element from the freelist. Must have alloc lock. */static int remove_from_freelist(TDB_CONTEXT *tdb, tdb_off off, tdb_off next){ tdb_off last_ptr, i; /* read in the freelist top */ last_ptr = FREELIST_TOP; while (ofs_read(tdb, last_ptr, &i) != -1 && i != 0) { if (i == off) { /* We've found it! */ return ofs_write(tdb, last_ptr, &next); } /* Follow chain (next offset is at start of record) */ last_ptr = i; } TDB_LOG((tdb, 0,"remove_from_freelist: not on list at off=%d\n", off)); return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);}/* Add an element into the freelist. Merge adjacent records if neccessary. */static int tdb_free(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec){ tdb_off right, left; /* Allocation and tailer lock */ if (tdb_lock(tdb, -1, F_WRLCK) != 0) return -1; /* set an initial tailer, so if we fail we don't leave a bogus record */ update_tailer(tdb, offset, rec); /* Look right first (I'm an Australian, dammit) */ right = offset + sizeof(*rec) + rec->rec_len; if (right + sizeof(*rec) <= tdb->map_size) { struct list_struct r; if (tdb_read(tdb, right, &r, sizeof(r), DOCONV()) == -1) { TDB_LOG((tdb, 0, "tdb_free: right read failed at %u\n", right)); goto left; } /* If it's free, expand to include it. */ if (r.magic == TDB_FREE_MAGIC) { if (remove_from_freelist(tdb, right, r.next) == -1) { TDB_LOG((tdb, 0, "tdb_free: right free failed at %u\n", right)); goto left; } rec->rec_len += sizeof(r) + r.rec_len; } }left: /* Look left */ left = offset - sizeof(tdb_off); if (left > TDB_HASH_TOP(tdb->header.hash_size-1)) { struct list_struct l; tdb_off leftsize; /* Read in tailer and jump back to header */ if (ofs_read(tdb, left, &leftsize) == -1) { TDB_LOG((tdb, 0, "tdb_free: left offset read failed at %u\n", left)); goto update; } left = offset - leftsize; /* Now read in record */ if (tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) { TDB_LOG((tdb, 0, "tdb_free: left read failed at %u (%u)\n", left, leftsize)); goto update; } /* If it's free, expand to include it. */ if (l.magic == TDB_FREE_MAGIC) { if (remove_from_freelist(tdb, left, l.next) == -1) { TDB_LOG((tdb, 0, "tdb_free: left free failed at %u\n", left)); goto update; } else { offset = left;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -