📄 tchdb.c
字号:
/*************************************************************************************************
 * The hash database API of Tokyo Cabinet
 *                                                      Copyright (C) 2006-2009 Mikio Hirabayashi
 * This file is part of Tokyo Cabinet.
 * Tokyo Cabinet is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License or any later version.  Tokyo Cabinet is distributed in the hope
 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 * You should have received a copy of the GNU Lesser General Public License along with Tokyo
 * Cabinet; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307 USA.
 *************************************************************************************************/


#include "tcutil.h"
#include "tchdb.h"
#include "tcbdb.h"
#include "myconf.h"

#define HDBFILEMODE    00644             // permission of created files
#define HDBIOBUFSIZ    8192              // size of an I/O buffer

#define HDBMAGICDATA   "ToKyO CaBiNeT"   // magic data for identification
#define HDBHEADSIZ     256               // size of the region of the header
#define HDBTYPEOFF     32                // offset of the region for the database type
#define HDBFLAGSOFF    33                // offset of the region for the additional flags
#define HDBAPOWOFF     34                // offset of the region for the alignment power
#define HDBFPOWOFF     35                // offset of the region for the free block pool power
#define HDBOPTSOFF     36                // offset of the region for the options
#define HDBBNUMOFF     40                // offset of the region for the bucket number
#define HDBRNUMOFF     48                // offset of the region for the record number
#define HDBFSIZOFF     56                // offset of the region for the file size
#define HDBFRECOFF     64                // offset of the region for the first record offset
#define HDBOPAQUEOFF   128               // offset of the region for the opaque field

#define HDBDEFBNUM     131071            // default bucket number
#define HDBDEFAPOW     4                 // default alignment power
#define HDBMAXAPOW     16                // maximum alignment power
#define HDBDEFFPOW     10                // default free block pool power
#define HDBMAXFPOW     20                // maximum free block pool power
#define HDBDEFXMSIZ    ((1LL<<20)*64)    // default size of the extra mapped memory
#define HDBXFSIZINC    32768             // increment of extra file size
#define HDBMINRUNIT    48                // minimum record reading unit
#define HDBMAXHSIZ     32                // maximum record header size
#define HDBFBPALWRAT   2                 // allowance ratio of the free block pool
#define HDBFBPBSIZ     64                // base region size of the free block pool
#define HDBFBPESIZ     4                 // size of each region of the free block pool
#define HDBFBPMGFREQ   4096              // frequency to merge the free block pool
#define HDBDRPUNIT     65536             // unit size of the delayed record pool
#define HDBDRPLAT      2048              // latitude size of the delayed record pool
#define HDBCACHEOUT    128               // number of records in a process of cacheout
#define HDBWALSUFFIX   "wal"             // suffix of write ahead logging file

typedef struct {                         // type of structure for a record
  uint64_t off;                          // offset of the record
  uint32_t rsiz;                         // size of the whole record
  uint8_t magic;                         // magic number
  uint8_t hash;                          // second hash value
  uint64_t left;                         // offset of the left child record
  uint64_t right;                        // offset of the right child record
  uint32_t ksiz;                         // size of the key
  uint32_t vsiz;                         // size of the value
  uint16_t psiz;                         // size of the padding
  const char *kbuf;                      // pointer to the key
  const char *vbuf;                      // pointer to the value
  uint64_t boff;                         // offset of the body
  char *bbuf;                            // buffer of the body
} TCHREC;

typedef struct {                         // type of structure for a free block
  uint64_t off;                          // offset of the block
  uint32_t rsiz;                         // size of the block
} HDBFB;

enum {                                   // enumeration for magic data
  HDBMAGICREC = 0xc8,                    // for data block
  HDBMAGICFB = 0xb0                      // for free block
};

enum {                                   // enumeration for duplication behavior
  HDBPDOVER,                             // overwrite an existing value
  HDBPDKEEP,                             // keep the existing value
  HDBPDCAT,                              // concatenate values
  HDBPDADDINT,                           // add an integer
  HDBPDADDDBL,                           // add a real number
  HDBPDPROC                              // process by a callback function
};

typedef struct {                         // type of structure for a duplication callback
  TCPDPROC proc;                         // function pointer
  void *op;                              // opaque pointer
} HDBPDPROCOP;


/* private macros */
/* Each locking macro evaluates to `true' without calling anything when the object has no
   method lock (`mmtx' is NULL), i.e. when mutual exclusion was never enabled. */
#define HDBLOCKMETHOD(TC_hdb, TC_wr)                            \
  ((TC_hdb)->mmtx ? tchdblockmethod((TC_hdb), (TC_wr)) : true)
#define HDBUNLOCKMETHOD(TC_hdb)                         \
  ((TC_hdb)->mmtx ? tchdbunlockmethod(TC_hdb) : true)
#define HDBLOCKRECORD(TC_hdb, TC_bidx, TC_wr)                           \
  ((TC_hdb)->mmtx ? tchdblockrecord((TC_hdb), (uint8_t)(TC_bidx), (TC_wr)) : true)
#define HDBUNLOCKRECORD(TC_hdb, TC_bidx)                                \
  ((TC_hdb)->mmtx ? tchdbunlockrecord((TC_hdb), (uint8_t)(TC_bidx)) : true)
#define HDBLOCKALLRECORDS(TC_hdb, TC_wr)                                \
  ((TC_hdb)->mmtx ? tchdblockallrecords((TC_hdb), (TC_wr)) : true)
#define HDBUNLOCKALLRECORDS(TC_hdb)                             \
  ((TC_hdb)->mmtx ? tchdbunlockallrecords(TC_hdb) : true)
#define HDBLOCKDB(TC_hdb)                       \
  ((TC_hdb)->mmtx ? tchdblockdb(TC_hdb) : true)
#define HDBUNLOCKDB(TC_hdb)                             \
  ((TC_hdb)->mmtx ? tchdbunlockdb(TC_hdb) : true)
#define HDBLOCKWAL(TC_hdb)                              \
  ((TC_hdb)->mmtx ? tchdblockwal(TC_hdb) : true)
#define HDBUNLOCKWAL(TC_hdb)                            \
  ((TC_hdb)->mmtx ? tchdbunlockwal(TC_hdb) : true)
#define HDBTHREADYIELD(TC_hdb)                          \
  do { if((TC_hdb)->mmtx) sched_yield(); } while(false)


/* private function prototypes */
static uint64_t tcgetprime(uint64_t num);
static bool tchdbseekwrite(TCHDB *hdb, off_t off, const void *buf, size_t size);
static bool tchdbseekread(TCHDB *hdb, off_t off, void *buf, size_t size);
static bool tchdbseekreadtry(TCHDB *hdb, off_t off, void *buf, size_t size);
static void tchdbdumpmeta(TCHDB *hdb, char *hbuf);
static void tchdbloadmeta(TCHDB *hdb, const char *hbuf);
static void tchdbclear(TCHDB *hdb);
static int32_t tchdbpadsize(TCHDB *hdb, uint64_t off);
static void tchdbsetflag(TCHDB *hdb, int flag, bool sign);
static uint64_t tchdbbidx(TCHDB *hdb, const char *kbuf, int ksiz, uint8_t *hp);
static off_t tchdbgetbucket(TCHDB *hdb, uint64_t bidx);
static void tchdbsetbucket(TCHDB *hdb, uint64_t bidx, uint64_t off);
static bool tchdbsavefbp(TCHDB *hdb);
static bool tchdbloadfbp(TCHDB *hdb);
static void tcfbpsortbyoff(HDBFB *fbpool, int fbpnum);
static void tcfbpsortbyrsiz(HDBFB *fbpool, int fbpnum);
static void tchdbfbpmerge(TCHDB *hdb);
static void tchdbfbpinsert(TCHDB *hdb, uint64_t off, uint32_t rsiz);
static bool tchdbfbpsearch(TCHDB *hdb, TCHREC *rec);
static bool tchdbfbpsplice(TCHDB *hdb, TCHREC *rec, uint32_t nsiz);
static bool tchdbwritefb(TCHDB *hdb, uint64_t off, uint32_t rsiz);
static bool tchdbwriterec(TCHDB *hdb, TCHREC *rec, uint64_t bidx, off_t entoff);
static bool tchdbreadrec(TCHDB *hdb, TCHREC *rec, char *rbuf);
static bool tchdbreadrecbody(TCHDB *hdb, TCHREC *rec);
static bool tchdbremoverec(TCHDB *hdb, TCHREC *rec, char *rbuf, uint64_t bidx, off_t entoff);
static int tcreckeycmp(const char *abuf, int asiz, const char *bbuf, int bsiz);
static bool tchdbflushdrp(TCHDB *hdb);
static void tchdbcacheadjust(TCHDB *hdb);
static bool tchdbwalinit(TCHDB *hdb);
static bool tchdbwalwrite(TCHDB *hdb, uint64_t off, int64_t size);
static int tchdbwalrestore(TCHDB *hdb, const char *path);
static bool tchdbwalremove(TCHDB *hdb, const char *path);
static bool tchdbopenimpl(TCHDB *hdb, const char *path, int omode);
static bool tchdbcloseimpl(TCHDB *hdb);
static bool tchdbputimpl(TCHDB *hdb, const char *kbuf, int ksiz, uint64_t bidx, uint8_t hash,
                         const char *vbuf, int vsiz, int dmode);
static void tchdbdrpappend(TCHDB *hdb, const char *kbuf, int ksiz, const char *vbuf, int vsiz,
                           uint8_t hash);
static bool tchdbputasyncimpl(TCHDB *hdb, const char *kbuf, int ksiz, uint64_t bidx,
                              uint8_t hash, const char *vbuf, int vsiz);
static bool tchdboutimpl(TCHDB *hdb, const char *kbuf, int ksiz, uint64_t bidx, uint8_t hash);
static char *tchdbgetimpl(TCHDB *hdb, const char *kbuf, int ksiz, uint64_t bidx, uint8_t hash,
                          int *sp);
static int tchdbgetintobuf(TCHDB *hdb, const char *kbuf, int ksiz, uint64_t bidx, uint8_t hash,
                           char *vbuf, int max);
static char *tchdbgetnextimpl(TCHDB *hdb, const char *kbuf, int ksiz, int *sp,
                              const char **vbp, int *vsp);
static int tchdbvsizimpl(TCHDB *hdb, const char *kbuf, int ksiz, uint64_t bidx, uint8_t hash);
static bool tchdbiterinitimpl(TCHDB *hdb);
static char *tchdbiternextimpl(TCHDB *hdb, int *sp);
static bool tchdbiternextintoxstr(TCHDB *hdb, TCXSTR *kxstr, TCXSTR *vxstr);
static bool tchdboptimizeimpl(TCHDB *hdb, int64_t bnum, int8_t apow, int8_t fpow, uint8_t opts);
static bool tchdbvanishimpl(TCHDB *hdb);
static bool tchdbcopyimpl(TCHDB *hdb, const char *path);
static bool tchdbforeachimpl(TCHDB *hdb, TCITER iter, void *op);
static bool tchdblockmethod(TCHDB *hdb, bool wr);
static bool tchdbunlockmethod(TCHDB *hdb);
static bool tchdblockrecord(TCHDB *hdb, uint8_t bidx, bool wr);
static bool tchdbunlockrecord(TCHDB *hdb, uint8_t bidx);
static bool tchdblockallrecords(TCHDB *hdb, bool wr);
static bool tchdbunlockallrecords(TCHDB *hdb);
static bool tchdblockdb(TCHDB *hdb);
static bool tchdbunlockdb(TCHDB *hdb);
static bool tchdblockwal(TCHDB *hdb);
static bool tchdbunlockwal(TCHDB *hdb);


/* debugging function prototypes */
void tchdbprintmeta(TCHDB *hdb);
void tchdbprintrec(TCHDB *hdb, TCHREC *rec);



/*************************************************************************************************
 * API
 *************************************************************************************************/


/* Get the message string corresponding to an error code.
   `ecode' is forwarded unchanged to `tcerrmsg', whose result is returned as is. */
const char *tchdberrmsg(int ecode){
  const char *msg = tcerrmsg(ecode);
  return msg;
}


/* Create a hash database object.
   The object is allocated with `TCMALLOC' and put into its initial state by `tchdbclear'.
   The return value is the new object; release it with `tchdbdel'. */
TCHDB *tchdbnew(void){
  TCHDB *fresh;
  TCMALLOC(fresh, sizeof(*fresh));
  tchdbclear(fresh);
  return fresh;
}


/* Delete a hash database object.
   If the object still has an open file descriptor it is closed first.  When mutual exclusion
   was enabled (`mmtx' is set), the thread-specific key, the mutexes and every read/write lock
   are destroyed before their storage and the object itself are released. */
void tchdbdel(TCHDB *hdb){
  assert(hdb);
  if(hdb->fd >= 0) tchdbclose(hdb);
  if(hdb->mmtx){
    pthread_key_t *eckey = (pthread_key_t *)hdb->eckey;
    pthread_rwlock_t *reclocks = (pthread_rwlock_t *)hdb->rmtxs;
    pthread_key_delete(*eckey);
    pthread_mutex_destroy(hdb->wmtx);
    pthread_mutex_destroy(hdb->tmtx);
    pthread_mutex_destroy(hdb->dmtx);
    // the record locks are torn down from the last slot to the first
    int slot = UINT8_MAX;
    while(slot >= 0){
      pthread_rwlock_destroy(reclocks + slot);
      slot--;
    }
    pthread_rwlock_destroy(hdb->mmtx);
    TCFREE(hdb->eckey);
    TCFREE(hdb->wmtx);
    TCFREE(hdb->tmtx);
    TCFREE(hdb->dmtx);
    TCFREE(hdb->rmtxs);
    TCFREE(hdb->mmtx);
  }
  TCFREE(hdb);
}


/* Get the last happened error code of a hash database object.
   With mutual exclusion enabled the code is read from the thread-specific slot keyed by
   `eckey'; otherwise the `ecode' member of the object is returned. */
int tchdbecode(TCHDB *hdb){
  assert(hdb);
  if(!hdb->mmtx) return hdb->ecode;
  pthread_key_t *eckey = (pthread_key_t *)hdb->eckey;
  return (int)(intptr_t)pthread_getspecific(*eckey);
}


/* Set mutual exclusion control of a hash database object for threading.
*/bool tchdbsetmutex(TCHDB *hdb){ assert(hdb); if(!TCUSEPTHREAD) return true; if(hdb->mmtx || hdb->fd >= 0){ tchdbsetecode(hdb, TCEINVALID, __FILE__, __LINE__, __func__); return false; } pthread_mutexattr_t rma; pthread_mutexattr_init(&rma); TCMALLOC(hdb->mmtx, sizeof(pthread_rwlock_t)); TCMALLOC(hdb->rmtxs, (UINT8_MAX + 1) * sizeof(pthread_rwlock_t)); TCMALLOC(hdb->dmtx, sizeof(pthread_mutex_t)); TCMALLOC(hdb->tmtx, sizeof(pthread_mutex_t)); TCMALLOC(hdb->wmtx, sizeof(pthread_mutex_t)); TCMALLOC(hdb->eckey, sizeof(pthread_key_t)); bool err = false; if(pthread_mutexattr_settype(&rma, PTHREAD_MUTEX_RECURSIVE) != 0) err = true; if(pthread_rwlock_init(hdb->mmtx, NULL) != 0) err = true; for(int i = 0; i <= UINT8_MAX; i++){ if(pthread_rwlock_init((pthread_rwlock_t *)hdb->rmtxs + i, NULL) != 0) err = true; } if(pthread_mutex_init(hdb->dmtx, &rma) != 0) err = true; if(pthread_mutex_init(hdb->tmtx, NULL) != 0) err = true; if(pthread_mutex_init(hdb->wmtx, NULL) != 0) err = true; if(pthread_key_create(hdb->eckey, NULL) != 0) err = true; if(err){ tchdbsetecode(hdb, TCETHREAD, __FILE__, __LINE__, __func__); pthread_mutexattr_destroy(&rma); TCFREE(hdb->eckey); TCFREE(hdb->wmtx); TCFREE(hdb->tmtx); TCFREE(hdb->dmtx); TCFREE(hdb->rmtxs); TCFREE(hdb->mmtx); hdb->eckey = NULL; hdb->wmtx = NULL; hdb->tmtx = NULL; hdb->dmtx = NULL; hdb->rmtxs = NULL; hdb->mmtx = NULL; return false; } pthread_mutexattr_destroy(&rma); return true;}/* Set the tuning parameters of a hash database object. */bool tchdbtune(TCHDB *hdb, int64_t bnum, int8_t apow, int8_t fpow, uint8_t opts){ assert(hdb); if(hdb->fd >= 0){ tchdbsetecode(hdb, TCEINVALID, __FILE__, __LINE__, __func__); return false; } hdb->bnum = (bnum > 0) ? tcgetprime(bnum) : HDBDEFBNUM; hdb->apow = (apow >= 0) ? tclmin(apow, HDBMAXAPOW) : HDBDEFAPOW; hdb->fpow = (fpow >= 0) ? 
tclmin(fpow, HDBMAXFPOW) : HDBDEFFPOW; hdb->opts = opts; if(!_tc_deflate) hdb->opts &= ~HDBTDEFLATE; if(!_tc_bzcompress) hdb->opts &= ~HDBTBZIP; return true;}/* Set the caching parameters of a hash database object. */bool tchdbsetcache(TCHDB *hdb, int32_t rcnum){ assert(hdb); if(hdb->fd >= 0){ tchdbsetecode(hdb, TCEINVALID, __FILE__, __LINE__, __func__); return false; } hdb->rcnum = (rcnum > 0) ? tclmin(tclmax(rcnum, HDBCACHEOUT * 2), INT_MAX / 4) : 0; return true;}/* Set the size of the extra mapped memory of a hash database object. */bool tchdbsetxmsiz(TCHDB *hdb, int64_t xmsiz){ assert(hdb); if(hdb->fd >= 0){ tchdbsetecode(hdb, TCEINVALID, __FILE__, __LINE__, __func__); return false; } hdb->xmsiz = (xmsiz > 0) ? tcpagealign(xmsiz) : 0; return true;}/* Open a database file and connect a hash database object. */bool tchdbopen(TCHDB *hdb, const char *path, int omode){ assert(hdb && path); if(!HDBLOCKMETHOD(hdb, true)) return false; if(hdb->fd >= 0){ tchdbsetecode(hdb, TCEINVALID, __FILE__, __LINE__, __func__); HDBUNLOCKMETHOD(hdb); return false; } bool rv = tchdbopenimpl(hdb, path, omode); HDBUNLOCKMETHOD(hdb); return rv;}/* Close a database object. */bool tchdbclose(TCHDB *hdb){ assert(hdb); if(!HDBLOCKMETHOD(hdb, true)) return false; if(hdb->fd < 0){ tchdbsetecode(hdb, TCEINVALID, __FILE__, __LINE__, __func__); HDBUNLOCKMETHOD(hdb); return false; } bool rv = tchdbcloseimpl(hdb); HDBUNLOCKMETHOD(hdb); return rv;}/* Store a record into a hash database object. 
*/
/* `kbuf'/`ksiz' give the key region and `vbuf'/`vsiz' the value region.
   The call takes the method lock in shared mode and the record lock of the key's bucket in
   write mode, and stores the record through `tchdbputimpl' with the HDBPDOVER (overwrite)
   mode.  When `zmode' is set the value is encoded first: by `_tc_deflate' for HDBTDEFLATE,
   `_tc_bzcompress' for HDBTBZIP, `tcbsencode' for HDBTTCBS, or else the object's own `enc'
   callback; a failed encoding is reported as `TCEMISC'.
   `TCEINVALID' is reported when the database is not opened or not opened as a writer.
   The return value is true if success, else, it is false. */
bool tchdbput(TCHDB *hdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz){
  assert(hdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0);
  if(!HDBLOCKMETHOD(hdb, false)) return false;
  bool ok = false;
  char *zbuf = NULL;              // encoded copy of the value, owned by this function
  const char *body = vbuf;        // region actually handed to the storage routine
  uint8_t hash;
  uint64_t bidx = tchdbbidx(hdb, kbuf, ksiz, &hash);
  if(hdb->fd < 0 || !(hdb->omode & HDBOWRITER)){
    tchdbsetecode(hdb, TCEINVALID, __FILE__, __LINE__, __func__);
    goto unlock_method;
  }
  // pending asynchronous records are flushed before the record lock is taken
  if(hdb->async && !tchdbflushdrp(hdb)) goto unlock_method;
  if(!HDBLOCKRECORD(hdb, bidx, true)) goto unlock_method;
  if(hdb->zmode){
    // each encoder rewrites `vsiz' with the size of the encoded region
    if(hdb->opts & HDBTDEFLATE){
      zbuf = _tc_deflate(vbuf, vsiz, &vsiz, _TCZMRAW);
    } else if(hdb->opts & HDBTBZIP){
      zbuf = _tc_bzcompress(vbuf, vsiz, &vsiz);
    } else if(hdb->opts & HDBTTCBS){
      zbuf = tcbsencode(vbuf, vsiz, &vsiz);
    } else {
      zbuf = hdb->enc(vbuf, vsiz, &vsiz, hdb->encop);
    }
    if(!zbuf){
      tchdbsetecode(hdb, TCEMISC, __FILE__, __LINE__, __func__);
      goto unlock_record;
    }
    body = zbuf;
  }
  ok = tchdbputimpl(hdb, kbuf, ksiz, bidx, hash, body, vsiz, HDBPDOVER);
  if(zbuf) TCFREE(zbuf);
 unlock_record:
  HDBUNLOCKRECORD(hdb, bidx);
 unlock_method:
  HDBUNLOCKMETHOD(hdb);
  return ok;
}


/* Store a string record into a hash database object. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -