⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 villa.c

📁 harvest是一个下载html网页得机器人
💻 C
📖 第 1 页 / 共 4 页
字号:
/************************************************************************************************* * Implementation of Villa *                                                      Copyright (C) 2000-2003 Mikio Hirabayashi * This file is part of QDBM, Quick Database Manager. * QDBM is free software; you can redistribute it and/or modify it under the terms of the GNU * Lesser General Public License as published by the Free Software Foundation; either version * 2.1 of the License or any later version.  QDBM is distributed in the hope that it will be * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more * details. * You should have received a copy of the GNU Lesser General Public License along with QDBM; if * not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA. *************************************************************************************************/#include "villa.h"#include "myconf.h"#define VL_LEAFIDMIN   1                 /* minimum number of leaf ID */#define VL_NODEIDMIN   100000000         /* minimum number of node ID */#define VL_VNUMBUFSIZ  8                 /* size of a buffer for variable length number */#define VL_LEVELMAX    64                /* max level of B+ tree */#define VL_DEFLRECMAX  49                /* default number of records in each leaf */#define VL_DEFNIDXMAX  192               /* default number of indexes in each node */#define VL_DEFLCNUM    1024              /* default number of leaf cache */#define VL_DEFNCNUM    512               /* default number of node cache */#define VL_ALIGNRATIO  1.4               /* ratio between alignment and page size */#define VL_CACHEOUT    8                 /* number of pages in a process of cacheout */#define VL_INITBNUM    32749             /* initial bucket number */#define VL_INITALIGN   448               /* initial size of alignment */#define VL_OPTALIGN    -3                /* alignment setting when optimization */#define VL_PATHBUFSIZ  1024              /* size of a path buffer */#define VL_TMPFSUF     MYEXTSTR "vltmp"  /* suffix of a temporary file */#define VL_ROOTKEY     -1                /* key of the root key */#define VL_LASTKEY     -2                /* key of the last key */#define VL_LNUMKEY     -3                /* key of the number of leaves */#define VL_NNUMKEY     -4                /* key of the number of nodes */#define VL_RNUMKEY     -5                /* key of the number of records */enum {                                   /* enumeration for flags */  VL_FLISVILLA = 1 << 0,                 /* whether for Villa or not */  VL_FLISZLIB = 1 << 1                   /* whether with ZLIB or not */};/* private function prototypes */static int vllexcompare(const char *aptr, int asiz, const char *bptr, int bsiz);static int vlintcompare(const char *aptr, int asiz, const char *bptr, int bsiz);static int vlnumcompare(const char *aptr, int asiz, const char *bptr, int bsiz);static int vldeccompare(const char *aptr, int asiz, const char *bptr, int bsiz);static int vldpputnum(DEPOT *depot, int knum, int vnum);static int vldpgetnum(DEPOT *depot, int knum, int *vnp);static int vlsetvnumbuf(char *buf, int num);static int vlreadvnumbuf(const char *buf, int size, int *sp);static VLLEAF *vlleafnew(VILLA *villa, int prev, int next);static int vlleafcacheout(VILLA *villa, int id);static int vlleafsave(VILLA *villa, VLLEAF *leaf);static VLLEAF *vlleafload(VILLA *villa, int id);static int vlleafaddrec(VILLA *villa, VLLEAF *leaf, int dmode,                        const char *kbuf, int ksiz, const char *vbuf, int vsiz);static VLLEAF *vlleafdivide(VILLA *villa, VLLEAF *leaf);static VLNODE *vlnodenew(VILLA *villa, int heir);static int vlnodecacheout(VILLA *villa, int id);static int vlnodesave(VILLA *villa, VLNODE *node);static VLNODE *vlnodeload(VILLA *villa, int id);static void vlnodeaddidx(VILLA *villa, VLNODE *node, int order,                         int pid, const char *kbuf, int ksiz);static int vlsearchleaf(VILLA *villa, const char *kbuf, int ksiz, int *hist, int *hnp);static int vlcacheadjust(VILLA *villa);static VLREC *vlrecsearch(VILLA *villa, VLLEAF *leaf, const char *kbuf, int ksiz, int *ip);/************************************************************************************************* * public objects *************************************************************************************************//* Comparing functions. */VLCFUNC VL_CMPLEX = vllexcompare;VLCFUNC VL_CMPINT = vlintcompare;VLCFUNC VL_CMPNUM = vlnumcompare;VLCFUNC VL_CMPDEC = vldeccompare;/* Get a database handle. */VILLA *vlopen(const char *name, int omode, VLCFUNC cmp){  DEPOT *depot;  int dpomode, flags, root, last, lnum, nnum, rnum;  VILLA *villa;  VLLEAF *leaf;  assert(name && cmp);  dpomode = DP_OREADER;  if(omode & VL_OWRITER){    dpomode = DP_OWRITER;    if(omode & VL_OCREAT) dpomode |= DP_OCREAT;    if(omode & VL_OTRUNC) dpomode |= DP_OTRUNC;  }  if(omode & VL_ONOLCK) dpomode |= DP_ONOLCK;  if(!(depot = dpopen(name, dpomode, VL_INITBNUM))) return NULL;  flags = dpgetflags(depot);  root = -1;  last = -1;  lnum = 0;  nnum = 0;  rnum = 0;  if(dprnum(depot) > 0){    if(!(flags & VL_FLISVILLA) ||       !vldpgetnum(depot, VL_ROOTKEY, &root) || !vldpgetnum(depot, VL_LASTKEY, &last) ||       !vldpgetnum(depot, VL_LNUMKEY, &lnum) || !vldpgetnum(depot, VL_NNUMKEY, &nnum) ||       !vldpgetnum(depot, VL_RNUMKEY, &rnum) || root < VL_LEAFIDMIN || last < VL_LEAFIDMIN ||       lnum < 0 || nnum < 0 || rnum < 0){      dpclose(depot);      dpecode = DP_EBROKEN;      return NULL;    }  }  if(omode & VL_OWRITER){    flags |= VL_FLISVILLA;    if(_qdbm_deflate) flags |= VL_FLISZLIB;    if(!dpsetflags(depot, flags)){      dpclose(depot);      return NULL;    }  }  villa = cbmalloc(sizeof(VILLA));  villa->depot = depot;  villa->cmp = cmp;  villa->wmode = (omode & VL_OWRITER);  villa->root = root;  villa->last = last;  villa->lnum = lnum;  villa->nnum = nnum;  villa->rnum = rnum;  villa->leafc = cbmapopen();  villa->nodec = cbmapopen();  villa->curleaf = -1;  villa->curknum = -1;  villa->curvnum = -1;  villa->leafrecmax = VL_DEFLRECMAX;  villa->nodeidxmax = VL_DEFNIDXMAX;  villa->leafcnum = VL_DEFLCNUM;  villa->nodecnum = VL_DEFNCNUM;  villa->avglsiz = VL_INITALIGN;  villa->avgnsiz = VL_INITALIGN;  villa->tran = FALSE;  villa->rbroot = -1;  villa->rblast = -1;  villa->rblnum = -1;  villa->rbnnum = -1;  villa->rbrnum = -1;  if(root == -1){    leaf = vlleafnew(villa, -1, -1);    villa->root = leaf->id;    villa->last = leaf->id;  }  return villa;}/* Close a database handle. */int vlclose(VILLA *villa){  int err, pid;  const char *tmp;  assert(villa);  err = FALSE;  if(villa->tran){    if(!vltranabort(villa)) err = TRUE;  }  cbmapiterinit(villa->leafc);  while((tmp = cbmapiternext(villa->leafc, NULL)) != NULL){    pid = *(int *)tmp;    if(!vlleafcacheout(villa, pid)) err = TRUE;  }  cbmapiterinit(villa->nodec);  while((tmp = cbmapiternext(villa->nodec, NULL)) != NULL){    pid = *(int *)tmp;    if(!vlnodecacheout(villa, pid)) err = TRUE;  }  if(villa->wmode){    if(!dpsetalign(villa->depot, 0)) err = TRUE;    if(!vldpputnum(villa->depot, VL_ROOTKEY, villa->root)) err = TRUE;    if(!vldpputnum(villa->depot, VL_LASTKEY, villa->last)) err = TRUE;    if(!vldpputnum(villa->depot, VL_LNUMKEY, villa->lnum)) err = TRUE;    if(!vldpputnum(villa->depot, VL_NNUMKEY, villa->nnum)) err = TRUE;    if(!vldpputnum(villa->depot, VL_RNUMKEY, villa->rnum)) err = TRUE;  }  cbmapclose(villa->leafc);  cbmapclose(villa->nodec);  if(!dpclose(villa->depot)) err = TRUE;  free(villa);  return err ? FALSE : TRUE;}/* Store a record. */int vlput(VILLA *villa, const char *kbuf, int ksiz, const char *vbuf, int vsiz, int dmode){  VLLEAF *leaf, *newleaf;  VLNODE *node, *newnode;  VLIDX *idxp;  CBDATUM *key;  int hist[VL_LEVELMAX];  int i, hnum, pid, heir, parent, mid;  assert(villa && kbuf && vbuf);  villa->curleaf = -1;  villa->curknum = -1;  villa->curvnum = -1;  if(!villa->wmode){    dpecode = DP_EMODE;    return FALSE;  }  if(ksiz < 0) ksiz = strlen(kbuf);  if(vsiz < 0) vsiz = strlen(vbuf);  if((pid = vlsearchleaf(villa, kbuf, ksiz, hist, &hnum)) == -1) return FALSE;  if(!(leaf = vlleafload(villa, pid))) return FALSE;  if(!vlleafaddrec(villa, leaf, dmode, kbuf, ksiz, vbuf, vsiz)){    dpecode = DP_EKEEP;    return FALSE;  }  if(CB_LISTNUM(leaf->recs) > villa->leafrecmax && CB_LISTNUM(leaf->recs) % 2 == 0){    if(!(newleaf = vlleafdivide(villa, leaf))) return FALSE;    if(leaf->id == villa->last) villa->last = newleaf->id;    heir = leaf->id;    pid = newleaf->id;    key = ((VLREC *)CB_LISTVAL(newleaf->recs, 0, NULL))->key;    key = cbdatumopen(CB_DATUMPTR(key), CB_DATUMSIZE(key));    while(TRUE){      if(hnum < 1){        node = vlnodenew(villa, heir);        vlnodeaddidx(villa, node, TRUE, pid, CB_DATUMPTR(key), CB_DATUMSIZE(key));        villa->root = node->id;        cbdatumclose(key);        break;      }      parent = hist[--hnum];      if(!(node = vlnodeload(villa, parent))){        cbdatumclose(key);        return FALSE;      }      vlnodeaddidx(villa, node, FALSE, pid, CB_DATUMPTR(key), CB_DATUMSIZE(key));      cbdatumclose(key);      if(CB_LISTNUM(node->idxs) <= villa->nodeidxmax || CB_LISTNUM(node->idxs) % 2 == 0) break;      mid = CB_LISTNUM(node->idxs) / 2;      idxp = (VLIDX *)CB_LISTVAL(node->idxs, mid, NULL);      newnode = vlnodenew(villa, idxp->pid);      heir = node->id;      pid = newnode->id;      key = cbdatumopen(CB_DATUMPTR(idxp->key), CB_DATUMSIZE(idxp->key));      for(i = mid + 1; i < CB_LISTNUM(node->idxs); i++){        idxp = (VLIDX *)CB_LISTVAL(node->idxs, i, NULL);        vlnodeaddidx(villa, newnode, TRUE, idxp->pid,                     CB_DATUMPTR(idxp->key), CB_DATUMSIZE(idxp->key));      }      for(i = 0; i <= mid; i++){        idxp = (VLIDX *)cblistpop(node->idxs, NULL);        cbdatumclose(idxp->key);        free(idxp);      }      node->dirty = TRUE;    }  }  if(!villa->tran && !vlcacheadjust(villa)) return FALSE;  return TRUE;}/* Delete a record. */int vlout(VILLA *villa, const char *kbuf, int ksiz){  VLLEAF *leaf;  VLREC *recp;  int pid, ri, vsiz;  char *vbuf;  assert(villa && kbuf);  villa->curleaf = -1;  villa->curknum = -1;  villa->curvnum = -1;  if(!villa->wmode){    dpecode = DP_EMODE;    return FALSE;  }  if(ksiz < 0) ksiz = strlen(kbuf);  if((pid = vlsearchleaf(villa, kbuf, ksiz, NULL, NULL)) == -1) return FALSE;  if(!(leaf = vlleafload(villa, pid))) return FALSE;  if(!(recp = vlrecsearch(villa, leaf, kbuf, ksiz, &ri))){    dpecode = DP_ENOITEM;    return FALSE;  }  if(recp->rest){    cbdatumclose(recp->first);    vbuf = cblistshift(recp->rest, &vsiz);    recp->first = cbdatumopen(vbuf, vsiz);    free(vbuf);    if(CB_LISTNUM(recp->rest) < 1){      cblistclose(recp->rest);      recp->rest = NULL;    }  } else {    cbdatumclose(recp->key);    cbdatumclose(recp->first);    free(cblistremove(leaf->recs, ri, NULL));  }  leaf->dirty = TRUE;  villa->rnum--;  if(!villa->tran && !vlcacheadjust(villa)) return FALSE;  return TRUE;}/* Retrieve a record. */char *vlget(VILLA *villa, const char *kbuf, int ksiz, int *sp){  VLLEAF *leaf;  VLREC *recp;  int pid;  assert(villa && kbuf);  if(ksiz < 0) ksiz = strlen(kbuf);  if((pid = vlsearchleaf(villa, kbuf, ksiz, NULL, NULL)) == -1) return NULL;  if(!(leaf = vlleafload(villa, pid))) return NULL;  if(!(recp = vlrecsearch(villa, leaf, kbuf, ksiz, NULL))){    dpecode = DP_ENOITEM;    return NULL;  }  if(!villa->tran && !vlcacheadjust(villa)) return NULL;  if(sp) *sp = CB_DATUMSIZE(recp->first);  return cbmemdup(CB_DATUMPTR(recp->first), CB_DATUMSIZE(recp->first));}/* Get the number of records corresponding a key. */int vlvnum(VILLA *villa, const char *kbuf, int ksiz){  VLLEAF *leaf;  VLREC *recp;  int pid;  assert(villa && kbuf);  if(ksiz < 0) ksiz = strlen(kbuf);  if((pid = vlsearchleaf(villa, kbuf, ksiz, NULL, NULL)) == -1) return 0;  if(!(leaf = vlleafload(villa, pid))) return 0;  if(!(recp = vlrecsearch(villa, leaf, kbuf, ksiz, NULL))){    dpecode = DP_ENOITEM;    return 0;  }  if(!villa->tran && !vlcacheadjust(villa)) return 0;  return 1 + (recp->rest ? CB_LISTNUM(recp->rest) : 0);}/* Store plural records corresponding a key. */int vlputlist(VILLA *villa, const char *kbuf, int ksiz, CBLIST *vals){  int i, vsiz;  const char *vbuf;  assert(villa && kbuf && vals);  if(!villa->wmode){    dpecode = DP_EMODE;    return FALSE;  }  if(CB_LISTNUM(vals) < 1){    dpecode = DP_EMISC;    return FALSE;  }  if(ksiz < 0) ksiz = strlen(kbuf);  for(i = 0; i < CB_LISTNUM(vals); i++){    vbuf = cblistval(vals, i, &vsiz);    if(!vlput(villa, kbuf, ksiz, vbuf, vsiz, VL_DDUP)) return FALSE;  }  return TRUE;}/* Delete all records corresponding a key. */int vloutlist(VILLA *villa, const char *kbuf, int ksiz){  int i, vnum;  assert(villa && kbuf);  if(!villa->wmode){    dpecode = DP_EMODE;    return FALSE;  }  if(ksiz < 0) ksiz = strlen(kbuf);  if((vnum = vlvnum(villa, kbuf, ksiz)) < 1) return FALSE;  for(i = 0; i < vnum; i++){    if(!vlout(villa, kbuf, ksiz)) return FALSE;  }  return TRUE;}/* Retrieve values of all records corresponding a key. */CBLIST *vlgetlist(VILLA *villa, const char *kbuf, int ksiz){  VLLEAF *leaf;  VLREC *recp;  int pid, i, vsiz;  CBLIST *vals;  const char *vbuf;  assert(villa && kbuf);  if(ksiz < 0) ksiz = strlen(kbuf);  if((pid = vlsearchleaf(villa, kbuf, ksiz, NULL, NULL)) == -1) return NULL;  if(!(leaf = vlleafload(villa, pid))) return NULL;  if(!(recp = vlrecsearch(villa, leaf, kbuf, ksiz, NULL))){    dpecode = DP_ENOITEM;    return NULL;  }  vals = cblistopen();  cblistpush(vals, CB_DATUMPTR(recp->first), CB_DATUMSIZE(recp->first));  if(recp->rest){    for(i = 0; i < CB_LISTNUM(recp->rest); i++){      vbuf = cblistval(recp->rest, i, &vsiz);      cblistpush(vals, vbuf, vsiz);    }  }  if(!villa->tran && !vlcacheadjust(villa)){    cblistclose(vals);    return NULL;  }  return vals;}/* Move the cursor to the first record. */int vlcurfirst(VILLA *villa){  VLLEAF *leaf;  assert(villa);  villa->curleaf = VL_LEAFIDMIN;  villa->curknum = 0;  villa->curvnum = 0;  if(!(leaf = vlleafload(villa, villa->curleaf))){    villa->curleaf = -1;    return FALSE;  }  while(CB_LISTNUM(leaf->recs) < 1){    villa->curleaf = leaf->next;    villa->curknum = 0;    villa->curvnum = 0;    if(villa->curleaf == -1){      dpecode = DP_ENOITEM;      return FALSE;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -