⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 depot.c

📁 harvest是一个下载html网页得机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
/************************************************************************************************* * Implementation of Depot *                                                      Copyright (C) 2000-2003 Mikio Hirabayashi * This file is part of QDBM, Quick Database Manager. * QDBM is free software; you can redistribute it and/or modify it under the terms of the GNU * Lesser General Public License as published by the Free Software Foundation; either version * 2.1 of the License or any later version.  QDBM is distributed in the hope that it will be * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more * details. * You should have received a copy of the GNU Lesser General Public License along with QDBM; if * not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA. *************************************************************************************************/#include "depot.h"#include "myconf.h"#define DP_FILEMODE    00644             /* permission of a creating file */#define DP_MAGICNUMB   "[DEPOT]\n\f"     /* magic number on environments of big endian */#define DP_MAGICNUML   "[depot]\n\f"     /* magic number on environments of little endian */#define DP_HEADSIZ     48                /* size of the reagion of the header */#define DP_FLAGSOFF    16                /* offset of the region for flags */#define DP_FSIZOFF     24                /* offset of the region for the file size */#define DP_BNUMOFF     32                /* offset of the region for the bucket number */#define DP_RNUMOFF     40                /* offset of the region for the record number */#define DP_DEFBNUM     8191              /* default bucket number */#define DP_COALMAX     3                 /* number of trials for record coalescence */#define DP_TMPFSUF     MYEXTSTR "dptmp"  /* suffix of a temporary file */#define DP_OPTBLOAD    0.25              /* ratio of bucket loading at optimization */#define DP_OPTRUNIT    256               /* number of records in a process of optimization */#define DP_IOBUFSIZ    8192              /* size of an I/O buffer */enum {                                   /* enumeration for a record header */  DP_RHIFLAGS,                           /* offset of flags */  DP_RHIHASH,                            /* offset of value of the second hash function */  DP_RHIKSIZ,                            /* offset of the size of the key */  DP_RHIVSIZ,                            /* offset of the size of the value */  DP_RHIPSIZ,                            /* offset of the size of the padding bytes */  DP_RHILEFT,                            /* offset of the offset of the left child */  DP_RHIRIGHT,                           /* offset of the offset of the right child */  DP_RHNUM                               /* number of elements of a header */};enum {                                   /* enumeration for the flag of a record */  DP_RECFDEL = 1 << 0,                   /* deleted */  DP_RECFREUSE = 1 << 1                  /* reusable */};/* private function prototypes */static int dpbigendian(void);static char *dpstrdup(const char *str);static int dplock(int fd, int ex);static int dpwrite(int fd, const void *buf, int size);static int dpseekwrite(int fd, int off, const void *buf, int size);static int dpseekwritenum(int fd, int off, int num);static int dpread(int fd, void *buf, int size);static int dpseekread(int fd, int off, void *buf, int size);static int dpfcopy(int destfd, int destoff, int srcfd, int srcoff);static int dpfirsthash(const char *kbuf, int ksiz);static int dpsecondhash(const char *kbuf, int ksiz);static int dpthirdhash(const char *kbuf, int ksiz);static int dpgetprime(int num);static int dppadsize(DEPOT *depot, int vsiz);static int dprecsize(int *head);static int dprechead(DEPOT *depot, int off, int *head);static char *dpreckey(DEPOT *depot, int off, int *head);static char *dprecval(DEPOT *depot, int off, int *head, int start, int max);static int dpkeycmp(const char *abuf, int asiz, const char *bbuf, int bsiz);static int dprecsearch(DEPOT *depot, const char *kbuf, int ksiz, int hash,                       int *bip, int *offp, int *entp, int *head, int delhit);static int dprecrewrite(DEPOT *depot, int off, int rsiz, const char *kbuf, int ksiz,                        const char *vbuf, int vsiz, int hash, int left, int right);static int dprecappend(DEPOT *depot, const char *kbuf, int ksiz, const char *vbuf, int vsiz,                       int hash, int left, int right);static int dprecover(DEPOT *depot, int off, int *head, const char *vbuf, int vsiz, int cat);static int dprecdelete(DEPOT *depot, int off, int *head, int reusable);/************************************************************************************************* * public objects *************************************************************************************************//* String containing the version information. */const char *dpversion = _QDBM_VERSION;/* Last happened error code. */int dpecode = DP_ENOERR;/* Get a message string corresponding to an error code. */const char *dperrmsg(int ecode){  switch(ecode){  case DP_ENOERR: return "no error";  case DP_EFATAL: return "with fatal error";  case DP_EMODE: return "invalid mode";  case DP_EBROKEN: return "broken database file";  case DP_EKEEP: return "existing record";  case DP_ENOITEM: return "no item found";  case DP_EALLOC: return "memory allocation error";  case DP_EMAP: return "memory mapping error";  case DP_EOPEN: return "open error";  case DP_ECLOSE: return "close error";  case DP_ETRUNC: return "trunc error";  case DP_ESYNC: return "sync error";  case DP_ESTAT: return "stat error";  case DP_ESEEK: return "seek error";  case DP_EREAD: return "read error";  case DP_EWRITE: return "write error";  case DP_ELOCK: return "lock error";  case DP_EUNLINK: return "unlink error";  case DP_EMKDIR: return "mkdir error";  case DP_ERMDIR: return "rmdir error";  case DP_EMISC: return "miscellaneous error";  }  return "(invalid ecode)";}/* Get a database handle. */DEPOT *dpopen(const char *name, int omode, int bnum){  int mode, fd, inode, fsiz, rnum, c, msiz;  char hbuf[DP_HEADSIZ], *map;  struct stat sbuf;  DEPOT *depot;  assert(name);  mode = O_RDONLY;  if(omode & DP_OWRITER){    mode = O_RDWR;    if(omode & DP_OCREAT) mode |= O_CREAT;  }  if((fd = open(name, mode, DP_FILEMODE)) == -1){    dpecode = DP_EOPEN;    return NULL;  }  if(!(omode & DP_ONOLCK)){    if(!dplock(fd, omode & DP_OWRITER)){      close(fd);      return NULL;    }  }  if((omode & DP_OWRITER) && (omode & DP_OTRUNC)){    if(ftruncate(fd, 0) == -1){      close(fd);      dpecode = DP_ETRUNC;      return NULL;    }  }  if(fstat(fd, &sbuf) == -1 || !S_ISREG(sbuf.st_mode)){    close(fd);    dpecode = DP_ESTAT;    return NULL;  }  inode = sbuf.st_ino;  fsiz = sbuf.st_size;  if((omode & DP_OWRITER) && fsiz == 0){    memset(hbuf, 0, DP_HEADSIZ);    if(dpbigendian()){      memcpy(hbuf, DP_MAGICNUMB, strlen(DP_MAGICNUMB));    } else {      memcpy(hbuf, DP_MAGICNUML, strlen(DP_MAGICNUML));    }    bnum = bnum < 1 ? DP_DEFBNUM : bnum;    bnum = dpgetprime(bnum);    memcpy(hbuf + DP_BNUMOFF, &bnum, sizeof(int));    rnum = 0;    memcpy(hbuf + DP_RNUMOFF, &rnum, sizeof(int));    c = 0;    fsiz = DP_HEADSIZ + bnum * sizeof(int);    memcpy(hbuf + DP_FSIZOFF, &fsiz, sizeof(int));    if(!dpseekwrite(fd, 0, hbuf, DP_HEADSIZ) ||       !dpseekwrite(fd, fsiz - 1, &c, 1)){      close(fd);      return NULL;    }  }  if(!dpseekread(fd, 0, hbuf, DP_HEADSIZ)){    close(fd);    dpecode = DP_EBROKEN;    return NULL;  }  if((dpbigendian() ? memcmp(hbuf, DP_MAGICNUMB, strlen(DP_MAGICNUMB)) != 0 :      memcmp(hbuf, DP_MAGICNUML, strlen(DP_MAGICNUML)) != 0) ||     memcmp(hbuf + DP_FSIZOFF, &fsiz, sizeof(int)) != 0){    close(fd);    dpecode = DP_EBROKEN;    return NULL;  }  bnum = *((int *)(hbuf + DP_BNUMOFF));  rnum = *((int *)(hbuf + DP_RNUMOFF));  if(bnum < 1 || rnum < 0 || fsiz < DP_HEADSIZ + bnum * sizeof(int)){    close(fd);    dpecode = DP_EBROKEN;    return NULL;  }  msiz = DP_HEADSIZ + bnum * sizeof(int);  map = mmap(0, msiz, PROT_READ | ((mode & DP_OWRITER) ? PROT_WRITE : 0), MAP_SHARED, fd, 0);  if(map == MAP_FAILED){    close(fd);    dpecode = DP_EMAP;    return NULL;  }  if(!(depot = malloc(sizeof(DEPOT))) || !(depot->name = dpstrdup(name))){    munmap(map, msiz);    free(depot);    close(fd);    dpecode = DP_EALLOC;    return NULL;  }  depot->wmode = (mode & DP_OWRITER);  depot->inode = inode;  depot->fd = fd;  depot->fsiz = fsiz;  depot->map = map;  depot->msiz = msiz;  depot->buckets = (int *)(map + DP_HEADSIZ);  depot->bnum = bnum;  depot->rnum = rnum;  depot->fatal = FALSE;  depot->ioff = 0;  depot->mroff = -1;  depot->mrsiz = -1;  depot->align = 0;  return depot;}/* Close a database handle. */int dpclose(DEPOT *depot){  int err;  assert(depot);  err = FALSE;  if(depot->wmode){    *((int *)(depot->map + DP_FSIZOFF)) = depot->fsiz;    *((int *)(depot->map + DP_RNUMOFF)) = depot->rnum;  }  if(depot->map != MAP_FAILED){    if(munmap(depot->map, depot->msiz) == -1){      err = TRUE;      dpecode = DP_EMAP;    }  }  if(close(depot->fd) == -1){    err = TRUE;    dpecode = DP_ECLOSE;  }  free(depot->name);  free(depot);  return err ? FALSE : TRUE;}/* Store a record. */int dpput(DEPOT *depot, const char *kbuf, int ksiz, const char *vbuf, int vsiz, int dmode){  int head[DP_RHNUM], next[DP_RHNUM];  int i, hash, bi, off, entoff, newoff, rsiz, nsiz, fdel, mroff, mrsiz;  char *tval;  assert(depot && kbuf && vbuf);  if(depot->fatal){    dpecode = DP_EFATAL;    return FALSE;  }  if(!depot->wmode){    dpecode = DP_EMODE;    return FALSE;  }  if(ksiz < 0) ksiz = strlen(kbuf);  if(vsiz < 0) vsiz = strlen(vbuf);  newoff = -1;  hash = dpsecondhash(kbuf, ksiz);  switch(dprecsearch(depot, kbuf, ksiz, hash, &bi, &off, &entoff, head, TRUE)){  case -1:    depot->fatal = TRUE;    return FALSE;  case 0:    fdel = head[DP_RHIFLAGS] & DP_RECFDEL;    if(dmode == DP_DKEEP && !fdel){      dpecode = DP_EKEEP;      return FALSE;    }    if(fdel){      head[DP_RHIPSIZ] += head[DP_RHIVSIZ];      head[DP_RHIVSIZ] = 0;    }    rsiz = dprecsize(head);    nsiz = DP_RHNUM * sizeof(int) + ksiz + vsiz;    if(dmode == DP_DCAT) nsiz += head[DP_RHIVSIZ];    if(off + rsiz >= depot->fsiz){      if(rsiz < nsiz){        head[DP_RHIPSIZ] += nsiz - rsiz;        rsiz = nsiz;        depot->fsiz = off + rsiz;      }    } else {      for(i = 0; i < DP_COALMAX && nsiz > rsiz && off + rsiz < depot->fsiz; i++){        if(off + rsiz == depot->mroff){          depot->mroff = -1;          depot->mrsiz = -1;        }        if(!dprechead(depot, off + rsiz, next)) return FALSE;        if(!(next[DP_RHIFLAGS] & DP_RECFREUSE)) break;        head[DP_RHIPSIZ] += dprecsize(next);        rsiz += dprecsize(next);      }    }    if(nsiz <= rsiz){      if(!dprecover(depot, off, head, vbuf, vsiz, dmode == DP_DCAT)){        depot->fatal = TRUE;        return FALSE;      }    } else {      tval = NULL;      if(dmode == DP_DCAT){        if(!(tval = dprecval(depot, off, head, 0, -1))){          depot->fatal = TRUE;          return FALSE;        }        if(!(tval = realloc(tval, head[DP_RHIVSIZ] + vsiz))){          free(tval);          dpecode = DP_EALLOC;          depot->fatal = TRUE;          return FALSE;        }        memcpy(tval + head[DP_RHIVSIZ], vbuf, vsiz);        vsiz += head[DP_RHIVSIZ];        vbuf = tval;      }      mroff = depot->mroff;      mrsiz = depot->mrsiz;      if(!dprecdelete(depot, off, head, TRUE)){        free(tval);        depot->fatal = TRUE;        return FALSE;      }      if(mroff > 0 && nsiz <= mrsiz){        if(!dprecrewrite(depot, mroff, mrsiz, kbuf, ksiz, vbuf, vsiz,                         hash, head[DP_RHILEFT], head[DP_RHIRIGHT])){          free(tval);          depot->fatal = TRUE;          return FALSE;        }        newoff = mroff;      } else {        if((newoff = dprecappend(depot, kbuf, ksiz, vbuf, vsiz,                                 hash, head[DP_RHILEFT], head[DP_RHIRIGHT])) == -1){          free(tval);          depot->fatal = TRUE;          return FALSE;        }      }      free(tval);    }    if(fdel) depot->rnum++;    break;  default:    if((newoff = dprecappend(depot, kbuf, ksiz, vbuf, vsiz, hash, 0, 0)) == -1){      depot->fatal = TRUE;      return FALSE;    }    depot->rnum++;    break;  }  if(newoff > 0){    if(entoff > 0){      if(!dpseekwritenum(depot->fd, entoff, newoff)){        depot->fatal = TRUE;        return FALSE;      }    } else {      depot->buckets[bi] = newoff;    }  }  return TRUE;}/* Delete a record. */int dpout(DEPOT *depot, const char *kbuf, int ksiz){  int head[DP_RHNUM], hash, bi, off, entoff;  assert(depot && kbuf);  if(depot->fatal){    dpecode = DP_EFATAL;    return FALSE;  }  if(!depot->wmode){    dpecode = DP_EMODE;    return FALSE;  }  if(ksiz < 0) ksiz = strlen(kbuf);  hash = dpsecondhash(kbuf, ksiz);  switch(dprecsearch(depot, kbuf, ksiz, hash, &bi, &off, &entoff, head, FALSE)){  case -1:    depot->fatal = TRUE;    return FALSE;  case 0:    break;  default:    dpecode = DP_ENOITEM;    return FALSE;  }  if(!dprecdelete(depot, off, head, FALSE)){    depot->fatal = TRUE;    return FALSE;  }  depot->rnum--;  return TRUE;}/* Retrieve a record. */char *dpget(DEPOT *depot, const char *kbuf, int ksiz, int start, int max, int *sp){  int head[DP_RHNUM], hash, bi, off, entoff;  char *vbuf;  assert(depot && kbuf && start >= 0);  if(depot->fatal){    dpecode = DP_EFATAL;    return NULL;  }  if(ksiz < 0) ksiz = strlen(kbuf);  hash = dpsecondhash(kbuf, ksiz);  switch(dprecsearch(depot, kbuf, ksiz, hash, &bi, &off, &entoff, head, FALSE)){  case -1:    depot->fatal = TRUE;    return NULL;  case 0:    break;  default:    dpecode = DP_ENOITEM;    return NULL;  }  if(start > head[DP_RHIVSIZ]){    dpecode = DP_ENOITEM;    return NULL;  }  if(!(vbuf = dprecval(depot, off, head, start, max))){    depot->fatal = TRUE;    return NULL;  }  if(sp){    if(max < 0){      *sp = head[DP_RHIVSIZ];    } else {      *sp = max < head[DP_RHIVSIZ] ? max : head[DP_RHIVSIZ];    }  }  return vbuf;}/* Get the size of the value of a record. */int dpvsiz(DEPOT *depot, const char *kbuf, int ksiz){  int head[DP_RHNUM], hash, bi, off, entoff;  assert(depot && kbuf);  if(depot->fatal){    dpecode = DP_EFATAL;    return -1;  }  if(ksiz < 0) ksiz = strlen(kbuf);  hash = dpsecondhash(kbuf, ksiz);  switch(dprecsearch(depot, kbuf, ksiz, hash, &bi, &off, &entoff, head, FALSE)){  case -1:    depot->fatal = TRUE;    return -1;  case 0:    break;  default:    dpecode = DP_ENOITEM;    return -1;  }  return head[DP_RHIVSIZ];}/* Initialize the iterator of a database handle. */int dpiterinit(DEPOT *depot){  assert(depot);  if(depot->fatal){    dpecode = DP_EFATAL;    return FALSE;  }  depot->ioff = 0;  return TRUE;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -