⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 villa.c

📁 harvest是一个下载html网页得机器人
💻 C
📖 第 1 页 / 共 4 页
字号:
    rp += step;    size -= step;    if(vnum < 1 || size < 1) break;    for(i = 0; i < vnum && size >= 1; i++){      vsiz = vlreadvnumbuf(rp, size, &step);      rp += step;      size -= step;      if(size < vsiz) break;      vbuf = rp;      rp += vsiz;      size -= vsiz;      if(i < 1){        rec.key = cbdatumopen(kbuf, ksiz);        rec.first = cbdatumopen(vbuf, vsiz);        rec.rest = NULL;      } else {        if(!rec.rest) rec.rest = cblistopen();        cblistpush(rec.rest, vbuf, vsiz);      }    }    if(i > 0) cblistpush(lent.recs, (char *)&rec, sizeof(VLREC));  }  free(buf);  cbmapput(villa->leafc, (char *)&(lent.id), sizeof(int), (char *)&lent, sizeof(VLLEAF), TRUE);  return (VLLEAF *)cbmapget(villa->leafc, (char *)&(lent.id), sizeof(int), NULL);}/* Add a record to a leaf.   `villa' specifies a database handle.   `leaf' specifies a leaf handle.   `dmode' specifies behavior when the key overlaps.   `kbuf' specifies the pointer to the region of a key.   `ksiz' specifies the size of the region of the key.   `vbuf' specifies the pointer to the region of a value.   `vsiz' specifies the size of the region of the value.   The return value is true if successful, else, it is false. */static int vlleafaddrec(VILLA *villa, VLLEAF *leaf, int dmode,                        const char *kbuf, int ksiz, const char *vbuf, int vsiz){  VLREC *recp, rec;  int i, rv, left, right, ln;  assert(villa && leaf && kbuf && ksiz >= 0 && vbuf && vsiz >= 0);  left = 0;  ln = CB_LISTNUM(leaf->recs);  right = ln;  i = (left + right) / 2;  while(right >= left && i < ln){    recp = (VLREC *)CB_LISTVAL(leaf->recs, i, NULL);    rv = villa->cmp(kbuf, ksiz, CB_DATUMPTR(recp->key), CB_DATUMSIZE(recp->key));    if(rv == 0){      break;    } else if(rv <= 0){      right = i - 1;    } else {      left = i + 1;    }    i = (left + right) / 2;  }  while(i < ln){    recp = (VLREC *)CB_LISTVAL(leaf->recs, i, NULL);    rv = villa->cmp(kbuf, ksiz, CB_DATUMPTR(recp->key), CB_DATUMSIZE(recp->key));    if(rv == 0){      switch(dmode){      case VL_DOVER:        cbdatumclose(recp->first);        recp->first = cbdatumopen(vbuf, vsiz);        break;      case VL_DKEEP:        return FALSE;      default:        if(!recp->rest) recp->rest = cblistopen();        cblistpush(recp->rest, vbuf, vsiz);        villa->rnum++;        break;      }      break;    } else if(rv < 0){      rec.key = cbdatumopen(kbuf, ksiz);      rec.first = cbdatumopen(vbuf, vsiz);      rec.rest = NULL;      cblistinsert(leaf->recs, i, (char *)&rec, sizeof(VLREC));      villa->rnum++;      break;    }    i++;  }  if(i >= ln){    rec.key = cbdatumopen(kbuf, ksiz);    rec.first = cbdatumopen(vbuf, vsiz);    rec.rest = NULL;    cblistpush(leaf->recs, (char *)&rec, sizeof(VLREC));    villa->rnum++;  }  leaf->dirty = TRUE;  return TRUE;}/* Divide a leaf into two.   `villa' specifies a database handle.   `leaf' specifies a leaf handle.   The return value is the handle of a new leaf, or `NULL' on failure. */static VLLEAF *vlleafdivide(VILLA *villa, VLLEAF *leaf){  VLLEAF *newleaf, *nextleaf;  VLREC *recp;  int i, mid, ln;  assert(villa && leaf);  mid = CB_LISTNUM(leaf->recs) / 2;  recp = (VLREC *)CB_LISTVAL(leaf->recs, mid, NULL);  newleaf = vlleafnew(villa, leaf->id, leaf->next);  if(newleaf->next != -1){    if(!(nextleaf = vlleafload(villa, newleaf->next))) return NULL;    nextleaf->prev = newleaf->id;    nextleaf->dirty = TRUE;  }  leaf->next = newleaf->id;  leaf->dirty = TRUE;  ln = CB_LISTNUM(leaf->recs);  for(i = mid; i < ln; i++){    recp = (VLREC *)CB_LISTVAL(leaf->recs, i, NULL);    cblistpush(newleaf->recs, (char *)recp, sizeof(VLREC));  }  ln = CB_LISTNUM(newleaf->recs);  for(i = 0; i < ln; i++){    free(cblistpop(leaf->recs, NULL));  }  return newleaf;}/* Create a new node.   `villa' specifies a database handle.   `id' specifies the ID number of the node.   The return value is a handle of the node. */static VLNODE *vlnodenew(VILLA *villa, int heir){  VLNODE nent;  assert(villa && heir >= VL_LEAFIDMIN);  nent.id = villa->nnum + VL_NODEIDMIN;  nent.dirty = TRUE;  nent.heir = heir;  nent.idxs = cblistopen();  villa->nnum++;  cbmapput(villa->nodec, (char *)&(nent.id), sizeof(int), (char *)&nent, sizeof(VLNODE), TRUE);  return (VLNODE *)cbmapget(villa->nodec, (char *)&(nent.id), sizeof(int), NULL);}/* Remove a node from the cache.   `villa' specifies a database handle.   `id' specifies the ID number of the node.   The return value is true if successful, else, it is false. */static int vlnodecacheout(VILLA *villa, int id){  VLNODE *node;  VLIDX *idxp;  int i, err, ln;  assert(villa && id >= VL_NODEIDMIN);  if(!(node = (VLNODE *)cbmapget(villa->nodec, (char *)&id, sizeof(int), NULL))) return FALSE;  err = FALSE;  if(node->dirty){    if(!vlnodesave(villa, node)) err = TRUE;  }  ln = CB_LISTNUM(node->idxs);  for(i = 0; i < ln; i++){    idxp = (VLIDX *)CB_LISTVAL(node->idxs, i, NULL);    cbdatumclose(idxp->key);  }  cblistclose(node->idxs);  cbmapout(villa->nodec, (char *)&id, sizeof(int));  return err ? FALSE : TRUE;}/* Save a node into the database.   `villa' specifies a database handle.   `node' specifies a node handle.   The return value is true if successful, else, it is false. */static int vlnodesave(VILLA *villa, VLNODE *node){  CBDATUM *buf;  char vnumbuf[VL_VNUMBUFSIZ];  VLIDX *idxp;  int i, heir, pid, ksiz, vnumsiz, ln;  assert(villa && node);  buf = cbdatumopen(NULL, 0);  heir = node->heir;  vnumsiz = vlsetvnumbuf(vnumbuf, heir);  cbdatumcat(buf, vnumbuf, vnumsiz);  ln = CB_LISTNUM(node->idxs);  for(i = 0; i < ln; i++){    idxp = (VLIDX *)CB_LISTVAL(node->idxs, i, NULL);    pid = idxp->pid;    vnumsiz = vlsetvnumbuf(vnumbuf, pid);    cbdatumcat(buf, vnumbuf, vnumsiz);    ksiz = CB_DATUMSIZE(idxp->key);    vnumsiz = vlsetvnumbuf(vnumbuf, ksiz);    cbdatumcat(buf, vnumbuf, vnumsiz);    cbdatumcat(buf, CB_DATUMPTR(idxp->key), ksiz);  }  villa->avgnsiz = (villa->avgnsiz * 9 + CB_DATUMSIZE(buf)) / 10;  if(!dpsetalign(villa->depot, villa->avgnsiz * VL_ALIGNRATIO) ||     !dpput(villa->depot, (char *)&(node->id), sizeof(int),            CB_DATUMPTR(buf), CB_DATUMSIZE(buf), DP_DOVER)){    cbdatumclose(buf);    if(dpecode == DP_EMODE) dpecode = DP_EBROKEN;    return FALSE;  }  cbdatumclose(buf);  node->dirty = FALSE;  return TRUE;}/* Load a node from the database.   `villa' specifies a database handle.   `id' specifies the ID number of the node.   If successful, the return value is the pointer to the node, else, it is `NULL'. */static VLNODE *vlnodeload(VILLA *villa, int id){  char *buf, *rp, *kbuf;  int size, step, heir, pid, ksiz;  VLNODE *node, nent;  VLIDX idx;  assert(villa && id >= VL_NODEIDMIN);  if((node = (VLNODE *)cbmapget(villa->nodec, (char *)&id, sizeof(int), NULL)) != NULL){    cbmapmove(villa->nodec, (char *)&id, sizeof(int), FALSE);    return node;  }  heir = -1;  if(!(buf = dpget(villa->depot, (char *)&id, sizeof(int), 0, -1, &size))) return NULL;  rp = buf;  if(size >= 1){    heir = vlreadvnumbuf(rp, size, &step);    rp += step;    size -= step;  }  if(heir < 0){    free(buf);    return NULL;  }  nent.id = id;  nent.dirty = FALSE;  nent.heir = heir;  nent.idxs = cblistopen();  while(size >= 1){    pid = vlreadvnumbuf(rp, size, &step);    rp += step;    size -= step;    if(size < 1) break;    ksiz = vlreadvnumbuf(rp, size, &step);    rp += step;    size -= step;    if(size < ksiz) break;    kbuf = rp;    rp += ksiz;    size -= ksiz;    idx.pid = pid;    idx.key = cbdatumopen(kbuf, ksiz);    cblistpush(nent.idxs, (char *)&idx, sizeof(VLIDX));  }  free(buf);  cbmapput(villa->nodec, (char *)&(nent.id), sizeof(int), (char *)&nent, sizeof(VLNODE), TRUE);  return (VLNODE *)cbmapget(villa->nodec, (char *)&(nent.id), sizeof(int), NULL);}/* Add an index to a node.   `villa' specifies a database handle.   `node' specifies a node handle.   `order' specifies whether the calling sequence is orderd or not.   `pid' specifies the ID number of referred page.   `kbuf' specifies the pointer to the region of a key.   `ksiz' specifies the size of the region of the key. */static void vlnodeaddidx(VILLA *villa, VLNODE *node, int order,                         int pid, const char *kbuf, int ksiz){  VLIDX idx, *idxp;  int i, rv, left, right, ln;  assert(villa && node && pid >= VL_LEAFIDMIN && kbuf && ksiz >= 0);  idx.pid = pid;  idx.key = cbdatumopen(kbuf, ksiz);  if(order){    cblistpush(node->idxs, (char *)&idx, sizeof(VLIDX));  } else {    left = 0;    right = CB_LISTNUM(node->idxs);    i = (left + right) / 2;    ln = CB_LISTNUM(node->idxs);    while(right >= left && i < ln){      idxp = (VLIDX *)CB_LISTVAL(node->idxs, i, NULL);      rv = villa->cmp(kbuf, ksiz, CB_DATUMPTR(idxp->key), CB_DATUMSIZE(idxp->key));      if(rv == 0){        break;      } else if(rv <= 0){        right = i - 1;      } else {        left = i + 1;      }      i = (left + right) / 2;    }    ln = CB_LISTNUM(node->idxs);    while(i < ln){      idxp = (VLIDX *)CB_LISTVAL(node->idxs, i, NULL);      if(villa->cmp(kbuf, ksiz, CB_DATUMPTR(idxp->key), CB_DATUMSIZE(idxp->key)) < 0){        cblistinsert(node->idxs, i, (char *)&idx, sizeof(VLIDX));        break;      }      i++;    }    if(i >= CB_LISTNUM(node->idxs)) cblistpush(node->idxs, (char *)&idx, sizeof(VLIDX));  }  node->dirty = TRUE;}/* Search the leaf corresponding to a key.   `villa' specifies a database handle.   `kbuf' specifies the pointer to the region of a key.   `ksiz' specifies the size of the region of the key.   `hist' specifies an array history of visited nodes.  If `NULL', it is not used.   `hnp' specifies the pointer to a variable to which the number of elements of the history   assigned.  If `NULL', it is not used.   The return value is the ID number of the leaf, or -1 on failure. */static int vlsearchleaf(VILLA *villa, const char *kbuf, int ksiz, int *hist, int *hnp){  VLNODE *node;  VLIDX *idxp;  int i, pid, level, rv, left, right, ln;  assert(villa && kbuf && ksiz >= 0);  pid = villa->root;  idxp = NULL;  level = 0;  while(pid >= VL_NODEIDMIN){    if(!(node = vlnodeload(villa, pid)) || (ln = CB_LISTNUM(node->idxs)) < 1){      dpecode = DP_EBROKEN;      if(hnp) *hnp = level;      return -1;    }    if(hist) hist[level++] = node->id;    left = 1;    right = ln;    i = (left + right) / 2;    while(right >= left && i < ln){      idxp = (VLIDX *)CB_LISTVAL(node->idxs, i, NULL);      rv = villa->cmp(kbuf, ksiz, CB_DATUMPTR(idxp->key), CB_DATUMSIZE(idxp->key));      if(rv == 0){        break;      } else if(rv <= 0){        right = i - 1;      } else {        left = i + 1;      }      i = (left + right) / 2;    }    if(i > 0) i--;    while(i < ln){      idxp = (VLIDX *)CB_LISTVAL(node->idxs, i, NULL);      if(villa->cmp(kbuf, ksiz, CB_DATUMPTR(idxp->key), CB_DATUMSIZE(idxp->key)) < 0){        if(i == 0){          pid = node->heir;          break;        }        idxp = (VLIDX *)CB_LISTVAL(node->idxs, i - 1, NULL);        pid = idxp->pid;        break;      }      i++;    }    if(i >= ln) pid = idxp->pid;  }  if(hnp) *hnp = level;  return pid;}/* Adjust the caches for leaves and nodes.   `villa' specifies a database handle.   The return value is true if successful, else, it is false. */static int vlcacheadjust(VILLA *villa){  const char *tmp;  int i, pid, err;  err = FALSE;  if(cbmaprnum(villa->leafc) > villa->leafcnum){    cbmapiterinit(villa->leafc);    for(i = 0; i < VL_CACHEOUT; i++){      tmp = cbmapiternext(villa->leafc, NULL);      pid = *(int *)tmp;      if(!vlleafcacheout(villa, pid)) err = TRUE;    }  }  if(cbmaprnum(villa->nodec) > villa->nodecnum){    cbmapiterinit(villa->nodec);    for(i = 0; i < VL_CACHEOUT; i++){      tmp = cbmapiternext(villa->nodec, NULL);      pid = *(int *)tmp;      if(!vlnodecacheout(villa, pid)) err = TRUE;    }  }  return err ? FALSE : TRUE;}/* Search a record of a leaf.   `villa' specifies a database handle.   `leaf' specifies a leaf handle.   `kbuf' specifies the pointer to the region of a key.   `ksiz' specifies the size of the region of the key.   `ip' specifies the pointer to a variable to fetch the index of the correspnding record.   The return value is the pointer to a corresponding record, or `NULL' on failure. */static VLREC *vlrecsearch(VILLA *villa, VLLEAF *leaf, const char *kbuf, int ksiz, int *ip){  int i, rv, left, right, ln;  VLREC *recp;  assert(villa && leaf && kbuf && ksiz >= 0);  ln = CB_LISTNUM(leaf->recs);  left = 0;  right = ln;  i = (left + right) / 2;  while(right >= left && i < ln){    recp = (VLREC *)CB_LISTVAL(leaf->recs, i, NULL);    rv = villa->cmp(kbuf, ksiz, CB_DATUMPTR(recp->key), CB_DATUMSIZE(recp->key));    if(rv == 0){      if(ip) *ip = i;      return recp;    } else if(rv <= 0){      right = i - 1;    } else {      left = i + 1;    }    i = (left + right) / 2;  }  if(ip) *ip = i;  return NULL;}/* END OF FILE */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -