⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 odeum.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
  if(odeum->fatal){    dpecode = DP_EFATAL;    return NULL;  }  if(!(tmp = vlget(odeum->rdocsdb, uri, -1, &tsiz))){    if(dpecode != DP_ENOITEM) odeum->fatal = TRUE;    return NULL;  }  if(tsiz != sizeof(int)){    free(tmp);    dpecode = DP_EBROKEN;    odeum->fatal = TRUE;    return NULL;  }  docid = *(int *)tmp;  free(tmp);  return odgetbyid(odeum, docid);}/* Retrieve a document by an ID number. */ODDOC *odgetbyid(ODEUM *odeum, int id){  char *tmp, *zbuf;  const char *uritmp, *attrstmp, *nwordstmp, *awordstmp;  int tsiz, uritsiz, attrstsiz, nwordstsiz, awordstsiz, zsiz;  ODDOC *doc;  CBMAP *map;  assert(odeum);  if(odeum->fatal){    dpecode = DP_EFATAL;    return NULL;  }  if(id < 1){    dpecode = DP_ENOITEM;    return NULL;  }  if(!(tmp = crget(odeum->docsdb, (char *)&id, sizeof(int), 0, -1, &tsiz))){    if(dpecode != DP_ENOITEM) odeum->fatal = TRUE;    return NULL;  }  if(_qdbm_inflate){    if(!(zbuf = _qdbm_inflate(tmp, tsiz, &zsiz))){      free(tmp);      dpecode = DP_EBROKEN;      odeum->fatal = TRUE;      return NULL;    }    free(tmp);    tmp = zbuf;    tsiz = zsiz;  }  map = cbmapload(tmp, tsiz);  free(tmp);  uritmp = cbmapget(map, OD_URIEXPR, sizeof(OD_URIEXPR), &uritsiz);  attrstmp = cbmapget(map, OD_ATTRSEXPR, sizeof(OD_ATTRSEXPR), &attrstsiz);  nwordstmp = cbmapget(map, OD_NWORDSEXPR, sizeof(OD_NWORDSEXPR), &nwordstsiz);  awordstmp = cbmapget(map, OD_AWORDSEXPR, sizeof(OD_AWORDSEXPR), &awordstsiz);  if(!uritmp || !attrstmp || !nwordstmp || !awordstmp){    cbmapclose(map);    dpecode = DP_EBROKEN;    odeum->fatal = TRUE;    return NULL;  }  doc = cbmalloc(sizeof(ODDOC));  doc->id = id;  doc->uri = cbmemdup(uritmp, uritsiz);  doc->attrs = cbmapload(attrstmp, attrstsiz);  doc->nwords = cblistload(nwordstmp, nwordstsiz);  doc->awords = cblistload(awordstmp, awordstsiz);  cbmapclose(map);  return doc;}/* Search the inverted index for documents including a word. 
*/ODPAIR *odsearch(ODEUM *odeum, const char *word, int max, int *np){  char *tmp;  int tsiz;  assert(odeum && word && np);  if(odeum->fatal){    dpecode = DP_EFATAL;    return NULL;  }  if(odeum->wmode && cbmaprnum(odeum->sortmap) > 0 && !odsortindex(odeum)){    odeum->fatal = TRUE;    return NULL;  }  max = max < 0 ? -1 : max * sizeof(ODPAIR);  if(!(tmp = crget(odeum->indexdb, word, -1, 0, max, &tsiz))){    if(dpecode != DP_ENOITEM){      odeum->fatal = TRUE;      return NULL;    }    *np = 0;    return cbmalloc(1);  }  *np = tsiz / sizeof(ODPAIR);  return (ODPAIR *)tmp;}/* Get the number of documents including a word. */int odsearchdnum(ODEUM *odeum, const char *word){  int rv;  assert(odeum && word);  if(odeum->fatal){    dpecode = DP_EFATAL;    return -1;  }  rv = crvsiz(odeum->indexdb, word, -1);  return rv < 0 ? -1 : rv / sizeof(ODPAIR);}/* Initialize the iterator of a database handle. */int oditerinit(ODEUM *odeum){  assert(odeum);  if(odeum->fatal){    dpecode = DP_EFATAL;    return FALSE;  }  return criterinit(odeum->docsdb);}/* Get the next key of the iterator. */ODDOC *oditernext(ODEUM *odeum){  char *tmp;  int tsiz, docsid;  ODDOC *doc;  assert(odeum);  if(odeum->fatal){    dpecode = DP_EFATAL;    return NULL;  }  doc = NULL;  while(TRUE){    if(!(tmp = criternext(odeum->docsdb, &tsiz))){      if(dpecode != DP_ENOITEM) odeum->fatal = TRUE;      return NULL;    }    if(tsiz != sizeof(int)){      free(tmp);      dpecode = DP_EBROKEN;      odeum->fatal = TRUE;      return NULL;    }    docsid = *(int *)tmp;    free(tmp);    if((doc = odgetbyid(odeum, docsid)) != NULL) break;    if(dpecode != DP_ENOITEM){      odeum->fatal = TRUE;      return NULL;    }  }  return doc;}/* Synchronize updating contents with the files and the devices. 
*/
/* The handle must be a writer; otherwise `dpecode' is set to DP_EMODE and
   FALSE is returned.  The `dmax' and `dnum' counters are stored as decimal
   strings in the reverse documents database, then the index is sorted and the
   three underlying databases are synchronized in turn.  The first failing step
   marks the handle fatal and makes the function return FALSE. */
int odsync(ODEUM *odeum){
  char numbuf[OD_NUMBUFSIZ];
  int stored;
  assert(odeum);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return FALSE;
  }
  if(!odeum->wmode){
    dpecode = DP_EMODE;
    return FALSE;
  }
  sprintf(numbuf, "%d", odeum->dmax);
  stored = vlput(odeum->rdocsdb, OD_DMAXEXPR, sizeof(OD_DMAXEXPR), numbuf, -1, VL_DOVER);
  if(stored){
    sprintf(numbuf, "%d", odeum->dnum);
    stored = vlput(odeum->rdocsdb, OD_DNUMEXPR, sizeof(OD_DNUMEXPR), numbuf, -1, VL_DOVER);
  }
  /* short-circuit evaluation keeps the steps in order and stops at the first failure */
  if(!stored || !odsortindex(odeum) || !crsync(odeum->docsdb) ||
     !crsync(odeum->indexdb) || !vlsync(odeum->rdocsdb)){
    odeum->fatal = TRUE;
    return FALSE;
  }
  return TRUE;
}


/* Optimize a database.
   The handle must be a writer; otherwise `dpecode' is set to DP_EMODE and
   FALSE is returned.  The index is purged and sorted, then the three
   underlying databases are optimized in turn.  The first failing step marks
   the handle fatal and makes the function return FALSE. */
int odoptimize(ODEUM *odeum){
  assert(odeum);
  if(odeum->fatal){
    dpecode = DP_EFATAL;
    return FALSE;
  }
  if(!odeum->wmode){
    dpecode = DP_EMODE;
    return FALSE;
  }
  /* short-circuit evaluation keeps the steps in order and stops at the first failure */
  if(!odpurgeindex(odeum) || !odsortindex(odeum) ||
     !croptimize(odeum->docsdb, -1) || !croptimize(odeum->indexdb, -1) ||
     !vloptimize(odeum->rdocsdb)){
    odeum->fatal = TRUE;
    return FALSE;
  }
  return TRUE;
}


/* Get the name of a database.
   The return value is a copy of the name made with `cbmemdup', or NULL with
   `dpecode' set to DP_EFATAL if the handle is marked fatal. */
char *odname(ODEUM *odeum){
  assert(odeum);
  if(!odeum->fatal) return cbmemdup(odeum->name, -1);
  dpecode = DP_EFATAL;
  return NULL;
}


/* Get the total size of database files. 
*/int odfsiz(ODEUM *odeum){  int fsiz, rv;  assert(odeum);  if(odeum->fatal){    dpecode = DP_EFATAL;    return -1;  }  fsiz = 0;  if((rv = crfsiz(odeum->docsdb)) == -1) return -1;  fsiz += rv;  if((rv = crfsiz(odeum->indexdb)) == -1) return -1;  fsiz += rv;  if((rv = vlfsiz(odeum->rdocsdb)) == -1) return -1;  fsiz += rv;  return fsiz;}/* Get the number of the elements of the bucket array used in the inverted index. */int odbnum(ODEUM *odeum){  assert(odeum);  if(odeum->fatal){    dpecode = DP_EFATAL;    return -1;  }  return crbnum(odeum->indexdb);}/* Get the number of the documents stored in a database. */int oddnum(ODEUM *odeum){  assert(odeum);  if(odeum->fatal){    dpecode = DP_EFATAL;    return -1;  }  return odeum->dnum;}/* Get the number of the words stored in a database. */int odwnum(ODEUM *odeum){  assert(odeum);  if(odeum->fatal){    dpecode = DP_EFATAL;    return -1;  }  return crrnum(odeum->indexdb);}/* Check whether a database handle is a writer or not. */int odwritable(ODEUM *odeum){  assert(odeum);  return odeum->wmode;}/* Check whether a database has a fatal error or not. */int odfatalerror(ODEUM *odeum){  assert(odeum);  return odeum->fatal;}/* Get the inode number of a database directory. */int odinode(ODEUM *odeum){  assert(odeum);  return odeum->inode;}/* Remove a database directory. 
*/int odremove(const char *name){  char docsname[OD_PATHBUFSIZ], indexname[OD_PATHBUFSIZ], rdocsname[OD_PATHBUFSIZ];  char path[OD_PATHBUFSIZ];  const char *file;  struct stat sbuf;  CBLIST *list;  int i;  assert(name);  sprintf(docsname, "%s%c%s", name, MYPATHCHR, OD_DOCSNAME);  sprintf(indexname, "%s%c%s", name, MYPATHCHR, OD_INDEXNAME);  sprintf(rdocsname, "%s%c%s", name, MYPATHCHR, OD_RDOCSNAME);  if(stat(name, &sbuf) == -1){    dpecode = DP_ESTAT;    return FALSE;  }  if(stat(docsname, &sbuf) != -1 && !crremove(docsname)) return FALSE;  if(stat(indexname, &sbuf) != -1 && !crremove(indexname)) return FALSE;  if(stat(rdocsname, &sbuf) != -1 && !vlremove(rdocsname)) return FALSE;  if((list = cbdirlist(name)) != NULL){    for(i = 0; i < cblistnum(list); i++){      file = cblistval(list, i, NULL);      if(!strcmp(file, MYCDIRSTR) || !strcmp(file, MYPDIRSTR)) continue;      sprintf(path, "%s%c%s", name, MYPATHCHR, file);      if(stat(path, &sbuf) == -1) continue;      if(S_ISDIR(sbuf.st_mode)){        if(!crremove(path)) return FALSE;      } else {        if(!dpremove(path)) return FALSE;      }    }    cblistclose(list);  }  if(rmdir(name) == -1){    dpecode = DP_ERMDIR;    return FALSE;  }  return TRUE;}/* Get a document handle. */ODDOC *oddocopen(const char *uri){  ODDOC *doc;  assert(uri);  doc = cbmalloc(sizeof(ODDOC));  doc->id = -1;  doc->uri = cbmemdup(uri, -1);  doc->attrs = cbmapopen();  doc->nwords = cblistopen();  doc->awords = cblistopen();  return doc;}/* Close a document handle. */void oddocclose(ODDOC *doc){  assert(doc);  cblistclose(doc->awords);  cblistclose(doc->nwords);  cbmapclose(doc->attrs);  free(doc->uri);  free(doc);}/* Add an attribute to a document. */void oddocaddattr(ODDOC *doc, const char *name, const char *value){  assert(doc && name && value);  cbmapput(doc->attrs, name, -1, value, -1, TRUE);}/* Add a word to a document. 
*/
/* `normal' is appended to the `nwords' list of the document and `asis' to its
   `awords' list, so both lists grow by one element on every call. */
void oddocaddword(ODDOC *doc, const char *normal, const char *asis){
  CBLIST *nlist, *alist;
  assert(doc && normal && asis);
  nlist = doc->nwords;
  alist = doc->awords;
  cblistpush(nlist, normal, -1);
  cblistpush(alist, asis, -1);
}


/* Get the ID number of a document.
   The return value is the `id' member of the handle. */
int oddocid(const ODDOC *doc){
  int id;
  assert(doc);
  id = doc->id;
  return id;
}


/* Get the URI of a document.
   The return value points to the `uri' member stored inside the handle; it
   is not a copy. */
const char *oddocuri(const ODDOC *doc){
  const char *uri;
  assert(doc);
  uri = doc->uri;
  return uri;
}


/* Get the value of an attribute of a document. */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -