⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 odmgr.c

📁 harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
    if(score >= 0) printf("SCORE: %d\n", score);    scores = oddocscores(doc, MAXKEYWORDS, odeum);    kwords = cblistopen();    printf("KEYWORDS: ");    cbmapiterinit(scores);    while((word = cbmapiternext(scores, &wsiz)) != NULL){      if(cblistnum(kwords) > 0) printf(", ");      printf("%s (%s)", word, cbmapget(scores, word, wsiz, NULL));      cblistpush(kwords, word, wsiz);    }    putchar('\n');    summary = docsummary(doc, skeys ? skeys : kwords, MAXSUMMARY, skeys != NULL);    printf("SUMMARY: %s\n", summary);    free(summary);    cblistclose(kwords);    cbmapclose(scores);    printf("\n\n");  } else if(tb){    printf("%d\t%s\t%s\t%s\t%s\t%d\n", oddocid(doc), oddocuri(doc),           title ? title : "", author ? author : "", date ? date : "", score);    words = oddocnwords(doc);    for(i = 0; i < cblistnum(words); i++){      word = cblistval(words, i, &wsiz);      if(i > 0) putchar('\t');      printf("%s", word);    }    putchar('\n');    words = oddocawords(doc);    for(i = 0; i < cblistnum(words); i++){      word = cblistval(words, i, &wsiz);      if(i > 0) putchar('\t');      printf("%s", word);    }    putchar('\n');  } else {    printf("%d\t%s\t%d\n", oddocid(doc), oddocuri(doc), score);  }}/* get a list handle contains summary of a document */char *docsummary(const ODDOC *doc, const CBLIST *kwords, int num, int hilight){  const CBLIST *nwords, *awords;  CBMAP *kmap, *map;  const char *normal, *asis;  char *sbuf;  int i, j, bsiz, ssiz, lnum, nwsiz, awsiz, pv, bi, first;  bsiz = 256;  sbuf = cbmalloc(bsiz);  ssiz = 0;  nwords = oddocnwords(doc);  awords = oddocawords(doc);  kmap = listtomap(kwords);  map = listtomap(kwords);  lnum = cblistnum(nwords);  first = TRUE;  for(i = 0; i < lnum && i < SUMMARYWIDTH; i++){    normal = cblistval(nwords, i, &nwsiz);    asis = cblistval(awords, i, &awsiz);    if(awsiz < 1) continue;    cbmapout(map, normal, nwsiz);    if(ssiz + awsiz + 16 >= bsiz){      bsiz = bsiz * 2 + awsiz;      sbuf = cbrealloc(sbuf, bsiz);    }  
  if(!first) ssiz += sprintf(sbuf + ssiz, " ");    if(hilight && normal[0] != '\0' && cbmapget(kmap, normal, nwsiz, NULL)){      ssiz += sprintf(sbuf + ssiz, "<<%s>>", asis);    } else {      ssiz += sprintf(sbuf + ssiz, "%s", asis);    }    first = FALSE;    num--;  }  ssiz += sprintf(sbuf + ssiz, " ...");  pv = i;  while(i < lnum){    if(cbmaprnum(map) < 1){      cbmapclose(map);      map = listtomap(kwords);    }    normal = cblistval(nwords, i, &nwsiz);    if(cbmapget(map, normal, nwsiz, NULL)){      bi = i - SUMMARYWIDTH / 2;      bi = bi > pv ? bi : pv;      for(j = bi; j < lnum && j <= bi + SUMMARYWIDTH; j++){        normal = cblistval(nwords, j, &nwsiz);        asis = cblistval(awords, j, &awsiz);        if(awsiz < 1) continue;        cbmapout(map, normal, nwsiz);        if(ssiz + awsiz + 16 >= bsiz){          bsiz = bsiz * 2 + awsiz;          sbuf = cbrealloc(sbuf, bsiz);        }        ssiz += sprintf(sbuf + ssiz, " ");        if(hilight && normal[0] != '\0' && cbmapget(kmap, normal, nwsiz, NULL)){          ssiz += sprintf(sbuf + ssiz, "<<%s>>", asis);        } else {          ssiz += sprintf(sbuf + ssiz, "%s", asis);        }        num--;      }      ssiz += sprintf(sbuf + ssiz, " ...");      i = j;      pv = i;    } else {      i++;    }    if(num <= 0) break;  }  cbmapclose(map);  cbmapclose(kmap);  return sbuf;}/* get a map made from a list */CBMAP *listtomap(const CBLIST *list){  CBMAP *map;  const char *tmp;  int i, tsiz;  map = cbmapopen();  for(i = 0; i < cblistnum(list); i++){    tmp = cblistval(list, i, &tsiz);    cbmapput(map, tmp, tsiz, "", 0, FALSE);  }  return map;}/* perform create command */int docreate(const char *name){  ODEUM *odeum;  if(!(odeum = odopen(name, OD_OWRITER | OD_OCREAT | OD_OTRUNC))){    pdperror(name);    return 1;  }  if(!odclose(odeum)){    pdperror(name);    return 1;  }  return 0;}/* perform put command */int doput(const char *name, const char *text, const char *uri, const char *title,          const char *author, 
const char *date, int wmax, int keep){  ODEUM *odeum;  ODDOC *doc;  CBLIST *awords;  const char *asis;  char *normal;  int i;  if(!(odeum = odopen(name, OD_OWRITER))){    pdperror(name);    return 1;  }  doc = oddocopen(uri);  if(title) oddocaddattr(doc, "title", title);  if(author) oddocaddattr(doc, "author", author);  if(date) oddocaddattr(doc, "date", date);  awords = odbreaktext(text);  for(i = 0; i < cblistnum(awords); i++){    asis = cblistval(awords, i, NULL);    normal = odnormalizeword(asis);    oddocaddword(doc, normal, asis);    free(normal);  }  cblistclose(awords);  if(!odput(odeum, doc, wmax, keep ? FALSE : TRUE)){    pdperror(name);    oddocclose(doc);    odclose(odeum);    return 1;  }  oddocclose(doc);  if(!odclose(odeum)){    pdperror(name);    return 1;  }  return 0;}/* perform out command */int doout(const char *name, const char *uri, int id){  ODEUM *odeum;  if(!(odeum = odopen(name, OD_OWRITER))){    pdperror(name);    return 1;  }  if(id > 0){    if(!odoutbyid(odeum, id)){      pdperror(name);      odclose(odeum);      return 1;    }  } else {    if(!odout(odeum, uri)){      pdperror(name);      odclose(odeum);      return 1;    }  }  if(!odclose(odeum)){    pdperror(name);    return 1;  }  return 0;}/* perform get command */int doget(const char *name, const char *uri, int id, int tb, int hb){  ODEUM *odeum;  ODDOC *doc;  if(!(odeum = odopen(name, OD_OREADER))){    pdperror(name);    return 1;  }  if(id > 0){    if(!(doc = odgetbyid(odeum, id))){      pdperror(name);      odclose(odeum);      return 1;    }  } else {    if(!(doc = odget(odeum, uri))){      pdperror(name);      odclose(odeum);      return 1;    }  }  printdoc(doc, tb, hb, -1, odeum, NULL);  oddocclose(doc);  if(!odclose(odeum)){    pdperror(name);    return 1;  }  return 0;}/* perform search command */int dosearch(const char *name, const char *text, int max, int or, int idf,             int tb, int hb, int nb){  ODEUM *odeum;  CBLIST *awords, *nwords, *uris, *hits;  ODPAIR 
*pairs, *last, *tmp;  ODDOC *doc;  const char *asis;  char *normal, numbuf[32];  int i, j, pnum, lnum, hnum, tnum, shows;  double ival;  if(!(odeum = odopen(name, OD_OREADER))){    pdperror(name);    return 1;  }  awords = odbreaktext(text);  nwords = cblistopen();  uris = cblistopen();  hits = cblistopen();  last = NULL;  lnum = 0;  for(i = 0; i < cblistnum(awords); i++){    asis = cblistval(awords, i, NULL);    normal = odnormalizeword(asis);    cblistpush(nwords, normal, -1);    if(strlen(normal) < 1){      free(normal);      continue;    }    if(!(pairs = odsearch(odeum, normal, or ? max : -1, &pnum))){      pdperror(name);      free(normal);      continue;    }    if((hnum = odsearchdnum(odeum, normal)) < 0) hnum = 0;    if(idf){      ival = odlogarithm(hnum);      ival = (ival * ival) / 4.0;      if(ival < 4.0) ival = 4.0;      for(j = 0; j < pnum; j++){        pairs[j].score /= ival;      }    }    cblistpush(uris, normal, -1);    sprintf(numbuf, "%d", hnum);    cblistpush(hits, numbuf, -1);    if(last){      if(or){        tmp = odpairsor(last, lnum, pairs, pnum, &tnum);      } else {        tmp = odpairsand(last, lnum, pairs, pnum, &tnum);      }      free(last);      free(pairs);      last = tmp;      lnum = tnum;    } else {      last = pairs;      lnum = pnum;    }    free(normal);  }  if(hb){    printf("TOTAL: %d\n", lnum);    printf("EACHWORD: ");  } else {    printf("%d", lnum);  }  for(i = 0; i < cblistnum(uris); i++){    if(hb){      if(i > 0) printf(", ");      printf("%s(%s)", cblistval(uris, i, NULL), cblistval(hits, i, NULL));    } else {      printf("\t%s\t%s", cblistval(uris, i, NULL), cblistval(hits, i, NULL));    }  }  putchar('\n');  if(hb) putchar('\n');  if(last){    if(max < 0) max = lnum;    shows = 0;    for(i = 0; i < lnum && shows < max; i++){      if(nb){        printf("%d\t%d\n", last[i].id, last[i].score);        shows++;      } else {        if(!(doc = odgetbyid(odeum, last[i].id))) continue;        printdoc(doc, tb, hb, 
last[i].score, odeum, nwords);        oddocclose(doc);        shows++;      }    }    free(last);  }  cblistclose(uris);  cblistclose(hits);  cblistclose(nwords);  cblistclose(awords);  if(!odclose(odeum)){    pdperror(name);    return 1;  }  return 0;}/* perform list command */int dolist(const char *name, int tb, int hb){  ODEUM *odeum;  ODDOC *doc;  if(!(odeum = odopen(name, OD_OREADER))){    pdperror(name);    return 1;  }  if(!oditerinit(odeum)){    odclose(odeum);    pdperror(name);    return 1;  }  while(TRUE){    if(!(doc = oditernext(odeum))){      if(dpecode == DP_ENOITEM) break;      odclose(odeum);      pdperror(name);      return 1;    }    printdoc(doc, tb, hb, -1, odeum, NULL);    oddocclose(doc);  }  if(!odclose(odeum)){    pdperror(name);    return 1;  }  return 0;}/* perform optimize command */int dooptimize(const char *name){  ODEUM *odeum;  if(!(odeum = odopen(name, OD_OWRITER))){    pdperror(name);    return 1;  }  if(!odoptimize(odeum)){    pdperror(name);    odclose(odeum);    return 1;  }  if(!odclose(odeum)){    pdperror(name);    return 1;  }  return 0;}/* perform inform command */int doinform(const char *name){  ODEUM *odeum;  char *tmp;  if(!(odeum = odopen(name, OD_OREADER))){    pdperror(name);    return 1;  }  tmp = odname(odeum);  printf("name: %s\n", tmp ? 
tmp : "(null)");  free(tmp);  printf("file size: %d\n", odfsiz(odeum));  printf("index buckets: %d\n", odbnum(odeum));  printf("all documents: %d\n", oddnum(odeum));  printf("all words: %d\n", odwnum(odeum));  printf("inode number: %d\n", odinode(odeum));  if(!odclose(odeum)){    pdperror(name);    return 1;  }  return 0;}/* perform remove command */int doremove(const char *name){  if(!odremove(name)){    pdperror(name);    return 1;  }  return 0;}/* perform break command */int dobreak(const char *text, int hb, int kb, int sb){  CBLIST *awords, *kwords;  CBMAP *scores;  ODDOC *doc;  const char *asis;  char *normal, *summary;  int i, first;  awords = odbreaktext(text);  if(kb || sb){    doc = oddocopen("");    for(i = 0; i < cblistnum(awords); i++){      asis = cblistval(awords, i, NULL);      normal = odnormalizeword(asis);      oddocaddword(doc, normal, asis);      free(normal);    }    scores = oddocscores(doc, MAXKEYWORDS, NULL);    cbmapiterinit(scores);    kwords = cbmapkeys(scores);    if(kb){      for(i = 0; i < cblistnum(kwords); i++){        if(i > 0) putchar('\t');        printf("%s", cblistval(kwords, i, NULL));      }      putchar('\n');    } else {      summary = docsummary(doc, kwords, MAXSUMMARY, FALSE);      printf("%s\n", summary);      free(summary);    }    cblistclose(kwords);    cbmapclose(scores);    oddocclose(doc);  } else if(hb){    printf("NWORDS: ");    first = TRUE;    for(i = 0; i < cblistnum(awords); i++){      asis = cblistval(awords, i, NULL);      normal = odnormalizeword(asis);      if(normal[0] == '\0'){        free(normal);        continue;      }      if(!first) putchar(' ');      first = FALSE;      printf("%s", normal);      free(normal);    }    putchar('\n');    printf("AWORDS: ");    first = TRUE;    for(i = 0; i < cblistnum(awords); i++){      asis = cblistval(awords, i, NULL);      if(asis[0] == '\0') continue;      if(!first) putchar(' ');      first = FALSE;      printf("%s", asis);    }    putchar('\n');  } else {    
for(i = 0; i < cblistnum(awords); i++){      asis = cblistval(awords, i, NULL);      normal = odnormalizeword(asis);      printf("%s\t%s\n", normal, asis);      free(normal);    }  }  cblistclose(awords);  return 0;}/* END OF FILE */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -