⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 qfts.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
  scdb = dpopen(path, DP_OREADER, -1);  htmlprintf("<p>");  last = NULL;  lnum = 0;  for(i = 0; i < cblistnum(words); i++){    word = cblistval(words, i, NULL);    if(!(pairs = odsearch(odeum, word, -1, &pnum))) continue;    if((hnum = odsearchdnum(odeum, word)) < 0) hnum = 0;    ival = odlogarithm(hnum);    ival = (ival * ival) / 2.0;    if(ival < 2.0) ival = 2.0;    for(j = 0; j < pnum; j++){      pairs[j].score /= ival;    }    if(last) htmlprintf(" %@ ", unit == UNITOR ? "+" : "*");    htmlprintf("<em>%@</em> (%d)", word, hnum);    if(last){      if(unit == UNITOR){        tmp = odpairsor(last, lnum, pairs, pnum, &tnum);      } else {        tmp = odpairsand(last, lnum, pairs, pnum, &tnum);      }      free(last);      free(pairs);      last = tmp;      lnum = tnum;    } else {      last = pairs;      lnum = pnum;    }  }  for(i = 0; i < cblistnum(ewords); i++){    word = cblistval(ewords, i, NULL);    if(!(pairs = odsearch(odeum, word, -1, &pnum))) continue;    if((hnum = odsearchdnum(odeum, word)) < 0) hnum = 0;    htmlprintf(" - <em>%@</em> (%d)", word, hnum);    if(last){      tmp = odpairsnotand(last, lnum, pairs, pnum, &tnum);      free(last);      free(pairs);      last = tmp;      lnum = tnum;    } else {      free(pairs);    }  }  htmlprintf(" = <em>%d</em> hits</p>\n", lnum);  if(last && lnum > 0){    for(i = skip; i < lnum && i < max + skip; i++){      if(!(doc = odgetbyid(odeum, last[i].id))){        htmlprintf("<dl>\n");        htmlprintf("<dt class=\"missing\">%d: (replaced or purged)</dt>\n", i + 1);        htmlprintf("</dl>\n");        continue;      }      ubuf = cbmemdup(oddocuri(doc), -1);      if(diridx && bwmatch(ubuf, diridx)){        ulen = strlen(ubuf) - strlen(diridx);        if(ulen > 1 && ubuf[ulen-1] == '/') ubuf[ulen] = '\0';      }      uri = ubuf;      if(fwmatch(uri, DEFPREFIX)) uri += strlen(DEFPREFIX);      title = oddocgetattr(doc, "title");      date = oddocgetattr(doc, "date");      author = oddocgetattr(doc, "author");      
htmlprintf("<dl>\n");      htmlprintf("<dt class=\"blur\">%d: <a href=\"%@%@\" class=\"title\">%@</a> (%d pt.)</dt>\n",                 i + 1, prefix, uri,                 title && strlen(title) > 0 ? title : "(untitled)", last[i].score);      scores = NULL;      if(scdb){        id = oddocid(doc);        if((mbuf = dpget(scdb, (char *)&id, sizeof(int), 0, -1, &msiz)) != NULL){          scores = cbmapload(mbuf, msiz);          free(mbuf);        }      }      showsummary(doc, words, phrase, unit, except, max, scores);      if(scores) cbmapclose(scores);      htmlprintf("<dd class=\"blur\">");      htmlprintf("%@%@", prefix, uri);      if(date) htmlprintf(" (%@)", date);      if(author) htmlprintf(" (%@)", author);      htmlprintf("</dd>\n");      htmlprintf("</dl>\n");      free(ubuf);      oddocclose(doc);    }  } else {    htmlprintf("<p>No document hits.</p>\n");  }  if(last) free(last);  htmlprintf("<div class=\"note\">");  if(skip > 0){    htmlprintf("<a href=\"%@?phrase=%?&amp;unit=%d&amp;except=%?&amp;max=%d&amp;skip=%d\">"               "[PREV]</a>", scriptname, phrase, unit, except, max, skip - max);  } else {    htmlprintf("<span class=\"blur\">[PREV]</span>");  }  htmlprintf(" ");  if(i < lnum){    htmlprintf("<a href=\"%@?phrase=%?&amp;unit=%d&amp;except=%?&amp;max=%d&amp;skip=%d\">"               "[NEXT]</a>", scriptname, phrase, unit, except, max, skip + max);  } else {    htmlprintf("<span class=\"blur\">[NEXT]</span>");  }  htmlprintf("</div>\n");  htmlprintf("<div class=\"note\">The index contains %d documents and %d words.</div>\n",             oddnum(odeum), odwnum(odeum));  if(scdb) dpclose(scdb);  odclose(odeum);}/* show summary of a document */void showsummary(const ODDOC *doc, const CBLIST *kwords, const char *phrase, int unit,                 const char *except, int max, CBMAP *scores){  const CBLIST *nwords, *awords;  CBMAP *kmap, *map;  const char *normal, *asis, *kbuf;  int i, j, num, lnum, nwsiz, awsiz, pv, bi, first, em;  
htmlprintf("<dd class=\"summary\">");  num = SUMWORDMAX;  nwords = oddocnwords(doc);  awords = oddocawords(doc);  kmap = kwords ? listtomap(kwords) : cbmapopen();  map = kwords ? listtomap(kwords) : cbmapopen();  lnum = cblistnum(nwords);  first = TRUE;  em = FALSE;  for(i = 0; i < lnum && i < (kwords ? SUMTOP : SUMWORDMAX); i++){    normal = cblistval(nwords, i, &nwsiz);    asis = cblistval(awords, i, &awsiz);    if(awsiz < 1) continue;    cbmapout(map, normal, nwsiz);    if(normal[0] != '\0' && cbmapget(kmap, normal, nwsiz, NULL)){      if(!first) htmlprintf(" ");      if(!em) htmlprintf("<em class=\"key%d\">",                         kwords ? cblistlsearch(kwords, normal, nwsiz) % EMCLASSNUM : 0);      htmlprintf("%@", asis);      em = TRUE;    } else {      if(em) htmlprintf("</em>");      if(!first) htmlprintf(" ");      htmlprintf("%@", asis);      em = FALSE;    }    first = FALSE;    num--;  }  if(em) htmlprintf("</em>");  htmlprintf(" ...");  em = FALSE;  pv = i;  while(i < lnum){    if(cbmaprnum(map) < 1){      cbmapclose(map);      map = kwords ? listtomap(kwords) : cbmapopen();    }    normal = cblistval(nwords, i, &nwsiz);    if(cbmapget(map, normal, nwsiz, NULL)){      bi = i - SUMWIDTH / 2;      bi = bi > pv ? bi : pv;      for(j = bi; j < lnum && j <= bi + SUMWIDTH; j++){        normal = cblistval(nwords, j, &nwsiz);        asis = cblistval(awords, j, &awsiz);        if(awsiz < 1) continue;        cbmapout(map, normal, nwsiz);        if(normal[0] != '\0' && cbmapget(kmap, normal, nwsiz, NULL)){          htmlprintf(" ");          if(!em) htmlprintf("<em class=\"key%d\">",                             kwords ? 
cblistlsearch(kwords, normal, nwsiz) % EMCLASSNUM : 0);          htmlprintf("%@", asis);          em = TRUE;        } else {          if(em) htmlprintf("</em>");          htmlprintf(" ");          htmlprintf("%@", asis);          em = FALSE;        }        num--;      }      if(em) htmlprintf("</em>");      htmlprintf(" ...");      em = FALSE;      i = j;      pv = i;    } else {      i++;    }    if(num <= 0) break;  }  htmlprintf(" <a href=\"%@?id=%d&amp;phrase=%?&amp;unit=%d&amp;except=%?&amp;max=%d\">"             "[more]</a>", scriptname, oddocid(doc), phrase, unit, except, max);  if(scores){    htmlprintf("<div class=\"blur\">(");    cbmapiterinit(scores);    for(i = 0; i < SCSHOWNUM && (kbuf = cbmapiternext(scores, NULL)) != NULL; i++){      if(i > 0) htmlprintf(" / ");      if(strlen(phrase) > 0){        htmlprintf("<a href=\"%@?phrase=%?+%?&amp;&amp;unit=%d&amp;except=%?&amp;max=%d\">%@</a>",                   scriptname, phrase, kbuf, unit, except, max, kbuf);      } else {        htmlprintf("<a href=\"%@?phrase=%?&amp;&amp;unit=%d&amp;except=%?&amp;max=%d\">%@</a>",                   scriptname, kbuf, unit, except, max, kbuf);      }    }    htmlprintf(") : <a href=\"%@?max=%d&amp;rel=%d\">[related]</a></div>",               scriptname, max, oddocid(doc));  }  cbmapclose(map);  cbmapclose(kmap);  htmlprintf("</dd>\n");}/* get a map made from a list */CBMAP *listtomap(const CBLIST *list){  CBMAP *map;  const char *tmp;  int i, tsiz;  map = cbmapopen();  for(i = 0; i < cblistnum(list); i++){    tmp = cblistval(list, i, &tsiz);    cbmapput(map, tmp, tsiz, "", 0, FALSE);  }  return map;}/* show words in a document */void showwords(int id, const CBLIST *words, const char *phrase, int unit, const char *except,               int max, const char *index, const char *prefix, const char *diridx){  ODEUM *odeum;  ODDOC *doc;  const CBLIST *awords, *nwords;  CBLIST *kwords;  CBMAP *kmap;  const char *uri, *title, *date, *author, *kword, *asis, *normal;  char *ubuf;  
int i, wnum, nwsiz, awsiz, first, em, ulen;  if(!(odeum = odopen(index, OD_OREADER))){    htmlprintf("<p>The index cannot be open because of `%@'.</p>\n", dperrmsg(dpecode));    return;  }  if((doc = odgetbyid(odeum, id)) != NULL){    ubuf = cbmemdup(oddocuri(doc), -1);    if(diridx && bwmatch(ubuf, diridx)){      ulen = strlen(ubuf) - strlen(diridx);      if(ulen > 1 && ubuf[ulen-1] == '/') ubuf[ulen] = '\0';    }    uri = ubuf;    if(fwmatch(uri, DEFPREFIX)) uri += strlen(DEFPREFIX);    title = oddocgetattr(doc, "title");    date = oddocgetattr(doc, "date");    author = oddocgetattr(doc, "author");    awords = oddocawords(doc);    nwords = oddocnwords(doc);    htmlprintf("<div>ID: %d</div>\n", oddocid(doc));    htmlprintf("<div>URI: <a href=\"%@%@\">%@%@</a></div>\n", prefix, uri, prefix, uri);    if(title) htmlprintf("<div>Title: <span class=\"title\">%@</span></div>\n", title);    if(date) htmlprintf("<div>Date: %@</div>\n", date);    if(author) htmlprintf("<div>Author: %@</div>\n", author);    kmap = oddocscores(doc, KEYWORDS, odeum);    kwords = cbmapkeys(kmap);    htmlprintf("<div>Keywords: ");    for(i = 0; i < cblistnum(kwords); i++){      kword = cblistval(kwords, i, NULL);      if(i > 0) htmlprintf(", ");      if(strlen(phrase) > 0){        htmlprintf("<a href=\"%@?phrase=%?+%?&amp;&amp;unit=%d&amp;except=%?&amp;max=%d\">%@</a>",                   scriptname, phrase, kword, unit, except, max, kword);      } else {        htmlprintf("<a href=\"%@?phrase=%?&amp;&amp;unit=%d&amp;except=%?&amp;max=%d\">%@</a>",                   scriptname, kword, unit, except, max, kword);      }    }    htmlprintf("</div>\n");    cblistclose(kwords);    cbmapclose(kmap);    wnum = cblistnum(awords);    kmap = listtomap(words);    htmlprintf("<dl>\n");    htmlprintf("<dt>Words: %d (or more)</dt>\n", wnum);    htmlprintf("<dd class=\"summary\">");    first = TRUE;    em = FALSE;    for(i = 0; i < wnum; i++){      normal = cblistval(nwords, i, &nwsiz);      asis = 
cblistval(awords, i, &awsiz);      if(awsiz < 1) continue;      if(normal[0] != '\0' && cbmapget(kmap, normal, nwsiz, NULL)){        if(!first) htmlprintf(" ");        if(!em) htmlprintf("<em class=\"key%d\">",                           cblistlsearch(words, normal, nwsiz) % EMCLASSNUM);        htmlprintf("%@", asis);        em = TRUE;      } else {        if(em) htmlprintf("</em>");        if(!first) htmlprintf(" ");        htmlprintf("%@", asis);        em = FALSE;      }      first = FALSE;    }    if(em) htmlprintf("</em>");    htmlprintf(" ...");    htmlprintf("</dd>\n");    htmlprintf("</dl>\n");    cbmapclose(kmap);    free(ubuf);    oddocclose(doc);  } else {    htmlprintf("<p>Retrieving the document failed because of `%@'.</p>\n", dperrmsg(dpecode));  }  odclose(odeum);}/* show help message */void showhelp(const CBLIST *help){  int i;  for(i = 0; i < cblistnum(help); i++){    htmlprintf("%s\n", cblistval(help, i, NULL));  }  htmlprintf("<div class=\"note\">Powered by QDBM %@.</div>\n", dpversion);}/* END OF FILE */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -