⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 qfts.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
      }    } else {      putchar(*format);    }    format++;  }  va_end(ap);}/* print mime headers */void printmime(void){  printf("Content-Type: text/html; charset=%s\r\n", enc);  printf("Cache-Control: no-cache, must-revalidate\r\n");  printf("Pragma: no-cache\r\n");  printf("\r\n");  fflush(stdout);}/* print the declarations of XHTML */void printdecl(void){  htmlprintf("<?xml version=\"1.0\" encoding=\"%s\"?>\n", enc);  htmlprintf("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" "             "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n");}/* print headers */void printhead(void){  htmlprintf("<head>\n");  htmlprintf("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\" />\n", enc);  htmlprintf("<meta http-equiv=\"Content-Style-Type\" content=\"text/css\" />\n");  htmlprintf("<link rel=\"contents\" href=\"./\" />\n");  htmlprintf("<title>%@</title>\n", title);  htmlprintf("<style type=\"text/css\">\n");  htmlprintf("body { background-color: #eeeeee; color: #111111;"             " margin: 0em 0em; padding: 0em 0em; }\n");  htmlprintf("h1,h2,p { margin: 0.5em 0.8em; }\n");  htmlprintf("a { color: #0022aa; text-decoration: none; }\n");  htmlprintf("a:hover { color: #1144ff; text-decoration: underline; }\n");  htmlprintf("a.head { color: #111111; text-decoration: none; }\n");  htmlprintf("em { font-weight: bold; font-style: normal; color: #001166; }\n");  htmlprintf("form { background-color: #ddddee; margin: 0em 0em; padding: 0.7em 0.8em;"             " border-bottom: 1pt solid #bbbbcc; }\n");  htmlprintf("dd { margin: 0.1em 0.3em 0.1em 1.8em; font-size: small; }\n");  htmlprintf(".result { margin: 0em 0em; padding: 0.5em 1.0em; }\n");  htmlprintf(".title { font-weight: bold; }\n");  htmlprintf(".summary { background-color: #ddddee; padding: 0.2em 0.3em;"             " border: 1pt solid #bbbbcc; }\n");  htmlprintf(".summary em { border: 1pt solid #bbcccc; padding: 0.0em 0.2em; }\n");  htmlprintf(".key0 { background-color: 
#ddeeee; color: #001177; }\n");  htmlprintf(".key1 { background-color: #eeddee; color: #002255; }\n");  htmlprintf(".key2 { background-color: #eeeedd; color: #112233; }\n");  htmlprintf(".key3 { background-color: #ccddee; color: #112233; }\n");  htmlprintf(".key4 { background-color: #ddffee; color: #112233; }\n");  htmlprintf(".key5 { background-color: #ffeedd; color: #112233; }\n");  htmlprintf(".note { margin: 0.5em 0.5em; text-align: right; color: #666666; }\n");  htmlprintf(".blur { color: #666666; }\n");  htmlprintf(".missing { color: #888888; }\n");  htmlprintf("</style>\n");  htmlprintf("</head>\n");}/* show search form */void showform(const char *phrase, int unit, const char *except, int max){  int i;  htmlprintf("<form action=\"%@\" method=\"get\">\n", scriptname);  htmlprintf("<div>\n");  htmlprintf("Phrase: <input type=\"text\" name=\"phrase\" value=\"%@\" size=\"48\" />\n",             phrase);  htmlprintf("<select name=\"unit\">\n");  htmlprintf("<option value=\"%d\"%s>all of them</option>\n",             UNITAND, unit == UNITAND ? " selected=\"selected\"" : "");  htmlprintf("<option value=\"%d\"%s>any of them</option>\n",             UNITOR, unit == UNITOR ? " selected=\"selected\"" : "");  htmlprintf("</select>\n");  htmlprintf("/\n");  htmlprintf("Except: <input type=\"text\" name=\"except\" value=\"%@\" size=\"16\" />\n",             except);  htmlprintf("/\n");  htmlprintf("<select name=\"max\">\n");  for(i = 1; i <= 256; i *= 2){    htmlprintf("<option value=\"%d\"%s>%d per page</option>\n",               i, i == max ? 
" selected=\"selected\"" : "", i);  }  htmlprintf("</select>\n");  htmlprintf("/\n");  htmlprintf("<input type=\"submit\" value=\"Search\" />\n");  htmlprintf("</div>\n");  htmlprintf("</form>\n");}/* break phrase into words */CBLIST *getwords(const char *phrase){  CBLIST *words, *tmp;  char *normal;  int i;  words = cblistopen();  tmp = odbreaktext(phrase);  for(i = 0; i < cblistnum(tmp); i++){    normal = odnormalizeword(cblistval(tmp, i, NULL));    if(strlen(normal) > 0) cblistpush(words, normal, -1);    free(normal);  }  cblistclose(tmp);  return words;}/* set the original score vector */void setovec(CBMAP *scores, int *vec){  int i;  const char *kbuf;  cbmapiterinit(scores);  for(i = 0; i < RELVECNUM; i++){    if((kbuf = cbmapiternext(scores, NULL)) != NULL){      vec[i] = atoi(cbmapget(scores, kbuf, -1, NULL));    } else {      vec[i] = 0;    }  }}/* set the target score vector */void settvec(CBMAP *osc, CBMAP *tsc, int *vec){  int i;  const char *kbuf, *vbuf;  if(tsc){    cbmapiterinit(osc);    for(i = 0; i < RELVECNUM; i++){      if((kbuf = cbmapiternext(osc, NULL)) != NULL){        vbuf = cbmapget(tsc, kbuf, -1, NULL);        vec[i] = vbuf ? 
atoi(vbuf) : 0;      } else {        vec[i] = 0;      }    }  } else {    for(i = 0; i < RELVECNUM; i++){      vec[i] = 0;    }  }}/* show relational search result */void showrelresult(int rel, int max, int skip, const char *index,                   const char *prefix, const char *diridx){  ODEUM *odeum;  DEPOT *scdb;  ODDOC *doc, *tdoc;  CBMAP *scores, *tsc;  CBLIST *words;  ODPAIR *pairs, *last, *tmp;  const char *uri, *word, *title, *date, *author;  char path[PATHBUFSIZ], *ubuf, *mbuf, *tmbuf, *tubuf, numbuf[NUMBUFSIZ];  int i, j, ulen, msiz, tmsiz, pnum, hnum, lnum, tnum;  int ovec[RELVECNUM], tvec[RELVECNUM], id;  double ival;  if(!(odeum = odopen(index, OD_OREADER))){    htmlprintf("<p>The index cannot be open because of `%@'.</p>\n", dperrmsg(dpecode));    return;  }  sprintf(path, "%s%c%s", index, PATHCHR, SCDBNAME);  if(!(scdb = dpopen(path, DP_OREADER, -1))){    htmlprintf("<p>The score database cannot be open because of `%@'.</p>\n", dperrmsg(dpecode));    odclose(odeum);    return;  }  if(!(doc = odgetbyid(odeum, rel))){    htmlprintf("<p>The document cannot be got because of `%@'.</p>\n", dperrmsg(dpecode));    dpclose(scdb);    odclose(odeum);  }  ubuf = cbmemdup(oddocuri(doc), -1);  if(diridx && bwmatch(ubuf, diridx)){    ulen = strlen(ubuf) - strlen(diridx);    if(ulen > 1 && ubuf[ulen-1] == '/') ubuf[ulen] = '\0';  }  uri = ubuf;  if(fwmatch(uri, DEFPREFIX)) uri += strlen(DEFPREFIX);  if(!(mbuf = dpget(scdb, (char *)&rel, sizeof(int), 0, -1, &msiz))){    htmlprintf("<p>Scores cannot be got because of `%@'.</p>\n", dperrmsg(dpecode));    free(ubuf);    oddocclose(doc);    dpclose(scdb);    odclose(odeum);  }  scores = cbmapload(mbuf, msiz);  words = cbmapkeys(scores);  last = NULL;  lnum = 0;  for(i = 0; i < RELKEYNUM && i < cblistnum(words); i++){    word = cblistval(words, i, NULL);    if(!(pairs = odsearch(odeum, word, RELDOCMAX, &pnum))) continue;    if((hnum = odsearchdnum(odeum, word)) < 0) hnum = 0;    ival = odlogarithm(hnum);    ival = 
(ival * ival) / 2.0;    if(ival < 2.0) ival = 2.0;    for(j = 0; j < pnum; j++){      pairs[j].score /= ival;    }    if(last){      tmp = odpairsor(last, lnum, pairs, pnum, &tnum);      free(last);      free(pairs);      last = tmp;      lnum = tnum;    } else {      last = pairs;      lnum = pnum;    }  }  if(last && lnum > 0){    setovec(scores, ovec);    for(i = 0; i < lnum; i++){      if((tmbuf = dpget(scdb, (char *)&(last[i].id), sizeof(int), 0, -1, &tmsiz)) != NULL){        tsc = cbmapload(tmbuf, tmsiz);        free(tmbuf);      } else {        tsc = NULL;      }      settvec(scores, tsc, tvec);      if(tsc) cbmapclose(tsc);      last[i].score = odvectorcosine(ovec, tvec, RELVECNUM) * 10000;      if(last[i].score >= 9999) last[i].score = 10000;    }    odpairssort(last, lnum);    for(i = 0; i < lnum; i++){      if(last[i].score < 1){        lnum = i;        break;      }    }  }  if(last && lnum > 0){    htmlprintf("<p>Relational documents with <a href=\"%@%@\">%@%@</a> : <em>%d</em> hits</p>\n",               prefix, uri, prefix, uri, lnum);    for(i = skip; i < lnum && i < max + skip; i++){      if(!(tdoc = odgetbyid(odeum, last[i].id))){        htmlprintf("<dl>\n");        htmlprintf("<dt class=\"missing\">%d: (replaced or purged)</dt>\n", i + 1);        htmlprintf("</dl>\n");        continue;      }      tubuf = cbmemdup(oddocuri(tdoc), -1);      if(diridx && bwmatch(tubuf, diridx)){        ulen = strlen(tubuf) - strlen(diridx);        if(ulen > 1 && tubuf[ulen-1] == '/') tubuf[ulen] = '\0';      }      uri = tubuf;      if(fwmatch(uri, DEFPREFIX)) uri += strlen(DEFPREFIX);      title = oddocgetattr(tdoc, "title");      date = oddocgetattr(tdoc, "date");      author = oddocgetattr(tdoc, "author");      htmlprintf("<dl>\n");      sprintf(numbuf, "%.2f", (double)last[i].score / 100.0);      htmlprintf("<dt class=\"blur\">%d: <a href=\"%@%@\" class=\"title\">%@</a>"                 " (%@%%)</dt>\n", i + 1, prefix, uri,                 title && strlen(title) 
> 0 ? title : "(untitled)", numbuf);      tsc = NULL;      if(scdb){        id = oddocid(tdoc);        if((tmbuf = dpget(scdb, (char *)&id, sizeof(int), 0, -1, &tmsiz)) != NULL){          tsc = cbmapload(tmbuf, tmsiz);          free(tmbuf);        }      }      showsummary(tdoc, NULL, "", 0, "", max, tsc);      if(tsc) cbmapclose(tsc);      htmlprintf("<dd class=\"blur\">");      htmlprintf("%@%@", prefix, uri);      if(date) htmlprintf(" (%@)", date);      if(author) htmlprintf(" (%@)", author);      htmlprintf("</dd>\n");      htmlprintf("</dl>\n");      free(tubuf);      oddocclose(tdoc);    }  } else {    htmlprintf("<p>No document hits.</p>\n");  }  if(last) free(last);  htmlprintf("<div class=\"note\">");  if(skip > 0){    htmlprintf("<a href=\"%@?rel=%d&amp;max=%d&amp;skip=%d\">[PREV]</a>",               scriptname, rel, max, skip - max);  } else {    htmlprintf("<span class=\"blur\">[PREV]</span>");  }  htmlprintf(" ");  if(i < lnum){    htmlprintf("<a href=\"%@?rel=%d&amp;max=%d&amp;skip=%d\">[NEXT]</a>",               scriptname, rel, max, skip + max);  } else {    htmlprintf("<span class=\"blur\">[NEXT]</span>");  }  htmlprintf("</div>\n");  htmlprintf("<div class=\"note\">The index contains %d documents and %d words.</div>\n",             oddnum(odeum), odwnum(odeum));  cblistclose(words);  cbmapclose(scores);  free(mbuf);  free(ubuf);  oddocclose(doc);  dpclose(scdb);  odclose(odeum);}/* show search result */void showresult(const CBLIST *words, const char *phrase, int unit,                const CBLIST *ewords, const char *except, int max, int skip,                const char *index, const char *prefix, const char *diridx){  ODEUM *odeum;  DEPOT *scdb;  ODPAIR *pairs, *last, *tmp;  ODDOC *doc;  CBMAP *scores;  const char *word, *uri, *title, *date, *author;  char path[PATHBUFSIZ], *ubuf, *mbuf;  int i, j, pnum, lnum, hnum, tnum, ulen, id, msiz;  double ival;  if(!(odeum = odopen(index, OD_OREADER))){    htmlprintf("<p>The index cannot be open because of 
`%@'.</p>\n", dperrmsg(dpecode));    return;  }  sprintf(path, "%s%c%s", index, PATHCHR, SCDBNAME);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -