⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 qfts.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
/*************************************************************************************************
 * CGI script for full-text search
 *                                                      Copyright (C) 2000-2003 Mikio Hirabayashi
 * This file is part of QDBM, Quick Database Manager.
 * QDBM is free software; you can redistribute it and/or modify it under the terms of the GNU
 * Lesser General Public License as published by the Free Software Foundation; either version
 * 2.1 of the License or any later version.  QDBM is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
 * details.
 * You should have received a copy of the GNU Lesser General Public License along with QDBM; if
 * not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
 * 02111-1307 USA.
 *************************************************************************************************/


#include <depot.h>
#include <cabin.h>
#include <odeum.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>

#undef TRUE
#define TRUE           1                 /* boolean true */
#undef FALSE
#define FALSE          0                 /* boolean false */

#define CONFFILE    "qfts.conf"          /* name of the configuration file */
#define DEFENC      "US-ASCII"           /* default encoding */
#define DEFLANG     "en"                 /* default language */
#define DEFTITLE    "Odeum on WWW"       /* default title */
#define DEFINDEX    "casket"             /* directory containing database files */
#define DEFPREFIX   "./"                 /* prefix of the URI of a document */
#define RDATAMAX    262144               /* max size of data to read */
#define NUMBUFSIZ   32                   /* size of a buffer for a number */
#define DEFMAX      16                   /* default max number of shown documents */
#define SUMTOP      24                   /* number of adopted words as top of summary */
#define SUMWIDTH    16                   /* number of adopted words around a keyword */
#define SUMWORDMAX  96                   /* max number of words in summary */
#define KEYWORDS    16                   /* number of shown keywords */
#define EMCLASSNUM  6                    /* number of classes for em elements */
#define SCDBNAME    "_score"             /* name of the database for scores */
#define SCSHOWNUM   8                    /* number of shown scores */
#define RELKEYNUM   16                   /* number of words to use as relational search */
#define RELDOCMAX   2048                 /* number of target words as relational search */
#define RELVECNUM   32                   /* number of dimension of score vector */
#define PATHBUFSIZ  1024                 /* size of a path buffer */
#define PATHCHR     '/'                  /* delimiter character of path */

enum {
  UNITAND,
  UNITOR
};


/* for Win32 */
#if defined(_WIN32)
#undef PATHCHR
#define PATHCHR     '\\'
#endif


/* global variables */
const char *scriptname;                  /* name of the script */
const char *enc;                         /* encoding of the page */
const char *lang;                        /* language of the page */
const char *title;                       /* title of the page */


/* function prototypes */
int main(int argc, char **argv);
int fwmatch(const char *str, const char *key);
int bwmatch(const char *str, const char *key);
const char *skiplabel(const char *str);
CBMAP *getparams(void);
void senderror(int code, const char *tag, const char *message);
void htmlprintf(const char *format, ...);
void printmime(void);
void printdecl(void);
void printhead(void);
void showform(const char *phrase, int unit, const char *except, int max);
CBLIST *getwords(const char *phrase);
void setovec(CBMAP *scores, int *vec);
void settvec(CBMAP *osc, CBMAP *tsc, int *vec);
void showrelresult(int rel, int max, int skip, const char *index,
                   const char *prefix, const char *diridx);
void showresult(const CBLIST *words, const char *phrase, int unit,
                const CBLIST *ewords, const char *except, int max, int skip,
                const char *index, const char *prefix, const char *diridx);
void showsummary(const ODDOC *doc, const CBLIST *kwords, const char *phrase, int unit,
                 const char *except, int max, CBMAP *scores);
CBMAP *listtomap(const CBLIST *list);
void showwords(int id, const CBLIST *words, const char *phrase, int unit, const char *except,
               int max, const char *index, const char *prefix, const char *diridx);
void showhelp(const CBLIST *help);


/* main routine
   Loads the settings from CONFFILE, reads the CGI parameters, and prints the result page:
   the word list of a document when `id' is positive, the relational search result when `rel'
   is positive, the search result when the phrase yields words, or the help text otherwise.
   Always returns 0. */
int main(int argc, char **argv){
  CBMAP *params;
  CBLIST *lines, *help, *words, *ewords;
  const char *tmp, *index, *prefix, *except, *diridx, *phrase;
  int i, unit, max, skip, id, rel;
  /* set configurations */
  scriptname = argv[0];
  if((tmp = getenv("SCRIPT_NAME")) != NULL) scriptname = tmp;
  enc = NULL;
  lang = NULL;
  title = NULL;
  index = NULL;
  prefix = NULL;
  diridx = NULL;
  help = cblistopen();
  if((lines = cbreadlines(CONFFILE)) != NULL){
    for(i = 0; i < cblistnum(lines); i++){
      tmp = cblistval(lines, i, NULL);
      /* the values below point into the elements of `lines', so the list is kept open
         until the page has been printed */
      if(fwmatch(tmp, "encoding:")){
        enc = skiplabel(tmp);
      } else if(fwmatch(tmp, "lang:")){
        lang = skiplabel(tmp);
      } else if(fwmatch(tmp, "title:")){
        title = skiplabel(tmp);
      } else if(fwmatch(tmp, "index:")){
        index = skiplabel(tmp);
      } else if(fwmatch(tmp, "prefix:")){
        prefix = skiplabel(tmp);
      } else if(fwmatch(tmp, "diridx:")){
        diridx = skiplabel(tmp);
      } else if(fwmatch(tmp, "help:")){
        cblistpush(help, skiplabel(tmp), -1);
      }
    }
  }
  /* fall back to the defaults; `diridx' has no default and may stay NULL */
  if(!enc) enc = DEFENC;
  if(!lang) lang = DEFLANG;
  if(!title) title = DEFTITLE;
  if(!index) index = DEFINDEX;
  if(!prefix) prefix = DEFPREFIX;
  /* read parameters */
  phrase = NULL;
  except = NULL;
  unit = UNITAND;
  max = 0;
  skip = 0;
  id = 0;
  rel = 0;
  params = getparams();
  if((tmp = cbmapget(params, "phrase", -1, NULL)) != NULL) phrase = tmp;
  if((tmp = cbmapget(params, "except", -1, NULL)) != NULL) except = tmp;
  if((tmp = cbmapget(params, "unit", -1, NULL)) != NULL) unit = atoi(tmp);
  if((tmp = cbmapget(params, "max", -1, NULL)) != NULL) max = atoi(tmp);
  if((tmp = cbmapget(params, "skip", -1, NULL)) != NULL) skip = atoi(tmp);
  if((tmp = cbmapget(params, "id", -1, NULL)) != NULL) id = atoi(tmp);
  if((tmp = cbmapget(params, "rel", -1, NULL)) != NULL) rel = atoi(tmp);
  if(!phrase) phrase = "";
  if(!except) except = "";
  if(max < 1) max = DEFMAX;
  if(skip < 0) skip = 0;
  /* show page */
  printmime();
  printdecl();
  htmlprintf("<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"%s\" lang=\"%s\">\n",
             lang, lang);
  printhead();
  htmlprintf("<body>\n");
  showform(phrase, unit, except, max);
  htmlprintf("<div class=\"result\">\n");
  words = getwords(phrase);
  ewords = getwords(except);
  if(id > 0){
    showwords(id, words, phrase, unit, except, max, index, prefix, diridx);
  } else if(rel > 0){
    showrelresult(rel, max, skip, index, prefix, diridx);
  } else if(cblistnum(words) > 0){
    showresult(words, phrase, unit, ewords, except, max, skip, index, prefix, diridx);
  } else if(strlen(phrase) > 0){
    htmlprintf("<p>No effective word was extracted from the phrase.</p>\n");
  } else {
    showhelp(help);
  }
  cblistclose(ewords);
  cblistclose(words);
  htmlprintf("</div>\n");
  htmlprintf("</body>\n");
  htmlprintf("</html>\n");
  /* release resources */
  cbmapclose(params);
  if(lines) cblistclose(lines);
  cblistclose(help);
  return 0;
}


/* forward matching
   `str' specifies the string to be tested.
   `key' specifies the expected prefix.
   The return value is TRUE if `str' begins with `key', else FALSE. */
int fwmatch(const char *str, const char *key){
  return strncmp(str, key, strlen(key)) == 0 ? TRUE : FALSE;
}


/* backward matching
   `str' specifies the string to be tested.
   `key' specifies the expected suffix.
   The return value is TRUE if `str' ends with `key', else FALSE. */
int bwmatch(const char *str, const char *key){
  size_t slen, klen;
  slen = strlen(str);
  klen = strlen(key);
  /* a key longer than the string can never match; checking this first keeps the comparison
     from reading before the beginning of `str' */
  if(klen > slen) return FALSE;
  return memcmp(str + (slen - klen), key, klen) == 0 ? TRUE : FALSE;
}


/* skip the label of a line
   `str' specifies a line of the form "label: value".
   The return value is the pointer to the first character after the first ':' and any spaces
   or tabs following it, or NULL if the line contains no ':'.  The pointer refers to the
   region of `str' itself. */
const char *skiplabel(const char *str){
  if(!(str = strchr(str, ':'))) return str;
  str++;
  while(*str == ' ' || *str == '\t'){
    str++;
  }
  return str;
}


/* get a map of the CGI parameters
   If the environment variable CONTENT_LENGTH holds a number between 1 and RDATAMAX, that many
   bytes are read from the standard input and parsed.  Otherwise, the environment variable
   QUERY_STRING is parsed if it is set.  The data is split by '&', each pair is split at its
   first '=', and both sides are URL-decoded.  Pairs without '=' are ignored.
   The return value is a new map, which may be empty.  The caller closes it with `cbmapclose'. */
CBMAP *getparams(void){
  CBMAP *params;
  CBLIST *pairs;
  char *rbuf, *buf, *key, *val, *dkey, *dval;
  const char *tmp;
  int i, len, c;
  params = cbmapopen();
  rbuf = NULL;
  buf = NULL;
  if((tmp = getenv("CONTENT_LENGTH")) != NULL && (len = atoi(tmp)) > 0 && len <= RDATAMAX){
    /* if the buffer cannot be allocated, the body is skipped and the map stays empty */
    if((rbuf = malloc((size_t)len + 1)) != NULL){
      for(i = 0; i < len && (c = getchar()) != EOF; i++){
        rbuf[i] = (char)c;
      }
      rbuf[i] = '\0';
      /* a body shorter than announced is discarded rather than parsed */
      if(i == len) buf = rbuf;
    }
  } else {
    buf = getenv("QUERY_STRING");
  }
  if(buf != NULL){
    buf = cbmemdup(buf, -1);
    pairs = cbsplit(buf, -1, "&");
    for(i = 0; i < cblistnum(pairs); i++){
      key = cbmemdup(cblistval(pairs, i, NULL), -1);
      if((val = strchr(key, '=')) != NULL){
        *(val++) = '\0';
        dkey = cburldecode(key, NULL);
        dval = cburldecode(val, NULL);
        /* NOTE(review): the last argument FALSE presumably keeps the first value of a
           repeated key -- confirm against the documentation of `cbmapput' */
        cbmapput(params, dkey, -1, dval, -1, FALSE);
        free(dval);
        free(dkey);
      }
      free(key);
    }
    cblistclose(pairs);
    free(buf);
  }
  free(rbuf);
  return params;
}


/* send error status
   `code' specifies the status code.
   `tag' specifies the reason phrase printed after the code.
   `message' specifies the body, which is printed as plain text. */
void senderror(int code, const char *tag, const char *message){
  printf("Status: %d %s\r\n", code, tag);
  printf("Content-Type: text/plain; charset=US-ASCII\r\n");
  printf("\r\n");
  printf("%s\n", message);
}


/* HTML-oriented printf */
void htmlprintf(const char *format, ...){
  va_list ap;
  char *tmp;
  unsigned char c;
  va_start(ap, format);
  while(*format != '\0'){
    if(*format == '%'){
      format++;
      switch(*format){
      case 's':
        tmp = va_arg(ap, char *);
        if(!tmp) tmp = "(null)";
        printf("%s", tmp);
        break;
      case 'd':
        printf("%d", va_arg(ap, int));
        break;
      case '@':
        tmp = va_arg(ap, char *);
        if(!tmp) tmp = "(null)";
        while(*tmp){
          switch(*tmp){
          case '&': printf("&amp;"); break;
          case '<': printf("&lt;"); break;
          case '>': printf("&gt;"); break;
          case '"': printf("&quot;"); break;
          default: putchar(*tmp); break;
          }
          tmp++;
        }
        break;
      case '?':
        tmp = va_arg(ap, char *);
        if(!tmp) tmp = "(null)";
        while(*tmp){
          c = *(unsigned char *)tmp;
          if((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
             (c >= '0' && c <= '9') || (c != '\0' && strchr("_-.", c))){
            putchar(c);
          } else if(c == ' '){
            putchar('+');
          } else {
            printf("%%%02X", c);
          }
          tmp++;
        }
        break;
      case '%':
        putchar('%');
        break;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -