⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 index.c

📁 harvest是一个下载html网页的机器人
💻 C
字号:
static char rcsid[] = "$Id: index.c,v 1.1 1997/10/27 22:47:08 sxw Exp $";/* *  index.c -- Swish indexer interface * *  Simon Wilkinson 14/10/97 * */#include "broker.h"#include "log.h"#include "Swish/index.h"/* Global variables */extern char *DIRpath;extern char *brk_obj_url;extern int IndexType;int SW_illegal_query = 0;char *SW_SwishInd = NULL;int SW_lifetime = 15 * 60;int SW_max_lifetime = 15 * 60;int SW_NewObj = 0;#define BIG_BUFSIZ      (8*BUFSIZ) #define BADQ_STR \        "103 - ERROR: Swish Indexer cannot support your query.\n"static fd_t SW_getfd(char *line) {  char *tmp =NULL;    if ((tmp = strstr(line, "/OBJ")) == NULL) return ERROR;  tmp+=4;  return((fd_t) atol(tmp));}static int SW_Start_Indexing(char *comm){  int pid, status = 0;    Debug(103,1,("Command: %s\n",comm));    if ((pid = fork()) < 0) {    log_errno("fork");    return ERROR;  }   if (pid == 0) {    char *argv[64];        memset(argv, '\0', sizeof(argv));    parse_argv(argv, comm);    execvp(argv[0],argv);    perror(argv[0]);    _exit(1);  }  Log("Waiting for swish to finish indexing...\n");  while(waitpid(pid, &status, WNOHANG) !=pid) {    select_loop(15, 0, 0); /* No connections */    if (kill(pid, 0) != 0)      break; /* child dead and reaped */  }    return SUCCESS;}static int SW_bulk_query(int rsock, FILE* indexfp, time_t ptime) {  static char ret[BIG_BUFSIZ];  fd_t qfd;  fd_t oldfd = -1;  int cnt = 0;  reg_t *bentry = NULL;  FILE *fp = NULL;    if ((fp = fdopen(rsock, "w")) == NULL) {    log_errno("fdopen");    QM_send_bulk_err(rsock);    return ERROR;  }  QM_send_bulk_begin(rsock);  while (fgets(ret, BIG_BUFSIZ, indexfp) != NULL)    if (((qfd = SW_getfd(ret)) != ERROR) &&	((bentry = RG_Get_Entry(qfd)) != NULL) &&	(bentry->update_time >= ptime))      QM_send_bulk_fd(qfd, fp, bentry) == SUCCESS;  fflush(fp);  QM_send_bulk_end(rsock);  fclose(fp);  return SUCCESS;}static int SW_del_query(int rsock, FILE *indexfp) {  char ret[BUFSIZ];  fd_t qfd;  int cnt =0;  reg_t *rme;    while (fgets(ret, 
BUFSIZ, indexfp) !=NULL)    if (((qfd = SW_getfd(ret)) != ERROR) &&	((rme = RG_Get_Entry(qfd)) != NULL)) {      COL_DEL_Obj(rme);      cnt ++;    }  Log("Deleted %d objects based on query.\n", cnt);  return SUCCESS;}static int SW_user_query(int rsock, FILE *indexfp) {  fd_t fd;  char inb[BUFSIZ], opb[BUFSIZ], *opdata[BUFSIZ], *tmp, *s;  int opsize = 0, obcnt = 0, i;    /* If the query was illegal give up */  if (SW_illegal_query) {    SWRITE(rsock, BADQ_STR, strlen(BADQ_STR));    return ERROR;  }    /* Check the clients not gone bye-bye */  (void) write(rsock, PIPECHK, strlen(PIPECHK));  if (write(rsock, PIPECHK, strlen(PIPECHK)) == -1) {    errorlog("Client is gone -- aborting user query results.\n");    close(rsock);    return ERROR;  }  memset(opdata, '\0', BUFSIZ * sizeof(char *));  while (fgets(inb, BUFSIZ, indexfp) !=NULL) {    if ((fd = SW_getfd(inb)) == ERROR)      continue;    if (QM_user_object(rsock, fd, opsize, opdata) == SUCCESS)      obcnt++;  }  QM_user_done(rsock, obcnt);    return SUCCESS;}static char *SW_build_select(qlist_t *ql){  char *tmp = NULL;    if (ql->op == EXACT || ql->op == REGEX) {    tmp = (char *) xmalloc(BUFSIZ);    tmp[0] = '\0';    if (ql->llist) {      sprintf(tmp, "%s=%s",ql->llist,ql->rlist);      xfree(ql->rlist); ql->rlist=NULL;      xfree(ql->llist); ql->llist=NULL;      return(tmp);    }    sprintf(tmp, "%s", ql->rlist);    xfree(ql->rlist); ql->rlist=NULL;    return(tmp);  }  return NULL;}static char *SW_do_qlist(qlist_t *ql){  char *ll, *rl , *nl;    if (ql==NULL) return NULL;      if (ql->type == LOGICAL) {        nl = (char *) xmalloc(BUFSIZ);        ll = SW_do_qlist((qlist_t *) ql->llist);        if ((rl = SW_do_qlist((qlist_t *) ql->rlist)) == NULL) {      if (ll!=NULL) xfree(ll);      return NULL;    }    nl[0]= '\0';        if (ll!=NULL) strcat(nl,ll);        switch (ql->op) {    case AND:      strncat(nl, " and ", 5);      break;    case OR:      strncat(nl, " or ", 4);      break;    case NOT:      strncat(nl, " 
not ", 5);      break;    default:      xfree(nl);      xfree(rl);      if (ll!=NULL) xfree(ll);      return NULL;    }    strcat(nl, rl);    if (ll!=NULL) xfree(ll);    xfree(rl);    return(nl);  }  return (SW_build_select(ql));}		/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX *                              PUBLIC FUNCTIONS                       * XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX *//* ----------------------------------------------------------------- *   Swish_IND_New_Object -- index a new object * ----------------------------------------------------------------- */int Swish_IND_New_Object(reg_t *entry){  int ret = SUCCESS;    if (IndexType == I_PER_OBJ) {    Log("The Swish broker doesn't support per object indexing\n");    ret = FAIL;  }    if (ret == SUCCESS) SW_NewObj++;    return ret;}/* ----------------------------------------------------------------- *   Swish_IND_Index_Full -- builds an index of all of the object files. * ----------------------------------------------------------------- */ int Swish_IND_Index_Full(){  char comm[BUFSIZ];    Log("Begin Swish Full Indexing ...\n");  sprintf(comm,  "%s -i %s/objects -f %s/index",SW_SwishInd, DIRpath,	  DIRpath);  SW_Start_Indexing(comm);  Log("Finished Swish Full Indexing.\n");  return SUCCESS;}/* ----------------------------------------------------------------- *   Swish_IND_Index_Incremental -- do a full incremental update of the index. * ----------------------------------------------------------------- */ int Swish_IND_Index_Incremental(){  Log("The SWISH broker doesn't support incremental indexing\n");  Log("Using full indexing instead ...\n");  return Swish_IND_Index_Full();}/* ----------------------------------------------------------------- *   Swish_IND_Index_Start -- prepare for indexing a stream of objects. 
* ----------------------------------------------------------------- */int Swish_IND_Index_Start(){  SW_NewObj = 0;  return SUCCESS;}/* ----------------------------------------------------------------- *   Swish_IND_Index_Flush -- finish indexing a stream of objects. * ----------------------------------------------------------------- */int Swish_IND_Index_Flush(){  if (SW_NewObj > 0) {    switch (IndexType) {    case I_FULL:      return (Swish_IND_Index_Full());    case I_INCR:      return (Swish_IND_Index_Incremental());    case I_PER_OBJ:      break;    default:      fatal("Swish_IND_Index_Flush: Internal error.\n");    }  }  return SUCCESS;}/* ----------------------------------------------------------------- *   Swish_IND_Destroy_Obj -- remove an object from the indexer. * ----------------------------------------------------------------- */int Swish_IND_Destroy_Obj(reg_t *entry){  return SUCCESS;}/* ----------------------------------------------------------------- *   Swish_IND_initialize -- initialize interface to indexer * ----------------------------------------------------------------- */int Swish_IND_initialize(){  /* Force full indexing */  IndexType = I_FULL;    return SUCCESS;}/* ----------------------------------------------------------------- *   Swish_IND_Init_Flags -- initialize parser flags * ----------------------------------------------------------------- */void Swish_IND_Init_Flags(){}/* ----------------------------------------------------------------- *   Swish_IND_Set_Flags -- set query parser flag   * ----------------------------------------------------------------- */void Swish_IND_Set_Flags(char *flag, char *val){}/* ----------------------------------------------------------------- *   Swish_IND_config -- configure indexer specific variables * ----------------------------------------------------------------- */int Swish_IND_config(char *value, char *tag){  if (tag == NULL || value == NULL)    return ERROR;    if (strcasecmp(tag, "Swish") == 
0) {    SW_SwishInd= xstrdup(value);    if (verify_exe(SW_SwishInd) == ERROR) return ERROR;  }  return SUCCESS;}/* ----------------------------------------------------------------- *   Swish_IND_do_query -- construct a query based upon the query list 			 structure. * ----------------------------------------------------------------- */ int Swish_IND_do_query(qlist_t *ql, int rsock, int qflag, time_t ptime){  char *patstr = NULL;  static char commandstr[BUFSIZ];  char *tfn = NULL;  int err=SUCCESS;  FILE *indexfp = NULL;    patstr = SW_do_qlist(ql);    if (patstr != NULL) {    sprintf(commandstr,"%s -f %s/index -w '%s'",SW_SwishInd,	    DIRpath, patstr);    xfree(patstr);        /* Need (for now) a tmpfile for the output */    if ((tfn = tempnam(NULL, "query")) != NULL) {      strcat(commandstr, " > ");      strcat(commandstr, tfn);    } else {      SWRITE(rsock, IND_FAIL, IND_FAIL_S);      return ERROR;    }    Log("Swish search command: %s\n", commandstr);        /* Run the query, but restrict its lifetime */    do_system_lifetime(commandstr, SW_lifetime);        /* Now process the tempfile that should contain the results */    if ((indexfp = fopen(tfn, "r")) == NULL) {      log_errno(tfn);      unlink(tfn);      xfree(tfn);      if (qflag == UQUERY) 	SWRITE(rsock, IND_FAIL, IND_FAIL_S)      else	QM_send_bulk_err(rsock);      close(rsock);      return ERROR;    }    switch(qflag) {    case QBULK:#ifdef FORK_ON_BULK      if (fork() == 0) {      /* child */	int e[3];	e[0] = rsock;	e[1] = fileno(indexfp);	e[2] = -1;	close_all_fds_except(3, e);	SW_bulk_query(rsock, indexfp, ptime);	fclose(indexfp);	unlink(tfn);	close(rsock);	_exit(0);      }      err = SUCCESS;#else      err = SW_bulk_query(rsock,indexfp,ptime);#endif      break;    case UQUERY:      err = SW_user_query(rsock,indexfp);      break;    case QDELETE:      err = SW_del_query(rsock, indexfp);    default:      break;    }    fclose(indexfp);    unlink(tfn);    xfree(tfn);  } else if (qflag == QBULK) {    
QM_send_bulk_err(rsock);    err = ERROR;  } else {    write (rsock, ERR_MSG, strlen(ERR_MSG));    err = ERROR;  }  close(rsock);    return err;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -