📄 index.c
字号:
static char rcsid[] = "index.c,v 1.72 1996/01/17 10:07:45 duane Exp";/* * wais.c -- Broker support for WAIS searching and indexing. * * William G. Camargo, Chanda Dharap, Penn State Univ. * Darren Hardy, Duane Wessels, Univ. of Colorado - Boulder * * ---------------------------------------------------------------------- * * You can define the following values in the broker.conf file: * * WAIS-Database database name for waisindex and waissearch * WAIS-Port port number for waisserver and waissearch * WAIS-Host hostname for waisserver and waissearch * WAIS-Log log file for waisserver * WAIS-Index waisindex command * WAIS-Parse waisparse command (WAIS, Inc. only) * WAIS-Server waisserver command * WAIS-Search waissearch command * WAIS-Lookup waislookup command (WAIS, Inc. only) * * ---------------------------------------------------------------------- * * When using freeWAIS-0.3 with boolean support, or TMC's wais-8-b5: * * To index an objects directory: * * % waisindex -r [-a] -d index-name directory * * prints information to stdout. Then, start up a server for the index: * * % waisserver -p port-num -e logfile -d index-directory & * * To search the index: * * % waissearch -h host -p port-num -d index-name keywords < /dev/null * * ---------------------------------------------------------------------- * * When using WAIS, Inc. version 2.0: * * To index an objects directory: * * % waisparse -parse soif -r dir | waisindex -d index-name [-append] * % waisdelete -d index-name file * * prints information to stdout. Then, start up a server for the index: * * % waisserver -p port-num -e logfile -d index-directory & * * To search the index: * * % echo keyword-search | waislookup -d index-name@HOST:port-num * * Or, if you have the WAIS, Inc. Client Toolkit, then you can * use the in-line client query code in waisquery.c by setting * WAIS-Lookup to ``inline'' and compiling the broker using * waisquery.c and linking with the Client Toolkit libraries. * Also, you need to #define USE_WAIS_INLINE in this index.c file. * * ---------------------------------------------------------------------- * Copyright (c) 1994, 1995. All rights reserved. * * The Harvest software was developed by the Internet Research Task * Force Research Group on Resource Discovery (IRTF-RD): * * Mic Bowman of Transarc Corporation. * Peter Danzig of the University of Southern California. * Darren R. Hardy of the University of Colorado at Boulder. * Udi Manber of the University of Arizona. * Michael F. Schwartz of the University of Colorado at Boulder. * Duane Wessels of the University of Colorado at Boulder. * * This copyright notice applies to software in the Harvest * ``src/'' directory only. Users should consult the individual * copyright notices in the ``components/'' subdirectories for * copyright information about other software bundled with the * Harvest source code distribution. * * TERMS OF USE * * The Harvest software may be used and re-distributed without * charge, provided that the software origin and research team are * cited in any use of the system. Most commonly this is * accomplished by including a link to the Harvest Home Page * (http://harvest.cs.colorado.edu/) from the query page of any * Broker you deploy, as well as in the query result pages. These * links are generated automatically by the standard Broker * software distribution. * * The Harvest software is provided ``as is'', without express or * implied warranty, and with no support nor obligation to assist * in its use, correction, modification or enhancement. We assume * no liability with respect to the infringement of copyrights, * trade secrets, or any patents, and are not responsible for * consequential damages. Proper use of the Harvest software is * entirely the responsibility of the user. * * DERIVATIVE WORKS * * Users may make derivative works from the Harvest software, subject * to the following constraints: * * - You must include the above copyright notice and these * accompanying paragraphs in all forms of derivative works, * and any documentation and other materials related to such * distribution and use acknowledge that the software was * developed at the above institutions. * * - You must notify IRTF-RD regarding your distribution of * the derivative work. * * - You must clearly notify users that your are distributing * a modified version and not the original Harvest software. * * - Any derivative product is also subject to these copyright * and use restrictions. * * Note that the Harvest software is NOT in the public domain. We * retain copyright, as specified above. * * HISTORY OF FREE SOFTWARE STATUS * * Originally we required sites to license the software in cases * where they were going to build commercial products/services * around Harvest. In June 1995 we changed this policy. We now * allow people to use the core Harvest software (the code found in * the Harvest ``src/'' directory) for free. We made this change * in the interest of encouraging the widest possible deployment of * the technology. The Harvest software is really a reference * implementation of a set of protocols and formats, some of which * we intend to standardize. We encourage commercial * re-implementations of code complying to this set of standards. * */#include "broker.h"#include "log.h"#include "Wais/index.h"/* USE_WAIS_INLINE - define to include WAIS, Inc. in-line client support */#ifndef USE_WAIS_INLINE#undef USE_WAIS_INLINE#endif#define NOSTRUCT_STR \ "103 - ERROR: WAIS Indexer does not support structured queries.\n"#define BADQ_STR \ "103 - ERROR: WAIS Indexer cannot support your query.\n"/* Global variables */extern char *DIRpath;extern char *brk_obj_url;extern int IndexType;extern int IndexServer_pid;/* Global functions */extern char *SM_Get_Obj_Filename(); /* only UNIX filesystem SM *//* Local variables */static char *WAISdbname = NULL;static char *WAIShost = NULL;static char *WAISlog = NULL;static char *WAISindex = NULL;static char *WAISparse = NULL;static char *WAISserver = NULL;static char *WAISsearch = NULL;static char *WAISflavor = NULL;static char *WAISlookup = NULL;static char *WAISdelete = NULL;static char *WAISbin = NULL;static char *newobj_fn = NULL;static int WAISport = 0;static int WAIS_NewObj = 0;static int WAIS_illegal_query = 0;/* Local functions */static char *WAIS_do_qlist();static char *WAIS_build_select();static fd_t WAIS_getfd();/* * WAIS_Start_WAISserver - starts a waisserver, and sets IndexServer_pid */static void WAIS_Start_WAISserver(){ static char comm[BUFSIZ]; if (WAISport < 1) return; Log("Starting %s on port %d.\n", WAISserver, WAISport); /* WAIS, Inc and freeWAIS have the same server command */ sprintf(comm, "%s -p %d -e %s -d %s", WAISserver, WAISport, WAISlog, DIRpath);#if DEBUG1 Log("\t command :%s:\n", comm);#endif /* must use fork() rather than vfork() which causes memory leaks */ if ((IndexServer_pid = fork()) == 0) { /* child */ char *argv[64]; close_all_fds(3); memset(argv, '\0', sizeof(argv)); parse_argv(argv, comm); execvp(argv[0], argv); perror(argv[0]); _exit(1); } /* parent */ /* * leave IndexServer_pid negative so that it doesn't get * restarted later in do_query. */ if (IndexServer_pid < 0) { log_errno("fork"); return; } sleep(5); /* give WAISserver a little time */ Log("%s (pid %d) is on-line...\n", WAISserver, IndexServer_pid); return; /* parent */}/* * WAIS_Start_Indexing - Performs an indexing using the given command */static int WAIS_Start_Indexing(comm)char *comm;{ int pid, status = 0; static char buf[BUFSIZ]; char *cmd = comm; /* If there's a waisserver running, kill it */ if (WAISport > 0 && IndexServer_pid > 0) { Log("Killing waisserver (pid %d)...\n", IndexServer_pid);#ifdef USE_WAIS_INLINE teardown_search();#endif (void) kill(IndexServer_pid, SIGTERM); sleep(10); (void) kill(IndexServer_pid, SIGKILL); sleep(5); IndexServer_pid = 0; } /* Check if the command string has a pipe in it. If so, wrap */ /* /bin/sh -c ... around the command so that it gets executed */ /* properly. -DW */ if (strchr(comm, '|') != NULL) { sprintf(buf, "/bin/sh -c 'exec %s'", comm); cmd = buf; }#if DEBUG1 Log("\t command :%s:\n", cmd);#endif /* must use fork() rather than vfork() which causes memory leaks */ if ((pid = fork()) < 0) { log_errno("fork"); return ERROR; } if (pid == 0) { /* child */ char *argv[64]; close_all_fds(3); memset(argv, '\0', sizeof(argv)); parse_argv(argv, cmd); execvp(argv[0], argv); perror(argv[0]); _exit(1); } /* parent */ Log("Waiting for waisindex to finish...\n"); /* while waisindex is running, explicitly wait for it */ while (waitpid(pid, &status, WNOHANG) != pid) { select_loop(15, 0, 0); /* deny outside connections */ if (kill(pid, 0) != 0) break; /* child died, and was caught by sigreap */ } /* Restart waisserver if needed */ if (WAISport > 0) { WAIS_Start_WAISserver(); } return SUCCESS;}/* ----------------------------------------------------------------- WAIS_bulk_query -- send SOIF objects based on query ----------------------------------------------------------------- */static int WAIS_bulk_query(rsock, indexfp, ptime)int rsock;FILE *indexfp;time_t ptime;{ static char ret[BUFSIZ]; fd_t qfd, oldfd = -1; int cnt = 0; reg_t *bentry; FILE *fp; if ((fp = fdopen(rsock, "w")) == NULL) { perror("fdopen"); QM_send_bulk_err(rsock); return ERROR; } QM_send_bulk_begin(rsock); while (fgets(ret, BUFSIZ, indexfp) != NULL) { if (((qfd = WAIS_getfd(ret)) != ERROR) && (qfd != oldfd) && ((bentry = RG_Get_Entry(qfd)) != NULL) && (bentry->update_time >= ptime) && (QM_send_bulk_fd(qfd, fp, bentry) == SUCCESS)) { cnt++; } } fflush(fp); /* critical, must flush before termination */ QM_send_bulk_end(rsock); fclose(fp); return SUCCESS;}/* ----------------------------------------------------------------- WAIS_del_query -- delete objects based on query. ----------------------------------------------------------------- */static int WAIS_del_query(rsock, indexfp)int rsock;FILE *indexfp;{ static char ret[BUFSIZ]; fd_t qfd, oldfd = -1; reg_t *rme; int cnt = 0; while (fgets(ret, BUFSIZ, indexfp) != NULL) { if (((qfd = WAIS_getfd(ret)) != ERROR) && (qfd != oldfd) && ((rme = RG_Get_Entry(qfd)) != NULL)) { COL_DEL_Obj(rme); cnt++; } } Log("Deleted %d objects based on query.\n", cnt); return SUCCESS;}/* ----------------------------------------------------------------- WAIS_user_query -- Read the output of the WAIS query on indexfp, then send to rsock via protocol. ----------------------------------------------------------------- */static int WAIS_user_query(rsock, indexfp)int rsock;FILE *indexfp;{ int obcnt; int opsize; int score; int lines; static char ret[BUFSIZ]; char *opdata[2]; static char t[BUFSIZ]; char *s = NULL; fd_t qfd;#if DEBUG2 Log("\tparsing waissearch output:\n");#endif if (WAIS_illegal_query) { SWRITE(rsock, BADQ_STR, strlen(BADQ_STR)); return SUCCESS; } if (WAIS_gotstructured && strcasecmp(WAISflavor, "commercial-wais") != 0) { SWRITE(rsock, NOSTRUCT_STR, strlen(NOSTRUCT_STR)); return SUCCESS; } /* * Now, we read the result set and transfer the results to * the user. The OID is embedded in the 'Score:' line, and * there's always only 1 object match per line, unlike Glimpse. */ obcnt = 0; while (fgets(ret, BUFSIZ, indexfp) != NULL) {#if DEBUG3 Log("WAIS query returned: %s\n", ret);#endif /* See if this line has a valid OID */ qfd = WAIS_getfd(ret); if (qfd == ERROR || qfd < 0) continue; /* ignore */ opsize = 0; opdata[0] = opdata[1] = NULL; /* Reset opdata */ /* Grab the Score and # of lines if possible */ if (strcasecmp(WAISflavor, "commercial-wais") == 0) { if (((s = strstr(ret, "score")) != NULL) && (sscanf(s, "score %d len %d", &score, &lines) == 2)) { t[0] = '\0'; sprintf(t, "WAIS Results: Score: %d, length: %d", score, lines); opdata[opsize++] = t; } } else { if (((s = strstr(ret, "Score:")) != NULL) && (sscanf(s, "Score: %d, lines: %d", &score, &lines) == 2)) { t[0] = '\0'; sprintf(t, "WAIS Results: Score: %d, lines: %d", score, lines); opdata[opsize++] = t; } } if (QM_user_object(rsock, qfd, opsize, opdata) == SUCCESS) obcnt++; } QM_user_done(rsock, obcnt); return SUCCESS;}/* strips attr of all non-alpha-numeric characters */static void strip_attr(attr)char *attr;{ int i,j; static char s[BUFSIZ]; if (strcasecmp(WAISflavor, "commercial-wais")) return; for (i = j = 0; attr[i]; i++) if (isalnum((unsigned char) attr[i])) s[j++] = attr[i]; s[j] = '\0'; strcpy(attr, s);}/* ----------------------------------------------------------------- WAIS_do_qlist -- Recursive function to build a query from the list ----------------------------------------------------------------- */static char *WAIS_do_qlist(ql)qlist_t *ql;{ char *ll, *rl, *nl; if (ql->type == LOGICAL) { if (ql->op == NOT) return NULL; if ((ll = WAIS_do_qlist((qlist_t *) ql->llist)) == NULL) return NULL; if ((rl = WAIS_do_qlist((qlist_t *) ql->rlist)) == NULL) { xfree(ll); return NULL; } nl = xmalloc(SEL_SIZE); nl[0] = '('; nl[1] = '\0'; strcat(nl, ll); xfree(ll); switch (ql->op) { case AND: strcat(nl, " AND "); break; case OR: strcat(nl, " OR "); break; default:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -