📄 index.c
字号:
static char rcsid[] = "$Id: index.c,v 2.8 2000/02/03 12:45:55 sxw Exp $";/* * index.c -- Broker indexing/search support using Glimpse * * DEBUG: section 102, level 1 Broker glimpse indexing engine * AUTHOR: Harvest derived * * You can define the following values in the broker.conf file: * * Glimpse 'glimpse'command * GlimpseIndex 'glimpseindex' command * GlimpseIndex-Option 'glimpseindex' options * GlimpseIndex-Flags 'glimpseindex' (extra) flags * GlimpseServer 'glimpseserver' command * GlimpseServer-Host 'glimpseserver' host * GlimpseServer-Restart Restart glimpseserver after every N queries * Glimpse-MaxLife Max lifetime value * * * Harvest Indexer http://www.tardis.ed.ac.uk/harvest/ * --------------------------------------------------- * * The Harvest Indexer is a continued development of code developed by * the Harvest Project. Development is carried out by numerous individuals * in the Internet community, and is not officially connected with the * original Harvest Project or its funding sources. * * Please mail harvest@tardis.ed.ac.uk if you are interested in participating * in the development effort. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* ---------------------------------------------------------------------- * Copyright (c) 1994, 1995. All rights reserved. * * The Harvest software was developed by the Internet Research Task * Force Research Group on Resource Discovery (IRTF-RD): * * Mic Bowman of Transarc Corporation. * Peter Danzig of the University of Southern California. * Darren R. Hardy of the University of Colorado at Boulder. * Udi Manber of the University of Arizona. * Michael F. Schwartz of the University of Colorado at Boulder. * Duane Wessels of the University of Colorado at Boulder. * * This copyright notice applies to software in the Harvest * ``src/'' directory only. Users should consult the individual * copyright notices in the ``components/'' subdirectories for * copyright information about other software bundled with the * Harvest source code distribution. * * TERMS OF USE * * The Harvest software may be used and re-distributed without * charge, provided that the software origin and research team are * cited in any use of the system. Most commonly this is * accomplished by including a link to the Harvest Home Page * (http://harvest.cs.colorado.edu/) from the query page of any * Broker you deploy, as well as in the query result pages. These * links are generated automatically by the standard Broker * software distribution. * * The Harvest software is provided ``as is'', without express or * implied warranty, and with no support nor obligation to assist * in its use, correction, modification or enhancement. We assume * no liability with respect to the infringement of copyrights, * trade secrets, or any patents, and are not responsible for * consequential damages. Proper use of the Harvest software is * entirely the responsibility of the user. * * DERIVATIVE WORKS * * Users may make derivative works from the Harvest software, subject * to the following constraints: * * - You must include the above copyright notice and these * accompanying paragraphs in all forms of derivative works, * and any documentation and other materials related to such * distribution and use acknowledge that the software was * developed at the above institutions. * * - You must notify IRTF-RD regarding your distribution of * the derivative work. * * - You must clearly notify users that your are distributing * a modified version and not the original Harvest software. * * - Any derivative product is also subject to these copyright * and use restrictions. * * Note that the Harvest software is NOT in the public domain. We * retain copyright, as specified above. * * HISTORY OF FREE SOFTWARE STATUS * * Originally we required sites to license the software in cases * where they were going to build commercial products/services * around Harvest. In June 1995 we changed this policy. We now * allow people to use the core Harvest software (the code found in * the Harvest ``src/'' directory) for free. We made this change * in the interest of encouraging the widest possible deployment of * the technology. The Harvest software is really a reference * implementation of a set of protocols and formats, some of which * we intend to standardize. We encourage commercial * re-implementations of code complying to this set of standards. * */#include "broker.h"#include "log.h"#include "Glimpse/index.h"#ifndef USE_PARENS_FOR_BOOLEAN#define USE_PARENS_FOR_BOOLEAN#endif#ifndef MAX_OPDATA_SIZE#define MAX_OPDATA_SIZE 1024#endif/* some machines do not have this defined in netinet/in.h/* eg. SGI */#ifndef IPPORT_USERRESERVED#define IPPORT_USERRESERVED 5000#endif/* Global variables */extern char *DIRpath;extern char *brk_obj_url;extern int qsock;extern int IndexType;extern int QM_opaqueflag;extern int QM_gotphrase; /* got a quoted phrase or not */extern int IndexServer_pid;extern int IndexServer_ForceRestart; /* used by #restart-index-server */extern char *SM_Get_Obj_Filename(); /* only UNIX filesystem SM *//* Local functions */#define LOCAL staticLOCAL int GL_Index_Object _PARAMS((reg_t *));LOCAL char *GL_do_qlist _PARAMS((qlist_t *));LOCAL char *GL_build_select _PARAMS((qlist_t *));LOCAL fd_t GL_getfd _PARAMS((char *));LOCAL void Glimpse_Start_Glimpseserver _PARAMS((void));LOCAL void Glimpse_Kill_Glimpseserver _PARAMS((void));LOCAL int Glimpse_Start_Indexing _PARAMS((char *));/* Local variables */LOCAL char *GL_Glimpse = NULL;LOCAL char *GL_GlimpseInd = NULL;LOCAL char *GL_GlimpseServer = NULL;LOCAL char *GL_GlimpseSrvHost = NULL;LOCAL int GL_NewObj;LOCAL int GL_GlimpseSrvPort = 0;LOCAL int GL_GlimpseSrvRestart = 0;LOCAL int GL_caseflag;LOCAL int GL_wordflag;LOCAL int GL_errflag;LOCAL int GL_regexflag;LOCAL int GL_noregex;LOCAL int GL_maxresults = 0; /* old way */LOCAL int GL_maxlines = 0; /* new way */LOCAL int GL_maxfiles = 0; /* new way */LOCAL char *GL_IndexOption = NULL;LOCAL char *GL_IndexFlags = NULL;LOCAL int GL_illegal_query = 0;LOCAL int GL_ncalled = 0; /* number of queries against current GLsvr */LOCAL int GL_lifetime = 15 * 60;LOCAL int GL_max_lifetime = 15 * 60; /* 15 minutes */#define BADQ_STR \ "103 - ERROR: Glimpse Indexer cannot support your query.\n"#define REPLUSWORD_ERR \ "103 - ERROR: Glimpse Indexer cannot support regular expression matching on word boundaries!\n"#define BIG_BUFSIZ (8*BUFSIZ) /* for very long lines *//* * Glimpse_Start_Glimpseserver - starts a Glimpse server process. */LOCAL void Glimpse_Start_Glimpseserver(){ static char comm[BUFSIZ]; static int ntries = 0; if (!strcasecmp(GL_GlimpseServer, "false")) { GL_GlimpseSrvPort = -1; return; } if (GL_GlimpseSrvPort <= IPPORT_USERRESERVED) { /* choose a random port number between 16384-30000 */#if defined(HAVE_SRAND48) && defined(HAVE_LRAND48) srand48(time(NULL)); GL_GlimpseSrvPort = (lrand48() & 0x3FFF) | 0x4000;#else srand(time(NULL)); GL_GlimpseSrvPort = (rand() & 0x3FFF) | 0x4000;#endif if (GL_GlimpseSrvPort >= 30000) GL_GlimpseSrvPort -= 2767; } Log("Starting %s on port %d.\n", GL_GlimpseServer, GL_GlimpseSrvPort); sprintf(comm, "%s -H %s -K %d", GL_GlimpseServer, DIRpath, GL_GlimpseSrvPort); Debug(102, 1, ("\tcommand:%s:\n", comm)); /* must use fork() rather than vfork() which causes memory leaks */ if ((IndexServer_pid = fork()) == 0) { /* child */ char *argv[64]; close_all_fds(3); memset(argv, '\0', sizeof(argv)); parse_argv(argv, comm); execvp(argv[0], argv); perror(argv[0]); _exit(1); } /* parent */ /* * leave IndexServer_pid negative so that it doesn't get * restarted later in do_query. */ ntries++; if (IndexServer_pid < 0) { log_errno("fork"); return; } sleep(5); /* give glimpseserver a little time */ /* See if it is still running. It might have problems binding */ /* to the port or some other nonsense. */ if (kill(IndexServer_pid, 0) < 0) { Log("%s failed to start. The full command is\n %s\n", GL_GlimpseServer, comm); IndexServer_pid = 0; /* make it try again */ if (ntries > 3) { /* only three times */ IndexServer_pid = -1; GL_GlimpseSrvPort = 0; } return; } Log("%s (pid %d) is on-line...\n", GL_GlimpseServer, IndexServer_pid); ntries = 0; /* reset */ return; /* parent */}LOCAL void Glimpse_Kill_Glimpseserver(){ int nsleep = 60; if (IndexServer_pid > 0) { Log("Killing glimpseserver (pid %d)...\n", IndexServer_pid); (void) kill(IndexServer_pid, SIGUSR1); /* clean up */ sleep(5); (void) kill(IndexServer_pid, SIGTERM); /* die */ while (--nsleep > 0 && !kill(IndexServer_pid, 0)) sleep(1); (void) kill(IndexServer_pid, SIGKILL); /* I mean it, die */ /* Dont sleep 60 seconds anymore. It wasn't */ /* working on Solaris 2.4 anyway. Now */ /* glimpseserver uses SO_REUSEADDR instead. */ /* -DW 7/11/95 */ /* * Log("Waiting %d seconds for glimpseserver port to be released...\n", nsleep); * sleep(nsleep); */ } IndexServer_pid = 0;}/* * Glimpse_Start_Indexing - Start a glimpseindex process. */LOCAL int Glimpse_Start_Indexing(comm) char *comm;{ int pid; int status = 0; /* If there's a glimpseserver running, kill it */ if (GL_GlimpseSrvPort > 0 && IndexServer_pid > 0) { Glimpse_Kill_Glimpseserver(); } Debug(102, 1, ("\tcommand: %s:\n", comm)); /* must use fork() rather than vfork() which causes memory leaks */ if ((pid = fork()) < 0) { log_errno("fork"); return ERROR; } if (pid == 0) { /* child */ char *argv[64]; close_all_fds(3); memset(argv, '\0', sizeof(argv)); parse_argv(argv, comm); execvp(argv[0], argv); perror(argv[0]); _exit(1); } /* parent */ Log("Waiting for glimpseindex to finish...\n"); /* while glimpseindex is running, explicitly wait for it */ while (waitpid(pid, &status, WNOHANG) != pid) { select_loop(15, 0, 0); /* deny outside connections */ if (kill(pid, 0) != 0) break; /* child died, and was caught by sigreap */ } /* Restart glimpseserver */ if (strcasecmp(GL_GlimpseServer, "false")) { Glimpse_Start_Glimpseserver(); GL_ncalled = 0; } return SUCCESS;}/* ----------------------------------------------------------------- * * GL_Index_Object -- using glimpse -a to index a single object * ----------------------------------------------------------------- */LOCAL int GL_Index_Object(entry) reg_t *entry;{ static char comm[BUFSIZ]; char *fn = NULL; fn = SM_Get_Obj_Filename(entry->FD); sprintf(comm, "%s %s %s -a -H %s %s", GL_GlimpseInd, GL_IndexOption, GL_IndexFlags, DIRpath, fn); xfree(fn); fn = NULL; return (Glimpse_Start_Indexing(comm));}/* ----------------------------------------------------------------- * * GL_bulk_query - do bulk transfer of all objects that match the query * ----------------------------------------------------------------- */LOCAL int GL_bulk_query(rsock, indexfp, ptime) int rsock; FILE *indexfp; time_t ptime;{ static char ret[BIG_BUFSIZ]; fd_t qfd; fd_t oldfd = -1; int cnt = 0; reg_t *bentry = NULL; FILE *fp = NULL; if ((fp = fdopen(rsock, "w")) == NULL) { log_errno("fdopen"); QM_send_bulk_err(rsock); return ERROR; } QM_send_bulk_begin(rsock); while (fgets(ret, BIG_BUFSIZ, indexfp) != NULL) { if (((qfd = GL_getfd(ret)) != ERROR) && (qfd != oldfd) && ((bentry = RG_Get_Entry(qfd)) != NULL) && (bentry->update_time >= ptime) && (QM_send_bulk_fd(qfd, fp, bentry) == SUCCESS)) { cnt++; } } fflush(fp); /* critical, must flush before termination */ QM_send_bulk_end(rsock); fclose(fp); return SUCCESS;}/* ----------------------------------------------------------------- * * GL_del_query -- delete all objects that match the query. * ----------------------------------------------------------------- */LOCAL int GL_del_query(rsock, indexfp) int rsock; FILE *indexfp;{ static char ret[BIG_BUFSIZ]; fd_t qfd, oldfd = -1; int cnt = 0; reg_t *rme = NULL; while (fgets(ret, BIG_BUFSIZ, indexfp) != NULL) { if (((qfd = GL_getfd(ret)) != ERROR) && (qfd != oldfd) && ((rme = RG_Get_Entry(qfd)) != NULL)) { COL_DEL_Obj(rme); cnt++; } } Log("Deleted %d objects based on query.\n", cnt); return SUCCESS;}/* ----------------------------------------------------------------- * * GL_user_query -- Read the output of the Glimpse query on indexfp, then * send to rsock via protocol. * ----------------------------------------------------------------- */LOCAL int GL_user_query(rsock, indexfp) int rsock; FILE *indexfp;{ fd_t fd1; fd_t fd2 = (fd_t) (-1); char *inb = NULL; char *opb = NULL; char **opdata = NULL; char *tmp = NULL; char *tmp2 = NULL; char *s = NULL; int opsize = 0; int obcnt = 0; int i; /* If the query was illegal, give up quickly */ if (GL_illegal_query) { SWRITE(rsock, BADQ_STR, strlen(BADQ_STR)); return ERROR; } /*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -