📄 index.c
字号:
static char rcsid[] = "index.c,v 1.5 1996/01/04 04:07:07 duane Exp";/* * index.c -- Broker indexing/search support using PLS, Inc.'s PLWeb 2.0 * * Darren Hardy, U. Colorado - Boulder * * DEBUG: section 101, level 1 Broker PLWeb indexing engine * * You can define the following values in the broker.conf file: * * PLS-Root Directory in which PLWeb is installed * PLS-DBgroup Name of PLWeb database group * PLS-DBname Name of PLWeb database * PLS-Num-Reorg Number of deletions before plreorg is run * * ---------------------------------------------------------------------- * Copyright (c) 1994, 1995. All rights reserved. * * The Harvest software was developed by the Internet Research Task * Force Research Group on Resource Discovery (IRTF-RD): * * Mic Bowman of Transarc Corporation. * Peter Danzig of the University of Southern California. * Darren R. Hardy of the University of Colorado at Boulder. * Udi Manber of the University of Arizona. * Michael F. Schwartz of the University of Colorado at Boulder. * Duane Wessels of the University of Colorado at Boulder. * * This copyright notice applies to software in the Harvest * ``src/'' directory only. Users should consult the individual * copyright notices in the ``components/'' subdirectories for * copyright information about other software bundled with the * Harvest source code distribution. * * TERMS OF USE * * The Harvest software may be used and re-distributed without * charge, provided that the software origin and research team are * cited in any use of the system. Most commonly this is * accomplished by including a link to the Harvest Home Page * (http://harvest.cs.colorado.edu/) from the query page of any * Broker you deploy, as well as in the query result pages. These * links are generated automatically by the standard Broker * software distribution. * * The Harvest software is provided ``as is'', without express or * implied warranty, and with no support nor obligation to assist * in its use, correction, modification or enhancement. We assume * no liability with respect to the infringement of copyrights, * trade secrets, or any patents, and are not responsible for * consequential damages. Proper use of the Harvest software is * entirely the responsibility of the user. * * DERIVATIVE WORKS * * Users may make derivative works from the Harvest software, subject * to the following constraints: * * - You must include the above copyright notice and these * accompanying paragraphs in all forms of derivative works, * and any documentation and other materials related to such * distribution and use acknowledge that the software was * developed at the above institutions. * * - You must notify IRTF-RD regarding your distribution of * the derivative work. * * - You must clearly notify users that your are distributing * a modified version and not the original Harvest software. * * - Any derivative product is also subject to these copyright * and use restrictions. * * Note that the Harvest software is NOT in the public domain. We * retain copyright, as specified above. * * HISTORY OF FREE SOFTWARE STATUS * * Originally we required sites to license the software in cases * where they were going to build commercial products/services * around Harvest. In June 1995 we changed this policy. We now * allow people to use the core Harvest software (the code found in * the Harvest ``src/'' directory) for free. We made this change * in the interest of encouraging the widest possible deployment of * the technology. The Harvest software is really a reference * implementation of a set of protocols and formats, some of which * we intend to standardize. We encourage commercial * re-implementations of code complying to this set of standards. * */#include "broker.h"#include "log.h"#include "PLWeb/index.h"#ifndef USE_PARENS_FOR_BOOLEAN#undef USE_PARENS_FOR_BOOLEAN#endif#define BADQ_STR \ "103 - ERROR: PLWeb cannot support your query.\n"#define PLS_RESULT_TAG "PLWeb Results: "/* Global variables */extern char *DIRpath;extern char *brk_obj_url;extern int qsock;extern int IndexType;extern int QM_opaqueflag;extern int QM_gotphrase; /* got a quoted phrase or not */extern int IndexServer_pid;extern reg_t *Registry;extern char *SM_Get_Obj_Filename(); /* only UNIX filesystem SM *//* Local variables */#define LOCAL staticLOCAL char *PLWeb_plsroot = NULL;LOCAL char *PLWeb_dbgroup = NULL;LOCAL char *PLWeb_dbname = NULL;LOCAL int PLWeb_nreorg = 0;LOCAL int PLWeb_NewObj = 0;LOCAL int PLWeb_maxresults;LOCAL int PLWeb_illegal_query = 0;LOCAL int PLWeb_ncalled = 0; /* current number of queries against server */LOCAL int PLWeb_lifetime = 15 * 60;LOCAL int PLWeb_max_lifetime = 15 * 60; /* 15 minutes *//* Local functions */LOCAL int PLWeb_bulk_query();LOCAL int PLWeb_del_query();LOCAL int PLWeb_user_query();LOCAL char *PLWeb_do_qlist();LOCAL char *PLWeb_build_select();LOCAL fd_t PLWeb_getfd();#define BIG_BUFSIZ (8*BUFSIZ) /* for very long lines *//* ----------------------------------------------------------------- * * PLWeb_bulk_query - do bulk transfer of all objects that match the query * ----------------------------------------------------------------- */LOCAL int PLWeb_bulk_query(rsock, indexfp, ptime) int rsock; FILE *indexfp; time_t ptime;{ char ret[BIG_BUFSIZ]; fd_t qfd, oldfd = -1; int cnt = 0; reg_t *bentry; FILE *fp; if ((fp = fdopen(rsock, "w")) == NULL) { log_errno("fdopen"); QM_send_bulk_err(rsock); return ERROR; } QM_send_bulk_begin(rsock); while (fgets(ret, BIG_BUFSIZ, indexfp) != NULL) { if (((qfd = PLWeb_getfd(ret)) != ERROR) && (qfd != oldfd) && ((bentry = RG_Get_Entry(qfd)) != NULL) && (bentry->update_time >= ptime) && (QM_send_bulk_fd(qfd, fp, bentry) == SUCCESS)) { cnt++; } } fflush(fp); /* critical, must flush before termination */ QM_send_bulk_end(rsock); fclose(fp); return SUCCESS;}/* ----------------------------------------------------------------- * * PLWeb_del_query -- delete all objects that match the query. * ----------------------------------------------------------------- */LOCAL int PLWeb_del_query(rsock, indexfp) int rsock; FILE *indexfp;{ char ret[BIG_BUFSIZ]; fd_t qfd, oldfd = -1; int cnt = 0; reg_t *rme; while (fgets(ret, BIG_BUFSIZ, indexfp) != NULL) { if (((qfd = PLWeb_getfd(ret)) != ERROR) && (qfd != oldfd) && ((rme = RG_Get_Entry(qfd)) != NULL)) { COL_DEL_Obj(rme); cnt++; } } Log("Deleted %d objects based on query.\n", cnt); return SUCCESS;}/* ----------------------------------------------------------------- * * PLWeb_user_query -- Read the output of the PLWeb query on indexfp, then * send to rsock via protocol. * ----------------------------------------------------------------- */LOCAL int PLWeb_user_query(rsock, indexfp) int rsock; FILE *indexfp;{ fd_t fd1, fd2 = (fd_t) (-1); char inb[BIG_BUFSIZ], opb[BUFSIZ], *opdata[BIG_BUFSIZ], *tmp, *s; int opsize = 0, obcnt = 0, i, rank, sumsize; /* If the query was illegal, give up quickly */ if (PLWeb_illegal_query) { SWRITE(rsock, BADQ_STR, strlen(BADQ_STR)); return ERROR; } /* * Before we return the query results, we perform 2 write's on * the socket to the client to test whether or not the client * will be able to receive the query results. * We have to do two writes because the first will complete * even though the other side is gone. */ (void) write(rsock, PIPECHK, strlen(PIPECHK)); if (write(rsock, PIPECHK, strlen(PIPECHK)) == -1) { errorlog("Client is gone -- aborting user query results.\n"); close(rsock); return ERROR; } memset(opdata, '\0', BIG_BUFSIZ * sizeof(char *)); /* zero out opdata */ while (fgets(inb, BIG_BUFSIZ, indexfp) != NULL) { if ((fd1 = PLWeb_getfd(inb)) == ERROR) { continue; } (void) strtok(inb, "\t"); /* tag + URL */ rank = atoi(strtok(NULL, "\t")); /* rank info */ sumsize = atoi(strtok(NULL, "\t")); /* summary size */ sprintf(inb, "Rank: %d Summary Size: %d bytes\n", rank, sumsize); opdata[0] = xstrdup(inb); opdata[1] = NULL; opsize = 1; if ((fd1 != fd2) && (fd2 != (fd_t) (-1))) { /* return the previous object */ if (QM_user_object(rsock, fd2, opsize, opdata) == SUCCESS) obcnt++; /* free the opaque data */ for (i = 0; i < BUFSIZ; i++) { if (opdata[i] != NULL) { xfree(opdata[i]); opdata[i] = NULL; } } opsize = 0; } fd2 = fd1; } /* Get the last object */ if (fd2 != (fd_t) (-1)) { if (QM_user_object(rsock, fd2, opsize, opdata) == SUCCESS) obcnt++; } QM_user_done(rsock, obcnt); /* Free memory */ for (i = 0; i < BUFSIZ; i++) if (opdata[i] != NULL) xfree(opdata[i]); return SUCCESS;}/* ----------------------------------------------------------------- * * PLWeb_do_qlist -- Recursive function to build a query from the list. * ----------------------------------------------------------------- */LOCAL char *PLWeb_do_qlist(ql) qlist_t *ql;{#ifdef USE_PARENS_FOR_BOOLEAN char *ll, *rl; static char *nl; if (ql->type == LOGICAL) { if (ql->op == NOT) { return NULL; } if ((ll = PLWeb_do_qlist((qlist_t *) ql->llist)) == NULL) { return NULL; } if ((rl = PLWeb_do_qlist((qlist_t *) ql->rlist)) == NULL) { xfree(ll); return NULL; } nl = (char *) xmalloc(BUFSIZ); nl[0] = '('; nl[1] = '\0'; strcat(nl, ll); switch (ql->op) { case AND: strncat(nl, " AND ", 5); break; case OR: strncat(nl, " OR ", 4); break; default: xfree(nl); xfree(rl); xfree(ll); return NULL; } strcat(nl, rl); strcat(nl, ")"); xfree(ll); xfree(rl); return (nl); } return (PLWeb_build_select(ql));#else char *ll, *rl; if (ql->type == LOGICAL) { if (ql->op == NOT) { return NULL; } if ((ll = PLWeb_do_qlist((qlist_t *) ql->llist)) == NULL) { return NULL; } if ((rl = PLWeb_do_qlist((qlist_t *) ql->rlist)) == NULL) { xfree(ll); return NULL; } switch (ql->op) { case AND: strncat(ll, " AND ", 5); break; case OR: strncat(ll, " OR ", 4); break; default: xfree(rl); xfree(ll); return NULL; } strcat(ll, rl); xfree(rl); return (ll); } return (PLWeb_build_select(ql));#endif}/* ----------------------------------------------------------------- * * PLWeb_build_select -- Build the basic PLWeb query. * ----------------------------------------------------------------- */LOCAL char *PLWeb_build_select(ql) qlist_t *ql;{ static char *tmp; if (ql->op == EXACT) { tmp = (char *) xmalloc(BUFSIZ);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -