⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 index.c

📁 harvest是一个下载html网页得机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
static char rcsid[] = "index.c,v 1.14 1996/01/04 04:07:06 duane Exp";/*  *  index.c -- Nebula stuff * *   William G. Camargo, Penn State Univ. *  Chanda Dharap,  Penn State Univ.   *  Darren Hardy, Univ. of Colorado - Boulder * *  ---------------------------------------------------------------------- *  Copyright (c) 1994, 1995.  All rights reserved. *   *    The Harvest software was developed by the Internet Research Task *    Force Research Group on Resource Discovery (IRTF-RD): *   *          Mic Bowman of Transarc Corporation. *          Peter Danzig of the University of Southern California. *          Darren R. Hardy of the University of Colorado at Boulder. *          Udi Manber of the University of Arizona. *          Michael F. Schwartz of the University of Colorado at Boulder. *          Duane Wessels of the University of Colorado at Boulder. *   *    This copyright notice applies to software in the Harvest *    ``src/'' directory only.  Users should consult the individual *    copyright notices in the ``components/'' subdirectories for *    copyright information about other software bundled with the *    Harvest source code distribution. *   *  TERMS OF USE *     *    The Harvest software may be used and re-distributed without *    charge, provided that the software origin and research team are *    cited in any use of the system.  Most commonly this is *    accomplished by including a link to the Harvest Home Page *    (http://harvest.cs.colorado.edu/) from the query page of any *    Broker you deploy, as well as in the query result pages.  These *    links are generated automatically by the standard Broker *    software distribution. *     *    The Harvest software is provided ``as is'', without express or *    implied warranty, and with no support nor obligation to assist *    in its use, correction, modification or enhancement.  We assume *    no liability with respect to the infringement of copyrights, *    trade secrets, or any patents, and are not responsible for *    consequential damages.  Proper use of the Harvest software is *    entirely the responsibility of the user. *   *  DERIVATIVE WORKS *   *    Users may make derivative works from the Harvest software, subject  *    to the following constraints: *   *      - You must include the above copyright notice and these  *        accompanying paragraphs in all forms of derivative works,  *        and any documentation and other materials related to such  *        distribution and use acknowledge that the software was  *        developed at the above institutions. *   *      - You must notify IRTF-RD regarding your distribution of  *        the derivative work. *   *      - You must clearly notify users that your are distributing  *        a modified version and not the original Harvest software. *   *      - Any derivative product is also subject to these copyright  *        and use restrictions. *   *    Note that the Harvest software is NOT in the public domain.  We *    retain copyright, as specified above. *   *  HISTORY OF FREE SOFTWARE STATUS *   *    Originally we required sites to license the software in cases *    where they were going to build commercial products/services *    around Harvest.  In June 1995 we changed this policy.  We now *    allow people to use the core Harvest software (the code found in *    the Harvest ``src/'' directory) for free.  We made this change *    in the interest of encouraging the widest possible deployment of *    the technology.  The Harvest software is really a reference *    implementation of a set of protocols and formats, some of which *    we intend to standardize.  We encourage commercial *    re-implementations of code complying to this set of standards.   *   */#include "broker.h"#include "log.h"#include "index.h"#define CLIENT 1#include "unp.h"/* Global variables */extern char *DIRpath;extern char *brk_obj_url;extern int IndexType;extern char *unpi_error;typedef struct NEBQ {    char *fname;    struct NEBQ *next;} NEBq;LOCAL NEBq *NEB_indexq = NULL;LOCAL NEBq *NEB_deleteq = NULL;LOCAL UNPI *uptr = NULL;LOCAL POINTER NEB_context = NIL;/* configuration variables */LOCAL char *NEB_cname;LOCAL char *NEB_host;LOCAL int NEB_port = 0;LOCAL int NEB_key = 0;/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX * *  * PRIVATE FUNCTIONS *  * XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX *//* ----------------------------------------------------------------- * * NEB_initcon -- initialize the connection to the Nebula server. * ----------------------------------------------------------------- */int NEB_initcon(){    char msg[STRMAX];    int context;    if ((uptr = unpi_connect(NEB_host, NEB_port)) == NULL) {	if (unpi_error != NULL) {	    sprintf(msg, "Unable to connect to Nebula server <%s>\n", unpi_error);	    free(unpi_error);	    fatal(msg);	} else {	    fatal("Connection error.\n");	}    }    if (!unpi_getcontext(uptr, NEB_cname, &context)) {#if DEBUG2	fprintf(stdout, "Unable to locate broker context.\n");#endif	return (NEB_Destroy_Full());	/* creates a new broker context */    }    NEB_context = CVUNID(uptr->address, context);}/* ----------------------------------------------------------------- * * NEB_Index_Object -- index a single object with the Nebula * harvest::index-object function. * ----------------------------------------------------------------- */LOCAL int NEB_Index_Object(entry)     reg_t *entry;{    static int fclass = -1;    static int fhandle = -1;    char *fn;    POINTER ptr1, ptr2;    /* ----- Initialization ----- */    unpi_error = NULL;    if (uptr == NULL)	NEB_initcon();    if (fclass == -1) {	if (!unpi_getfunction(uptr, "harvest::index-object", &fclass, &fhandle))	    return ERROR;#if DEBUG2	fprintf(stdout, "harvest::index-object: %d %d\n", fclass, fhandle);#endif    }    /* ----- Create argument list ----- */    fn = SM_Get_Obj_Filename(entry->FD);    ptr1 = CVLIST(CVSTRING(fn), NIL);    /* ----- Process RPC ----- */    ptr2 = ccall(uptr->channel, fclass, fhandle, NEB_key, NEB_context, ptr1, NIL);    if ((ptr2 == NIL) && (unpi_error != NULL)) {	fprintf(stderr, "Nebula index-object failed: %s\n", (char *) unpi_error);	FREEP(ptr1);	free(unpi_error);	return ERROR;    }#if DEBUG2    fprintf(stdout, "Successfully indexed object:\n");    writeexp(stdout, ptr2);    unpputc(stdout, '\n');#endif    /* ----- Clean up ----- */    FREEP(ptr1);    FREEP(ptr2);    free(fn);    return (SUCCESS);}/* ----------------------------------------------------------------- * * NEB_Index_Batch -- incrementally index the current batch of objects * using the Nebula function harvest::index-batch. * ----------------------------------------------------------------- */LOCAL int NEB_Index_Batch(){    static int fclass = -1;    static int fhandle = -1;    NEBq *qptr = NULL, *tptr = NULL;    POINTER ptr1, ptr2;    /* ----- Initialization ----- */    unpi_error = NULL;    if (uptr == NULL)	NEB_initcon();    if (fclass == -1) {	if (!unpi_getfunction(uptr, "harvest::index-batch", &fclass, &fhandle))	    return ERROR;#if DEBUG2	fprintf(stdout, "harvest::index-batch: %d %d\n", fclass, fhandle);#endif    }    /* ----- Create argument list ----- */    for (ptr1 = NIL, qptr = NEB_indexq; qptr != NULL; qptr = qptr->next)	ptr1 = CVLIST(CVSTRING(qptr->fname), ptr1);    ptr1 = CVLIST(ptr1, NIL);    /* ----- Process RPC ----- */    ptr2 = ccall(uptr->channel, fclass, fhandle, NEB_key, NEB_context, ptr1, NIL);    if ((ptr2 == NIL) && (unpi_error != NULL)) {	fprintf(stderr, "Nebula index-batch failed: %s\n", (char *) unpi_error);	FREEP(ptr1);	free(unpi_error);	return ERROR;    }#if DEBUG2    fprintf(stdout, "Successfully indexed objects:\n");    writeexp(stdout, ptr2);    unpputc(stdout, '\n');#endif    /* ----- Clean up ----- */    FREEP(ptr1);    FREEP(ptr2);    for (qptr = NEB_indexq; qptr != NULL; qptr = tptr) {	tptr = qptr->next;	free(qptr->fname);	free(qptr);    }    NEB_indexq = NULL;    return (SUCCESS);}/* ----------------------------------------------------------------- * * NEB_Index_Full -- remove the existing database and force a full * reindexing; this may take awhile.  This uses the Nebula function * harvest::index-full. * ----------------------------------------------------------------- */LOCAL int NEB_Index_Full(){    char buf[STRMAX];    static int fclass = -1;    static int fhandle = -1;    POINTER ptr1, ptr2;    NEBq *qptr, *tptr;    /* ----- Initialization ----- */    unpi_error = NULL;    if (uptr == NULL)	NEB_initcon();    if (fclass == -1) {	if (!unpi_getfunction(uptr, "harvest::index-full", &fclass, &fhandle)) {	    if (unpi_error != NULL) {		fprintf(stderr, "Nebula Error: %s\n", (char *) unpi_error);		free(unpi_error);	    }	    return ERROR;	}#if DEBUG2	fprintf(stdout, "harvest::index-full: %d %d\n", fclass, fhandle);#endif    }    /* ----- Remove the old context ----- */    if (NEB_Destroy_Full() != SUCCESS)	return ERROR;    /* ----- Create argument list ----- */    sprintf(buf, "%s/objects", DIRpath);    ptr1 = CVLIST(CVSTRING(buf), NIL);    /* ----- Process RPC ----- */    unpi_error = NULL;    ptr2 = ccall(uptr->channel, fclass, fhandle, NEB_key, NEB_context, ptr1, NIL);    if ((ptr2 == NIL) && (unpi_error != NULL)) {	fprintf(stderr, "Nebula index-full failed: %s\n", (char *) unpi_error);	FREEP(ptr1);	free(unpi_error);	return ERROR;    }#if DEBUG2    fprintf(stdout, "Successfully reindexed database:\n");    writeexp(stdout, ptr2);    unpputc(stdout, '\n');#endif    /* ----- Clean up ----- */    FREEP(ptr1);    FREEP(ptr2);    for (qptr = NEB_indexq; qptr != NULL; qptr = tptr) {	tptr = qptr->next;	free(qptr->fname);	free(qptr);    }    return (SUCCESS);}/* ----------------------------------------------------------------- * * NEB_Destroy_Object -- remove a single object from the Nebula * server using the function harvest::destroy-object. * ----------------------------------------------------------------- */LOCAL int NEB_Destroy_Object(entry)     reg_t *entry;{    static int fclass = -1;    static int fhandle = -1;    char *fn;    POINTER ptr1, ptr2;    /* ----- Initialization ----- */    unpi_error = NULL;    if (uptr == NULL)	NEB_initcon();    if (fclass == -1) {	if (!unpi_getfunction(uptr, "harvest::destroy-object", &fclass, &fhandle))	    return ERROR;#if DEBUG2	fprintf(stdout, "harvest::destroy-object: %d %d\n", fclass, fhandle);#endif    }    /* ----- Create argument list ----- */    fn = SM_Get_Obj_Filename(entry->FD);    ptr1 = CVLIST(CVSTRING(fn), NIL);    /* ----- Process RPC ----- */    ptr2 = ccall(uptr->channel, fclass, fhandle, NEB_key, NEB_context, ptr1, NIL);    if ((ptr2 == NIL) && (unpi_error != NULL)) {	fprintf(stderr, "Nebula destroy-object failed: %s\n", (char *) unpi_error);	FREEP(ptr1);	free(unpi_error);	return ERROR;    }#if DEBUG2    fprintf(stdout, "Successfully removed object:\n");    writeexp(stdout, ptr2);    unpputc(stdout, '\n');#endif    /* ----- Clean up ----- */    FREEP(ptr1);    FREEP(ptr2);    free(fn);    return (SUCCESS);}/* ----------------------------------------------------------------- * * NEB_Destroy_Batch -- incrementally index the current batch of objects. * ----------------------------------------------------------------- */LOCAL int NEB_Destroy_Batch(){    static int fclass = -1;    static int fhandle = -1;    NEBq *qptr = NULL, *tptr = NULL;    POINTER ptr1, ptr2;    /* ----- Initialization ----- */    unpi_error = NULL;    if (uptr == NULL)	NEB_initcon();    if (fclass == -1) {#if DEBUG3	fprintf(stdout, "Initializing harvest::destroy-batch.\n");#endif	if (!unpi_getfunction(uptr, "harvest::destroy-batch", &fclass, &fhandle))	    return ERROR;#if DEBUG3	fprintf(stdout, "harvest::destroy-batch: %d %d\n", fclass, fhandle);#endif    }    /* ----- Create argument list ----- */    for (ptr1 = NIL, qptr = NEB_deleteq; qptr != NULL; qptr = qptr->next)	ptr1 = CVLIST(CVSTRING(qptr->fname), ptr1);    ptr1 = CVLIST(ptr1, NIL);    /* ----- Process RPC ----- */    ptr2 = ccall(uptr->channel, fclass, fhandle, NEB_key, NEB_context, ptr1, NIL);    if ((ptr2 == NIL) && (unpi_error != NULL)) {	fprintf(stderr, "Nebula destroy-batch failed: %s\n", (char *) unpi_error);	FREEP(ptr1);	free(unpi_error);	return ERROR;    }#if DEBUG2    fprintf(stdout, "Successfully removed objects:\n");    writeexp(stdout, ptr2);    unpputc(stdout, '\n');#endif    /* ----- Clean up ----- */    FREEP(ptr1);    FREEP(ptr2);    for (qptr = NEB_deleteq; qptr != NULL; qptr = tptr) {	tptr = qptr->next;	free(qptr->fname);	free(qptr);    }    NEB_deleteq = NULL;    return (SUCCESS);}/* ----------------------------------------------------------------- * * NEB_Destroy_Full -- remove the existing database using the Nebula * harvest::destroy-full function.  This will force the creation of * a new broker context is saved in NEB_context. * ----------------------------------------------------------------- */LOCAL int NEB_Destroy_Full(){    static int fclass = -1;    static int fhandle = -1;    POINTER ptr1, ptr2, cptr;    /* ----- Initialization ----- */    unpi_error = NULL;    if (uptr == NULL)	NEB_initcon();    if (fclass == -1) {	if (!unpi_getfunction(uptr, "harvest::destroy-full", &fclass, &fhandle))	    return ERROR;#if DEBUG2	fprintf(stdout, "harvest::destroy-full: %d %d\n", fclass, fhandle);#endif    }    /* ----- Create argument list ----- */    ptr1 = CVLIST(CVSTRING(NEB_cname), NIL);    cptr = CVUNID(uptr->address, 4);    /* ----- Process RPC ----- */    ptr2 = ccall(uptr->channel, fclass, fhandle, NEB_key, cptr, ptr1, NIL);    if ((ptr2 == NIL) && (unpi_error != NULL)) {	fprintf(stderr, "Nebula destroy-full failed: %s\n", (char *) unpi_error);	FREEP(ptr1);	free(unpi_error);	return ERROR;    }#if DEBUG2    fprintf(stdout, "Successfully removed database:\n");    writeexp(stdout, ptr2);    unpputc(stdout, '\n');#endif    /* ----- Clean up ----- */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -