⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 main.c

📁 harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
static char rcsid[] = "$Id: main.c,v 2.5 2000/01/21 17:37:33 sxw Exp $";/* *  main.c -- main module of the Harvest Broker. * *  DEBUG: section  77, level 1		Broker main *  AUTHOR: Harvest derived * *  Harvest Indexer http://harvest.sourceforge.net/ *  ----------------------------------------------- * *  The Harvest Indexer is a continued development of code developed by *  the Harvest Project. Development is carried out by numerous individuals *  in the Internet community, and is not officially connected with the *  original Harvest Project or its funding sources. * *  Please mail lee@arco.de if you are interested in participating *  in the development effort. * *  This program is free software; you can redistribute it and/or modify *  it under the terms of the GNU General Public License as published by *  the Free Software Foundation; either version 2 of the License, or *  (at your option) any later version. * *  This program is distributed in the hope that it will be useful, *  but WITHOUT ANY WARRANTY; without even the implied warranty of *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *  GNU General Public License for more details. * *  You should have received a copy of the GNU General Public License *  along with this program; if not, write to the Free Software *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//*  ---------------------------------------------------------------------- *  Copyright (c) 1994, 1995.  All rights reserved. * *    The Harvest software was developed by the Internet Research Task *    Force Research Group on Resource Discovery (IRTF-RD): * *          Mic Bowman of Transarc Corporation. *          Peter Danzig of the University of Southern California. *          Darren R. Hardy of the University of Colorado at Boulder. *          Udi Manber of the University of Arizona. *          Michael F. Schwartz of the University of Colorado at Boulder. 
*          Duane Wessels of the University of Colorado at Boulder. * *    This copyright notice applies to software in the Harvest *    ``src/'' directory only.  Users should consult the individual *    copyright notices in the ``components/'' subdirectories for *    copyright information about other software bundled with the *    Harvest source code distribution. * *  TERMS OF USE * *    The Harvest software may be used and re-distributed without *    charge, provided that the software origin and research team are *    cited in any use of the system.  Most commonly this is *    accomplished by including a link to the Harvest Home Page *    (http://harvest.cs.colorado.edu/) from the query page of any *    Broker you deploy, as well as in the query result pages.  These *    links are generated automatically by the standard Broker *    software distribution. * *    The Harvest software is provided ``as is'', without express or *    implied warranty, and with no support nor obligation to assist *    in its use, correction, modification or enhancement.  We assume *    no liability with respect to the infringement of copyrights, *    trade secrets, or any patents, and are not responsible for *    consequential damages.  Proper use of the Harvest software is *    entirely the responsibility of the user. * *  DERIVATIVE WORKS * *    Users may make derivative works from the Harvest software, subject *    to the following constraints: * *      - You must include the above copyright notice and these *        accompanying paragraphs in all forms of derivative works, *        and any documentation and other materials related to such *        distribution and use acknowledge that the software was *        developed at the above institutions. * *      - You must notify IRTF-RD regarding your distribution of *        the derivative work. * *      - You must clearly notify users that your are distributing *        a modified version and not the original Harvest software. 
* *      - Any derivative product is also subject to these copyright *        and use restrictions. * *    Note that the Harvest software is NOT in the public domain.  We *    retain copyright, as specified above. * *  HISTORY OF FREE SOFTWARE STATUS * *    Originally we required sites to license the software in cases *    where they were going to build commercial products/services *    around Harvest.  In June 1995 we changed this policy.  We now *    allow people to use the core Harvest software (the code found in *    the Harvest ``src/'' directory) for free.  We made this change *    in the interest of encouraging the widest possible deployment of *    the technology.  The Harvest software is really a reference *    implementation of a set of protocols and formats, some of which *    we intend to standardize.  We encourage commercial *    re-implementations of code complying to this set of standards. * */#include "broker.h"#include "log.h"#include <locale.h>#include "../common/include/version.h"/* Global Variables - administrative */time_t collect_rate;time_t clean_rate;time_t drefresh_rate;char *HName = NULL;char *DIRpath = NULL;char *BrkHomePage = NULL;char *Gather = NULL;char *WebServer = NULL;char *WebPath = NULL;char *brk_obj_url = NULL;char *ColConfig = NULL;char *Tstr = NULL;char *passwd = NULL;char *IndexerType = NULL;char *aproc = NULL;int IndexType;int qport;int reg_limit;int max_events;int do_fast_start = 0;char *obj_desc;int obj_desc_s;struct indexing_routines *INDEXER;extern REGISTRY_HEADER *RegHdr;extern int broker_offline;/* Global Variables - system */int ndenied_connections = 0;int qsock = -1;time_t Cur_Time;extern int Num_Ev;extern int Log_Terse;time_t Next_Collection = 0;time_t Next_Clean = 0;int collect_flag = 1;		/* default to always do a collection */int comflag = 0;int LogFlag = 0;int IndexServer_pid = 0;int IndexServer_ForceRestart = 0;	/* used by #restart-index-server */int ReadQueryTimeout = READ_QUERY_TIMEOUT;/* Local functions 
*/static int Indexer_Init();static int Initialize_Broker();static void disconnect();/* *  ------------------------------------------------------------------ *           Runtime support for switching Indexing subsystems *  ------------------------------------------------------------------ *  Below is the definitions needed for the run-time indexing support. *  If you want to integrate a new indexing sub-systems, then you'll *  need to add your Indexer functions below. *//*  The Indexer needs a header file containing the function defs */#ifdef USE_GLIMPSE#include "Glimpse/index.h"#endif#ifdef USE_WAIS#include "Wais/index.h"#endif#ifdef USE_NETFIND#include "Netfind/index.h"#endif#ifdef USE_GRASS#include "Grass/index.h"#endif#ifdef USE_VERITY#include "index.h"#endif#ifdef USE_PLWEB#include "PLWeb/index.h"#endif#ifdef USE_SWISH#include "Swish/index.h"#endif#ifdef USE_SH#include "Sh/index.h"#endif/*  These are the valid Indexer types from broker.conf */#define Valid_Indexer_Type(type)    \	( \	!strcmp(type, "Glimpse") || \	!strcmp(type, "Netfind") || \	!strcmp(type, "Grass")   || \	!strcmp(type, "Verity")  || \	!strcmp(type, "PLWeb")   || \	!strcmp(type, "WAIS")    || \	!strcmp(type, "Swish")   || \	!strcmp(type, "Sh")         \	)/*  This is the structure that is used to map the Indexing routines */struct indexing_routines Indexer_Routines[] ={#ifdef USE_GLIMPSE	{"Glimpse",	 Glimpse_IND_Index_Start,	 Glimpse_IND_Index_Flush,	 Glimpse_IND_New_Object,	 Glimpse_IND_Destroy_Obj,	 Glimpse_IND_Index_Full,	 Glimpse_IND_Index_Incremental,	 Glimpse_IND_initialize,	 Glimpse_IND_config,	 Glimpse_IND_do_query,	 Glimpse_IND_Init_Flags,	 Glimpse_IND_Set_Flags	},#endif#ifdef USE_WAIS	{"WAIS",	 WAIS_IND_Index_Start,	 WAIS_IND_Index_Flush,	 WAIS_IND_New_Object,	 WAIS_IND_Destroy_Obj,	 WAIS_IND_Index_Full,	 WAIS_IND_Index_Incremental,	 WAIS_IND_initialize,	 WAIS_IND_config,	 WAIS_IND_do_query,	 WAIS_IND_Init_Flags,	 WAIS_IND_Set_Flags	},#endif#ifdef USE_NETFIND	{"Netfind",	 
NF_IND_Index_Start,	 NF_IND_Index_Flush,	 NF_IND_New_Object,	 NF_IND_Destroy_Obj,	 NF_IND_Index_Full,	 NF_IND_Index_Incremental,	 NF_IND_initialize,	 NF_IND_config,	 NF_IND_do_query,	 NF_IND_Init_Flags,	 NF_IND_Set_Flags	},#endif#ifdef USE_GRASS	{"Grass",	 GRASS_IND_Index_Start,	 GRASS_IND_Index_Flush,	 GRASS_IND_New_Object,	 GRASS_IND_Destroy_Obj,	 GRASS_IND_Index_Full,	 GRASS_IND_Index_Incremental,	 GRASS_IND_initialize,	 GRASS_IND_config,	 GRASS_IND_do_query,	 GRASS_IND_Init_Flags,	 GRASS_IND_Set_Flags	},#endif#ifdef USE_PLWEB	{"PLWeb",	 PLWeb_IND_Index_Start,	 PLWeb_IND_Index_Flush,	 PLWeb_IND_New_Object,	 PLWeb_IND_Destroy_Obj,	 PLWeb_IND_Index_Full,	 PLWeb_IND_Index_Incremental,	 PLWeb_IND_initialize,	 PLWeb_IND_config,	 PLWeb_IND_do_query,	 PLWeb_IND_Init_Flags,	 PLWeb_IND_Set_Flags	},#endif#ifdef USE_VERITY	{"Verity",	 VERITY_IND_Index_Start,	 VERITY_IND_Index_Flush,	 VERITY_IND_New_Object,	 VERITY_IND_Destroy_Obj,	 VERITY_IND_Index_Full,	 VERITY_IND_Index_Incremental,	 VERITY_IND_initialize,	 VERITY_IND_config,	 VERITY_IND_do_query,	 VERITY_IND_Init_Flags,	 VERITY_IND_Set_Flags	}#endif#ifdef USE_SWISH	{"Swish",	 Swish_IND_Index_Start,	 Swish_IND_Index_Flush,	 Swish_IND_New_Object,	 Swish_IND_Destroy_Obj,	 Swish_IND_Index_Full,	 Swish_IND_Index_Incremental,	 Swish_IND_initialize,	 Swish_IND_config,	 Swish_IND_do_query,	 Swish_IND_Init_Flags,	 Swish_IND_Set_Flags	}#endif#ifdef USE_SH	{"Sh",	 Sh_IND_Index_Start,	 Sh_IND_Index_Flush,	 Sh_IND_New_Object,	 Sh_IND_Destroy_Obj,	 Sh_IND_Index_Full,	 Sh_IND_Index_Incremental,	 Sh_IND_initialize,	 Sh_IND_config,	 Sh_IND_do_query,	 Sh_IND_Init_Flags,	 Sh_IND_Set_Flags	}#endif};#define MAX_INDEXER_TYPES \	(sizeof(Indexer_Routines)/sizeof(struct indexing_routines))char *cffile;			/* global, so can be passed to verityindex */int main(argc, argv)int argc;char *argv[];{	int status;	if (argc == 2 && !strcmp(argv[1], "-V") ) {		printf("Harvest Broker version %s.\n", HARVEST_VERSION);		exit(0);	}	(void) setlocale (LC_ALL, 
"");	disconnect();		/* be a nice, robust daemon */#ifdef HAVE_SETLINEBUF	setlinebuf(stdout);	setlinebuf(stderr);#else	setbuf(stdout, NULL);	setbuf(stderr, NULL);#endif	debug_reset();	debug_init();		/* read $HARVEST_DEBUG */	init_log3("broker", stdout, stdout);	Log("Initializing the Broker...\n");	/* intialize global state */	if (argc > 1 && argv[1] != NULL) {		argc--; argv++;		cffile = xstrdup(*argv);	} else {		cffile = xstrdup("admin/broker.conf");	}	/* set up signal handlers */	(void) signal(SIGINT,  sigdie);	(void) signal(SIGTERM, sigdie);	(void) signal(SIGPIPE, SIG_IGN);	(void) signal(SIGCHLD, sigreap);        for (argc--, argv++; argc > 0 && **argv == '-'; argc--, argv++) {		if (!strcmp(*argv, "-nocol")) {			collect_flag = 0;		} else if (!strcmp(*argv, "-nevercol")) {			collect_flag = 0;			Next_Collection = 1073741823; /* 2^30-1 : far future */		} else if (!strcmp(*argv, "-fast")) {			do_fast_start = 1;		} else if (!strcmp(*argv, "-new")) {			/* start fresh by deleting LASTUPDATE */			(void) unlink("./admin/LASTUPDATE");		} else if (!strncmp(*argv, "-D", 2)) {#ifdef USE_FAST_DEBUG_LOGGING			extern int do_log_sync;			do_log_sync = 0; 	/* very fast logging */			freopen("broker.out", "a", stdout);			freopen("broker.out", "a", stderr);#endif			debug_flag(*argv);		}	}	if (Initialize_Broker(cffile) == ERROR)		fatal("Could not initialize Broker.\n");	if (collect_flag && (Next_Collection > 0)) {		if (EV_add_aevent(COLLECTION) != ERROR)			collect_flag = -1;	}	/* main loop */	if (listen(qsock, 5) < 0) {		log_errno("listen");		sigdie(98);	}	Log("Broker is now on-line (pid %d, port %d)...\n", getpid(), qport);	while (1) {		(void)UTIL_Get_Time();	/* sets Cur_Time */		/* try to add maintence events */		if ((collect_flag == 0) && (Next_Collection > 0) &&                    (Cur_Time >= Next_Collection)) {			if (EV_add_aevent(COLLECTION) != ERROR)				collect_flag = -1;		}		if ((Next_Clean >= 0) && (Cur_Time >= Next_Clean)) {			if (EV_add_aevent(CLEANING) != ERROR)				
Next_Clean = -1;		}		if ((RegHdr->nrecords_deleted > reg_limit) && (comflag == 0)) {			if (EV_add_aevent(RCOMP) != ERROR)				comflag = -1;		}		(void)EV_Do_Event();		/*		 *  Only block for 15 seconds, since SunOS 4.1.x may		 *  swap out any process that waits longer than 20 secs.		 */		while (select_loop(Num_Ev ? 0 : 15, 0, 1) == 1)			; /* add as many events as possible */		if (Num_Ev == 0) {	/* Timeout, no events */			/* catch anything that sigreap may have missed */			while (waitpid(-1, &status, WNOHANG) > 0);		}	}	exit(0);	/* END OF PROGRAM */	/*NOTREACHED*/}/*===============================================================*//* signal handlers */int Broker_Shutdown(){	Log("Shuting down the broker...\n");	/* Stop the important stuff */	RG_Registry_Shutdown();	(void)LOG_close_log();	(void)close(qsock);	/* Close all file descriptors */	close_all_fds(3);	/* kill the index server */	if (IndexServer_pid > 0) {		Log("Killing IndexServer (pid %d)...\n", IndexServer_pid);		(void)kill(IndexServer_pid, SIGTERM);		sleep(5);		(void)kill(IndexServer_pid, SIGKILL);	}	AD_run_admin_process();	Log("Denied %d connections during this session.\n",ndenied_connections);	Log("************\n");	Log("*** DONE ***\n");	Log("************\n");	fflush(stdout);	fflush(stderr);	exit(0);	return SUCCESS;}void sigdie(sig)int sig;{	Log("Received signal %d...\n", sig);	(void)Broker_Shutdown();	_exit(sig);}void sigreap(sig)int sig;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -