⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 main.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
{	int status, cpid;	if ((cpid = waitpid(-1, &status, WNOHANG)) > 0) {		Debug(77,1,("Child process exited: pid %d\n", cpid));	}#ifdef _HARVEST_SYSV_	(void) signal(sig, sigreap);	/* reset signal handler */#endif}/*===============================================================*//* * verify_path() - Verify that the absolute path is readable; if not, exit */int verify_path(file)char *file;{	if ((file[0] == '/') && (access(file, R_OK) < 0)) {		Log("ERROR: %s is not readable.  Change your broker.conf file!\n", file);		return ERROR;	}	return SUCCESS;}/* * verify_exe() - Verify that the absolute path is executable; if not, exit */int verify_exe(file)char *file;{	if ((file[0] == '/') && (access(file, X_OK) < 0)) {		Log("ERROR: %s is not executable.  Change your broker.conf file!\n", file);		return ERROR;	}	return SUCCESS;}/*===============================================================*//* Get Host Name */int Initialize_Name(){	char *p = getfullhostname();	if (p == NULL) {		Log("WARNING: Internal Error: getfullhostname() failed...\n");		return ERROR;	}	HName = xstrdup(p);	Debug(77,1,("Broker Host is %s.\n", HName));	return SUCCESS;}/* Initialize communication */int Initialize_Com(){	static struct sockaddr_in brkr;	int one = 1;	if ((qsock = socket(AF_INET, SOCK_STREAM, 0)) < 0) {		log_errno("socket");		fatal("socket failed");	}    	if ((setsockopt(qsock, SOL_SOCKET, SO_REUSEADDR, (char*) &one,	     sizeof(one))) < 0) {		log_errno("setsockopt (SO_REUSEADDR = on)");		fatal("setsockopt failed");	}	/* name socket using wildcards */	brkr.sin_family = AF_INET;	brkr.sin_addr.s_addr = htonl(INADDR_ANY);	brkr.sin_port = htons(qport);	if (bind(qsock, (struct sockaddr *) &brkr, sizeof brkr) < 0) {		log_errno("bind");		(void)close(qsock);		fatal("Cannot not bind to port %d.  
Exiting...\n", qport);	}	return SUCCESS;}/* *  Change global variables */int set_str(tag, value)char *tag;char *value;{	unsigned int logf;	int Set_Next_Collection();	/* These are all paths that need to be verified */	if (strcasecmp(tag, S_BRKDIR) == 0) {		/* Assign Value */		DIRpath = xstrdup(value);		if (verify_path(DIRpath) == ERROR)			return ERROR;	} else if (strcasecmp(tag, S_BRKOFFLINE) == 0) {		if (value != NULL && !strcasecmp(value, "yes"))			broker_offline = 1;	} else if (strcasecmp(tag, S_BRKHP) == 0) {		/* Assign Value */		BrkHomePage = xstrdup(value);	} else if (strcasecmp(tag, S_COLCONF) == 0) {		ColConfig = xstrdup(value);		if (verify_path(ColConfig) == ERROR)			return ERROR;	} else if (strcasecmp(tag, S_GATHER) == 0) {		Gather = xstrdup(value);		if (verify_exe(Gather) == ERROR)			return ERROR;		/* The rest is not paths that need to be verified */	} else if (strcasecmp(tag, S_WEBS) == 0) {		WebServer = xstrdup(value);	} else if (strcasecmp(tag, S_WEBPATH) == 0) {		WebPath = xstrdup(value);	} else if (strcasecmp(tag, S_PASSWD) == 0) {		passwd = xstrdup(value);	} else if (strcasecmp(tag, S_APROC) == 0) {		aproc = xstrdup(value);	} else if (strcasecmp(tag, S_DESC) == 0) {		if (obj_desc != NULL)			xfree(obj_desc);		obj_desc = xstrdup(value);		(void)COL_Normalize_Name(obj_desc);		obj_desc_s = strlen(obj_desc);	} else if (strcasecmp(tag, S_CLEANR) == 0) {		if (sscanf(value, "%d", &clean_rate) < 1)			errorlog("Could not convert clean rate in config. \n");		if (clean_rate < 0) {			errorlog("Invalid clean rate %d. Set to %d.\n", clean_rate, CLEAN_RATE);			clean_rate = CLEAN_RATE;		}	} else if (strcasecmp(tag, S_COLLR) == 0) {		if (sscanf(value, "%d", &collect_rate) < 1)			errorlog("Could not convert collection rate in config. \n");		if (collect_rate < 0) {			errorlog("Invalid collection rate %d. 
Set to %d.\n", collect_rate, COLLECT_RATE);			collect_rate = COLLECT_RATE;		}	} else if (strcasecmp(tag, S_RFR) == 0) {		if (sscanf(value, "%d", &drefresh_rate) < 1)			errorlog("Could not convert refresh rate in config. \n");		if (drefresh_rate <= 0) {			errorlog("Invalid refresh rate %d. Setting to %d.\n", drefresh_rate, REFRESH_RATE);			drefresh_rate = REFRESH_RATE;		}	} else if (strcasecmp(tag, S_PORT) == 0) {		if (sscanf(value, "%d", &qport) < 1)			errorlog("Could not convert port in config. \n");		if (qport < 1) {			errorlog("Invalid port number %d. Set to %d.\n", qport, Q_PORT);			qport = Q_PORT;		}	} else if (strcasecmp(tag, S_DEDLIM) == 0) {		if (sscanf(value, "%d", &reg_limit) < 1)			errorlog("Could not convert dead entry limit in config. \n");		if (reg_limit < 0) {			errorlog("Invalid dead entry limit %d. Set to %d.\n", reg_limit, MAX_DEAD);			reg_limit = MAX_DEAD;		}	} else if (strcasecmp(tag, S_MAXEV) == 0) {		if (sscanf(value, "%d", &max_events) < 1)			errorlog("Could not convert event queue limit in config. \n");		if (max_events < 1) {			errorlog("Invalid event queue limit %d. Set to %d.\n", max_events, MAX_EVENTS);			max_events = MAX_EVENTS;		}	} else if (strcasecmp(tag, S_LOGK) == 0) {		if (sscanf(value, "%i", &logf) < 1)			errorlog("Could not convert log key in config. 
\n");		LOG_turn_on(logf);		LogFlag = 1;	} else if (strcasecmp(tag, S_FCOLL) == 0) {		Tstr = xstrdup(value);		Set_Next_Collection(Tstr);	} else if (strcasecmp(tag, S_INDTP) == 0) {		if (strcasecmp(value, FULLI) == 0)			IndexType = I_FULL;		else if (strcasecmp(value, INCRI) == 0)			IndexType = I_INCR;		else if (strcasecmp(value, PEROBJI) == 0)			IndexType = I_PER_OBJ;	} else if (strcasecmp(tag, S_INDEXER) == 0) {		IndexerType = xstrdup(value);		return (Indexer_Init());	} else if (strcasecmp(tag, S_TERSELOG) == 0) {		if (value != NULL && strcasecmp(value, "on") == 0)			Log_Terse = 1;		else			Log_Terse = 0;	} else if (strcasecmp(tag, S_FASTSTART) == 0) {		if (value != NULL && strcasecmp(value, "on") == 0)			do_fast_start = 1;		else			do_fast_start = 0;	} else if (strcasecmp(tag, S_RDQTO) == 0) {		if (value != NULL)			ReadQueryTimeout = atoi(value);		if (ReadQueryTimeout < 0)			ReadQueryTimeout = READ_QUERY_TIMEOUT;	} else {		if (do_IND_config(value, tag) == ERROR)			return ERROR;	}	return SUCCESS;}int Do_Collection(){	if (collect_flag == -1)		Next_Collection = Next_Collection + collect_rate;	if (collect_rate > 0)		collect_flag = 0;	return (COL_Do_Collection());}int Do_Compression(){	comflag = 0;	return (RG_Compress());}int Do_Uniqify(){	comflag = 0;	return (RG_Uniqify_by_URL());}int Do_Restart_Index_Server(){	IndexServer_ForceRestart = 1;	return SUCCESS;	/* The index server will be restarted after the next query */}int Do_Cleaning(){	RG_Cleaner();	if (clean_rate > 0)		Next_Clean = Cur_Time + clean_rate;	return SUCCESS;}/* Configure the Broker according to a given config file */int Config_Broker(cfile)char *cfile;{	FILE *cfp;	char dspace[BUFSIZ], tag[BUFSIZ], value[BUFSIZ];	int llen, cc;	char *vptr, *eptr;	Debug(77,1,("Configuring Broker from the file: %s\n", cfile));	/* assign defaults to non-critical globals */	Gather = xstrdup("gather");	obj_desc = xstrdup(OBJ_DESC);	obj_desc_s = strlen(obj_desc);	collect_rate = COLLECT_RATE;	clean_rate = CLEAN_RATE;	qport = Q_PORT;	
drefresh_rate = REFRESH_RATE;	reg_limit = MAX_DEAD;	max_events = MAX_EVENTS;	WebPath = NULL;	ColConfig = NULL;	/* open config file */	if ((cfp = fopen(cfile, "r")) == NULL) {		Log("WARNING: Cannot read the Broker configuration file: %s\n", cfile);		log_errno(cfile);		return ERROR;	}	while (fgets(dspace, BUFSIZ, cfp) != NULL) {		/* Parse config line */		if (dspace[0] == '#') {		        continue;		} else if (sscanf(dspace, "%s %[^\n]", tag, value) < 2) {			continue;		} else {			char *p = value+strlen(value)-1;			while (isspace(*p)) *p-- = '\0';			if (set_str(tag, value) == ERROR) {                        	fclose(cfp);                        	return ERROR;                        }                }		memset(tag, 0, BUFSIZ);		memset(value, 0, BUFSIZ);		memset(dspace, 0, BUFSIZ);	}	fclose(cfp);	/* if logging has not been set, turn on all logging. */	if (LogFlag == 0)		LOG_turn_on(~00);	if (DIRpath == NULL) {		Log("WARNING: Configuration file %s does not set the critical value: DIRpath.\n", cfile);		return ERROR;	} else if (WebServer == NULL) {		Log("WARNING: Configuration file %s does not set the critical value: WebServer.\n", cfile);		return ERROR;	}	return SUCCESS;}int Set_Next_Collection(mytime)char *mytime;{	char *tt;	int hr, min;	struct tm *tmptr;	time_t tmp, addt = 0;	if ((tt = strchr(mytime, ':')) == NULL) {		Log("WARNING: Bad Time Format for Next Collection.\n");		Next_Collection = 0;		return ERROR;	}	hr = atoi(mytime);	min = atoi(tt + 1);	tzset();	tmp = UTIL_Get_Time();	tmptr = localtime(&tmp);	if (tmptr->tm_hour > hr)		addt = 86400;	/* 24 hrs in sec. 
*/	else		addt = 0;	tmptr->tm_hour = hr;	tmptr->tm_min = min;#ifdef HAVE_MKTIME	Next_Collection = mktime(tmptr) + addt;#else	Next_Collection = timelocal(tmptr) + addt;#endif	return SUCCESS;}static int Initialize_Broker(deffile)char *deffile;{	Log("Using the configuration file: %s\n", deffile);	harvest_add_broker_path();	/* Randomize */#if defined(HAVE_SRAND48)	srand48(time(NULL));#elif defined(HAVE_SRANDOM)	(void) srandom(time(NULL));#else	srand(time(NULL));#endif	LOG_reset_master();	(void)UTIL_Get_Time();	/* sets Cur_Time */	if ((Initialize_Name() == ERROR) ||	/* sets current hostname */	    (Indexer_Init() == ERROR) ||	/* indexer: before config */	    (Config_Broker(deffile) == ERROR) ||/* must be first */	    (LOG_Init() == ERROR) ||		/* log file */	    (SM_Init() == ERROR) ||		/* storage manager */	    (RG_Init() == ERROR) ||		/* registry depends on SM */	    (EV_Init() == ERROR) ||		/* event list */	    (Initialize_Com() == ERROR))	/* do port init last */		return ERROR;	/* do some misc initialization below */	/* WebServer is non-NULL at this point */	brk_obj_url = (char *) xmalloc(strlen(WebServer) + 100);	if (WebPath) {		sprintf(brk_obj_url, "http://%s%s/objects", WebServer, WebPath);	} else {		sprintf(brk_obj_url, "http://%s/BrkDir/objects", WebServer);	}	if (ColConfig == NULL)		ColConfig = UTIL_make_admin_filename("Collection.conf");	if (collect_rate == 0)		collect_flag = 2;	if (clean_rate == 0)		Next_Clean = -1;	AD_run_admin_process();	return SUCCESS;}/* *  Initialize_Indexer_Routines - inits the run-time selectable indexing *  routines so that a user may switch indexers. 
*/static int Indexer_Init(){	int i;	if (MAX_INDEXER_TYPES <= 0) {		Log("ERROR: No Indexers defined for this broker!\n");		return ERROR;	}	if (IndexerType != NULL) {		if (!Valid_Indexer_Type(IndexerType)) {			Log("ERROR: Invalid IndexerType: %s\n", IndexerType);			return ERROR;		}	} else {		/* default to first indexer in list, not necessarily Glimpse */		IndexerType = xstrdup(Indexer_Routines[0].indexer_type);	}	for (i = 0; i < MAX_INDEXER_TYPES; i++) {		if (!strcmp(Indexer_Routines[i].indexer_type, IndexerType))			break;	}	if (i >= MAX_INDEXER_TYPES) {		Log("ERROR: Invalid IndexerType: %s\n", IndexerType);		return ERROR;	}	INDEXER = &Indexer_Routines[i];	do_IND_initialize();	return SUCCESS;}/* *  disconnect() - Disconnect the process from the controlling terminal. *  Adapted from Harvest's floodd daemon in the replicator. */static void disconnect(){  	int pid, fd;	/* ignore all job control signals */#ifdef SIGTTOU	signal (SIGTTOU, SIG_IGN);#endif#ifdef SIGTTIN	signal (SIGTTIN, SIG_IGN);#endif#ifdef SIGTSTP	signal (SIGTSTP, SIG_IGN);#endif#ifndef NO_FORK_FOR_DAEMON	if ((pid = fork()) < 0) {		log_errno("fork");		return;	}	if (pid) {		/* parent */		exit(0);	/* quietly exit to let child do the work */	}				/* child continues */	(void) setsid();	/* Reset the session ID */#endif	/* Logging beneath here will not work */	/* Close all file descriptors */	close_all_fds(0);  	/* Redirect the stdin to /dev/null */  	if ((fd = open("/dev/null", O_RDONLY, 0)) < 0)		exit(99);  	(void) dup2(fd, 0);  	/* Redirect the stdout and stderr to broker.out */  	if ((fd = open("broker.out", O_WRONLY|O_CREAT|O_APPEND, 0640)) < 0)		exit(99);  	(void) dup2(fd, 1);  	(void) dup2(fd, 2);	/* stdout and stderr is now to broker.out for logging */}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -