⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 index.c

📁 harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
	    GL_IndexOption, GL_IndexFlags, DIRpath, DIRpath);	Glimpse_Start_Indexing(comm);	Log("Finished Glimpse Full Indexing.\n");	return SUCCESS;}/* ----------------------------------------------------------------- * * IND_Index_Incremental -- perform an incremental index * ----------------------------------------------------------------- */int Glimpse_IND_Index_Incremental(){	static char comm[BUFSIZ];	Log("Begin Glimpse Incremental Indexing...\n");	memset(comm, '\0', BUFSIZ);	if (strstr(GL_IndexOption, "-b") != NULL) {		sprintf(comm, "%s %s %s -H %s %s/objects", GL_GlimpseInd,		    GL_IndexOption, GL_IndexFlags, DIRpath, DIRpath);		Log("Sorry, but Glimpse incremental indexing is not supported with byte-level indexes (-b).\n");		Log("Using full indexing instead: %s\n", comm);	} else {		sprintf(comm, "%s %s %s -f -H %s %s/objects", GL_GlimpseInd,		    GL_IndexOption, GL_IndexFlags, DIRpath, DIRpath);	}	Glimpse_Start_Indexing(comm);	Log("Finished Glimpse Incremental Indexing.\n");	return SUCCESS;}/* ----------------------------------------------------------------- * * IND_Index_Start -- prepare for indexing a stream of objects. * ----------------------------------------------------------------- */int Glimpse_IND_Index_Start(){	GL_NewObj = 0;	return SUCCESS;}/* ----------------------------------------------------------------- * * IND_Index_Flush -- finish indexing a stream of objects. * ----------------------------------------------------------------- */int Glimpse_IND_Index_Flush(){	if (GL_NewObj > 0) {#ifdef USE_INCREMENTAL_OPTIMIZATION		/*		 *  Try to optimize by performing an incremental indexing		 *  if only approx 10% of the database has changed.  For the		 *  approximation we take the total number of current items		 *  in the registry and see if 10% of that is GL_NewObj.		 
*/		if ((int) (RG_Count_Reg() / 10) > (int) GL_NewObj) {			Log("Performing an incremental indexing, since less than 10%% of the objects has changed.\n");			return (Glimpse_IND_Index_Incremental());		}#endif		/* Do the default indexing operation */		switch (IndexType) {		case I_FULL:			return (Glimpse_IND_Index_Full());		case I_INCR:			return (Glimpse_IND_Index_Incremental());		case I_PER_OBJ:			break;		default:			fatal("Glimpse_IND_Index_Flush: Internal error.\n");		}	}	return SUCCESS;}/* ----------------------------------------------------------------- * * IND_Destroy_Obj -- remove an object from the indexer. * ----------------------------------------------------------------- */int Glimpse_IND_Destroy_Obj(entry)     reg_t *entry;{	/* Nop in Glimpse */	return SUCCESS;}/* ----------------------------------------------------------------- * * IND_initialize -- initialize interface to indexer * ----------------------------------------------------------------- *//* * ** PURIFY: complains that all these strdup()'s are memory leaks. 
*/int Glimpse_IND_initialize(){	GL_IndexOption = xstrdup("-b -s -B");	IndexType = I_FULL;	GL_IndexFlags = xstrdup("");	GL_Glimpse = xstrdup("glimpse");	GL_GlimpseInd = xstrdup("glimpseindex");	GL_GlimpseServer = xstrdup("glimpseserver");	GL_GlimpseSrvHost = xstrdup(getfullhostname());	GL_GlimpseSrvPort = 0;	GL_GlimpseSrvRestart = 0;	GL_ncalled = 0;	return SUCCESS;}/* ----------------------------------------------------------------- * * IND_config -- configure indexer specific variables * ----------------------------------------------------------------- */int Glimpse_IND_config(value, tag)     char *value;     char *tag;{	if (tag == NULL || value == NULL)		return ERROR;	Debug(102, 1, ("Glimpse Configuration: %s %s\n", value, tag));	if (strcasecmp(tag, S_GLIMPSE) == 0) {		GL_Glimpse = xstrdup(value);		if (verify_exe(GL_Glimpse) == ERROR)			return ERROR;	} else if (strcasecmp(tag, S_GLIMPSEIND) == 0) {		GL_GlimpseInd = xstrdup(value);		if (verify_exe(GL_GlimpseInd) == ERROR)			return ERROR;	} else if (strcasecmp(tag, S_GLIMPSESERVER) == 0) {		GL_GlimpseServer = xstrdup(value);		if (strcasecmp(GL_GlimpseServer, "false") &&		    verify_exe(GL_GlimpseServer) == ERROR)			return ERROR;	} else if (strcasecmp(tag, S_GLIMPSESRVHOST) == 0) {		GL_GlimpseSrvHost = xstrdup(value);	} else if (strcasecmp(tag, S_GLIMPSESRVPORT) == 0) {		sscanf(value, "%d", &GL_GlimpseSrvPort);	} else if (strcasecmp(tag, S_GLIMPSEMAXLIFE) == 0) {		if (sscanf(value, "%d", &GL_max_lifetime) != 1)			fatal("sscanf GL_max_lifetime failed");	} else if (strcasecmp(tag, S_GLIMPSESRVRESTART) == 0) {		if (sscanf(value, "%d", &GL_GlimpseSrvRestart) != 1)			fatal("sscanf GL_GlimpseSrvRestart failed");	} else if (strcasecmp(tag, S_GLIMPSEINDOPT) == 0) {		if (strcasecmp(value, "Fast-Search") == 0) {			GL_IndexOption = xstrdup("-b -s -B");		} else if (strcasecmp(value, "Medium") == 0) {			GL_IndexOption = xstrdup("-o -s -B");		} else if (strcasecmp(value, "Small-Index") == 0) {			GL_IndexOption = xstrdup("-s -B");		
}		return SUCCESS;	} else if (strcasecmp(tag, S_GLIMPSEINDEXTOPT) == 0) {		GL_IndexFlags = xstrdup(value);	}	return SUCCESS;}/* ----------------------------------------------------------------- * * IND_do_query -- process a query string * ----------------------------------------------------------------- */int Glimpse_IND_do_query(ql, rsock, qflag, ptime)     qlist_t *ql;     int rsock, qflag;     time_t ptime;{	FILE *indexfp = NULL;	static char commandstr[BUFSIZ];	static char xbuf[BUFSIZ];	char *patstr = NULL;	char *tfn = NULL;	int err = SUCCESS;	int tmp;	GL_ncalled++;	/* Run the glimpseserver if it hasn't been started yet. */	if (GL_ncalled == 1 && GL_GlimpseSrvPort > -1 && IndexServer_pid == 0) {		Glimpse_Start_Glimpseserver();	}	tmp=GL_do_query_inline(ql, rsock, qflag, ptime);	if (tmp==SUCCESS || tmp==ERROR) {	  err = tmp;	  Debug(102, 1, ("Inline query done!\n"));	  goto do_query_done;	}	strcpy(commandstr, GL_Glimpse);		/* glimpse command */	strcat(commandstr, " -a -W ");	/* print SOIF attrributes */	/*  Use glimpseserver if the port is defined */	if (GL_GlimpseSrvPort > 0 && IndexServer_pid > 0) {		sprintf(xbuf, " -C -J %s -K %d",		    GL_GlimpseSrvHost,		    GL_GlimpseSrvPort);		strcat(commandstr, xbuf);	}	/* Always give the directory.  If client/server fails, glimpse	 * will search the filesystem itself */	sprintf(xbuf, " -H %s", DIRpath);	strcat(commandstr, xbuf);	strcat(commandstr, " -y");	/* force the search */	if (GL_errflag > 0) {		sprintf(xbuf, " -%d", GL_errflag);		strcat(commandstr, xbuf);		/* We can't do an error match with -i or -w so disable them */		GL_caseflag=0;		GL_wordflag=0;	}	/* Glimpse 2.0 can limit the result set; always use -L 1000 if undef */	sprintf(xbuf, " -L %d", GL_maxresults < 1 ? 
1000 : GL_maxresults);	strcat(commandstr, xbuf);	if (GL_maxfiles > 0) {		sprintf(xbuf, ":%d", GL_maxfiles);		strcat(commandstr, xbuf);	}	if (GL_maxfiles > 0 && GL_maxlines > 0) {		sprintf(xbuf, ":%d", GL_maxlines);		strcat(commandstr, xbuf);	}	/* Add Glimpse flags */	if (GL_caseflag == 1) {		strcat(commandstr, " -i");	}	/* If regexes are disabled */	if (GL_noregex == 1) {	        strcat(commandstr, " -k");	}	/* Generate Glimpse pattern to search */	patstr = GL_do_qlist(ql);	/* Check to see if the user did a regular expression */	if (patstr != NULL &&	    (strchr(patstr, '*') || strchr(patstr, '.') ||	     strchr(patstr, '|') || strchr(patstr, '(')))	  {	  /* As we can't do regular expression + word match, rather than give	   * an error - just disable word match	   */	  GL_wordflag = 0;	}	if (GL_wordflag == 1) {		strcat(commandstr, " -w");	}	if (QM_opaqueflag != 1) {#ifdef GLIMPSE_3		if ((GL_caseflag == 1) && (GL_wordflag == 1))			strcat(commandstr, " -N");		else#endif			strcat(commandstr, " -l");	}	if (patstr != NULL) {		sprintf(xbuf, " \'%s\'", patstr);		strcat(commandstr, xbuf);		xfree(patstr);		patstr = NULL;		/* Need a tmpfile for glimpse output */		if ((tfn = tempnam(NULL, "query")) != NULL) {			strcat(commandstr, " > ");			strcat(commandstr, tfn);		} else {			SWRITE(rsock, IND_FAIL, IND_FAIL_S);			return ERROR;	/* shouldn't really happen */		}		Debug(102, 1, ("Glimpse search command: %s\n", commandstr));		/* Run the user query, give only GL_lifetime seconds */		do_system_lifetime(commandstr, GL_lifetime);		/* Send USR1 to glimpseserver to tell it to clean up properly */		if (GL_GlimpseSrvPort > 0 && IndexServer_pid > 0)			(void) kill(IndexServer_pid, SIGUSR1);		/* Now process the tempfile that contains the results */		if ((indexfp = fopen(tfn, "r")) == NULL) {			log_errno(tfn);			(void) unlink(tfn);			xfree(tfn);			tfn = NULL;			if (qflag == UQUERY) {				SWRITE(rsock, IND_FAIL, IND_FAIL_S);			} else {				QM_send_bulk_err(rsock);			}			(void) close(rsock);			
return ERROR;		}		/* Process the glimpse results based on this query type */		switch (qflag) {		case QBULK:#ifdef FORK_ON_BULK			if (fork() == 0) {	/* child */				close(qsock);				(void) GL_bulk_query(rsock, indexfp, ptime);				(void) fclose(indexfp);				(void) unlink(tfn);				(void) close(rsock);				_exit(0);			}			err = SUCCESS;#else			err = GL_bulk_query(rsock, indexfp, ptime);#endif			break;		case UQUERY:			err = GL_user_query(rsock, indexfp);			break;		case QDELETE:			err = GL_del_query(rsock, indexfp);			break;		default:			break;		}		/* Clean up */		(void) fclose(indexfp);		(void) unlink(tfn);		xfree(tfn);		tfn = NULL;	} else if (qflag == QBULK) {		QM_send_bulk_err(rsock);		err = ERROR;	} else {		(void) write(rsock, ERR_MSG, strlen(ERR_MSG));		Log(ERR_MSG);		err = ERROR;	}	(void) close(rsock);	/* close so that results are sent */      do_query_done:	/* Support for restarting Glimpseserver after N queries */	if ((GL_GlimpseSrvRestart > 0 && GL_ncalled >= GL_GlimpseSrvRestart)	    || IndexServer_ForceRestart) {		Log("Restarting glimpseserver after %d queries...\n",		    GL_ncalled);		Glimpse_Kill_Glimpseserver();		Glimpse_Start_Glimpseserver();		GL_ncalled = 0;		IndexServer_ForceRestart = 0;	}	return err;}/* ----------------------------------------------------------------- * * IND_Init_Flags -- intialize query parser flags * ----------------------------------------------------------------- */void Glimpse_IND_Init_Flags(){	GL_lifetime = GL_max_lifetime;	/* reset on each query */	GL_errflag = 0;		/* Number of Errors allowed */	GL_caseflag = 1;	/* Case Insenstive or not */	GL_wordflag = 0;	/* Match word */	GL_maxresults = 0;	/* Max number of hits in the result set */	GL_maxfiles = 0;	/* Max number of objects in the result set */	GL_maxlines = 0;	/* Max number of lines per object */	GL_illegal_query = 0;	/* Is Glimpse capable of this query */	GL_regexflag = 0;	/* RE or not? 
*/	GL_noregex = 0;         /* Explicitly disable regexes */}/* ----------------------------------------------------------------- * * IND_Set_Flags -- set query parser flag * ----------------------------------------------------------------- */void Glimpse_IND_Set_Flags(flag, val)     char *flag;     char *val;{	if (flag == NULL)		return;	if (strcasecmp(flag, "error") == 0) {		GL_errflag = 0;		if (val != NULL) {			GL_errflag = atoi(val);			if (GL_errflag < 0)				GL_errflag = 0;			if (GL_errflag > 3)				GL_errflag = 3;		}		return;	}	if (strcasecmp(flag, "timeout") == 0) {		if (val != NULL)			GL_lifetime = atoi(val);		if (GL_lifetime < 10)			GL_lifetime = 10;	/* at least 10 seconds */		if (GL_lifetime > GL_max_lifetime)			GL_lifetime = GL_max_lifetime;		return;	}	if (strcasecmp(flag, "matchword") == 0) {		GL_wordflag = 1;		return;	}	if (strcasecmp(flag, "case") == 0) {		if (val != NULL) {			if (strcasecmp(val, "insensitive") == 0)				GL_caseflag = 1;			else if (strcasecmp(val, "sensitive") == 0)				GL_caseflag = 0;		}		return;	}	if ((strcasecmp(flag, "maxresult") == 0) && val != NULL) {		GL_maxresults = atoi(val);		if (GL_maxresults < 1)			GL_maxresults = 0;		return;	}	if ((strcasecmp(flag, "maxlines") == 0) && val != NULL) {		GL_maxlines = atoi(val);		if (GL_maxlines < 1)			GL_maxlines = 0;		return;	}	if ((strcasecmp(flag, "maxfiles") == 0) && val != NULL) {		GL_maxfiles = atoi(val);		if (GL_maxfiles < 1)			GL_maxfiles = 0;		return;	}	if (strcasecmp(flag, "noregex") == 0) {	        GL_noregex=1;		return;	}	GL_illegal_query = 1;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -