⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 index.c

📁 harvest是一个下载html网页的机器人
💻 C
字号:
#include "broker.h"
#include "log.h"
#include "Grass/index.h"

/* Global variables */
extern char *DIRpath;
extern char *brk_obj_url;
extern int IndexType;
extern int QM_opaqueflag;
extern int QM_gotphrase;	/* got a quoted phrase or not */
extern int IndexServer_pid;
extern char *QM_op;
extern char *SM_Get_Obj_Filename();	/* only UNIX filesystem SM */

/* Local functions */
#define LOCAL static
LOCAL int GR_Index_Object _PARAMS((reg_t *));
LOCAL char *GR_do_qlist _PARAMS((qlist_t *));
LOCAL char *GR_build_select _PARAMS((qlist_t *));
LOCAL fd_t GR_getfd _PARAMS((char *));
LOCAL int GRASS_Start_Indexing _PARAMS((char *));
LOCAL void GR_free_opdata _PARAMS((char **, int));
LOCAL int GR_append_quoted _PARAMS((char *, size_t, char *));

/* Local variables */
LOCAL char *GR_GRASS = NULL;
LOCAL char *GR_GRASSInd = NULL;
LOCAL int GR_NewObj;
LOCAL int GR_maxresults;
LOCAL int GR_illegal_query = 0;
LOCAL int GR_lifetime = 15 * 60;
LOCAL int GR_max_lifetime = 15 * 60;	/* 15 minutes */
LOCAL int GR_ncalled = 0;

#define GRASSINDEX "grassindex"
#define GRASSQUERY "grassquery"
#define BADQ_STR \
        "103 - ERROR: GRASS Indexer cannot support your query.\n"

/* Prefix of diagnostic lines in the query output (see GR_user_query) */
#define GR_MSG_PREFIX "grass:"

/* Layout of a boolean sub-expression built by GR_do_qlist */
#ifdef USE_PARENS_FOR_BOOLEAN
#define GR_BOOL_FMT "(%s%s%s)"
#else
#define GR_BOOL_FMT "%s%s%s"
#endif

/*
 *  GRASS_Start_Indexing - Start a grassindex process.
 *
 *  Splits `comm' into an argument vector, runs it in a child process and
 *  waits until the child is gone.  Returns ERROR if fork() fails and
 *  SUCCESS otherwise; the child's exit status is not inspected.
 */
LOCAL int GRASS_Start_Indexing(comm)
     char *comm;
{
	int pid, status = 0;

#if DEBUG1
	Log("\t command :%s:\n", comm);
#endif

	/* must use fork() rather than vfork() which causes memory leaks */
	if ((pid = fork()) < 0) {
		log_errno("fork");
		return ERROR;
	}
	if (pid == 0) {
		/* child */
		char *argv[64];

		close_all_fds(3);
		memset(argv, '\0', sizeof(argv));
		parse_argv(argv, comm);
		execvp(argv[0], argv);
		/* only reached when execvp() failed */
		perror(argv[0]);
		_exit(1);
	}

	/* parent */
	Log("Waiting for grassindex to finish...\n");

	/* while grassindex is running, explicitly wait for it */
	while (waitpid(pid, &status, WNOHANG) != pid) {
		select_loop(15, 0, 0);	/* deny outside connections */
		if (kill(pid, 0) != 0)
			break;	/* child died, and was caught by sigreap */
	}
	return SUCCESS;
}

/* -----------------------------------------------------------------
 * GR_Index_Object -- using grass -a to index a single object
 *
 * Returns the result of GRASS_Start_Indexing(), or ERROR when the
 * object's filename is unavailable or the command line would not fit
 * in the command buffer.
 * ----------------------------------------------------------------- */
LOCAL int GR_Index_Object(entry)
     reg_t *entry;
{
	char comm[BUFSIZ], *fn;
	int n;

	fn = SM_Get_Obj_Filename(entry->FD);
	if (fn == NULL) {
		errorlog("GR_Index_Object: cannot get object filename\n");
		return ERROR;
	}
	/* bounded formatting: DIRpath and fn have no fixed maximum length */
	n = snprintf(comm, sizeof(comm), "%s -a %s %s", GRASSINDEX, DIRpath, fn);
	xfree(fn);
	if (n < 0 || (size_t) n >= sizeof(comm)) {
		errorlog("GR_Index_Object: indexing command too long\n");
		return ERROR;
	}
	return (GRASS_Start_Indexing(comm));
}

/* -----------------------------------------------------------------
 * GR_bulk_query - do bulk transfer of all objects that match the query
 *
 * Reads result lines from `indexfp' and sends every registered object
 * whose update_time is >= `ptime' to the client on `rsock'.
 * Consecutive result lines naming the same object are sent only once.
 * Returns ERROR if `rsock' cannot be wrapped in a stdio stream,
 * SUCCESS otherwise.  Note that fclose() on the stream also closes
 * `rsock'.
 * ----------------------------------------------------------------- */
LOCAL int GR_bulk_query(rsock, indexfp, ptime)
     int rsock;
     FILE *indexfp;
     time_t ptime;
{
	char ret[BUFSIZ];
	fd_t qfd, oldfd = -1;
	reg_t *bentry;
	FILE *fp;

	if ((fp = fdopen(rsock, "w")) == NULL) {
		log_errno("fdopen");
		QM_send_bulk_err(rsock);
		return ERROR;
	}
	QM_send_bulk_begin(rsock);

	while (fgets(ret, BUFSIZ, indexfp) != NULL) {
		qfd = GR_getfd(ret);
		if (qfd == ERROR || qfd == oldfd)
			continue;
		/* remember the object so that repeated hits are skipped */
		oldfd = qfd;
		bentry = RG_Get_Entry(qfd);
		if (bentry == NULL || bentry->update_time < ptime)
			continue;
		(void) QM_send_bulk_fd(qfd, fp, bentry);
	}

	fflush(fp);		/* critical, must flush before termination */
	QM_send_bulk_end(rsock);
	fclose(fp);
	return SUCCESS;
}

/* -----------------------------------------------------------------
 * GR_del_query -- delete all objects that match the query.
 *
 * Reads result lines from `indexfp' and deletes every registered
 * object they name; consecutive lines naming the same object are
 * handled once.  `rsock' is not used.  Always returns SUCCESS.
 * ----------------------------------------------------------------- */
LOCAL int GR_del_query(rsock, indexfp)
     int rsock;
     FILE *indexfp;
{
	char ret[BUFSIZ];
	fd_t qfd, oldfd = -1;
	int cnt = 0;
	reg_t *rme;

	while (fgets(ret, BUFSIZ, indexfp) != NULL) {
		qfd = GR_getfd(ret);
		if (qfd == ERROR || qfd == oldfd)
			continue;
		/* remember the object so that repeated hits are skipped */
		oldfd = qfd;
		if ((rme = RG_Get_Entry(qfd)) == NULL)
			continue;
		COL_DEL_Obj(rme);
		cnt++;
	}
	Log("Deleted %d objects based on query.\n", cnt);
	return SUCCESS;
}

/*
 *  GR_free_opdata - release every non-NULL entry of an opaque-data
 *  table of `n' slots and reset the slots to NULL.
 */
LOCAL void GR_free_opdata(opdata, n)
     char **opdata;
     int n;
{
	int i;

	for (i = 0; i < n; i++) {
		if (opdata[i] != NULL) {
			xfree(opdata[i]);
			opdata[i] = NULL;
		}
	}
}

/* -----------------------------------------------------------------
 * GR_user_query -- Read the output of the GRASS query on indexfp, then
 * send to rsock via protocol.
 *
 * Returns ERROR when the query was flagged illegal or the client has
 * gone away (in the latter case `rsock' is closed here), SUCCESS
 * otherwise.
 * ----------------------------------------------------------------- */
LOCAL int GR_user_query(rsock, indexfp)
     int rsock;
     FILE *indexfp;
{
	fd_t fd1, fd2 = (fd_t) (-1);
	char inb[BUFSIZ], *opdata[BUFSIZ];
	int opsize = 0, obcnt = 0;

	/* If the query was illegal, give up quickly */
	if (GR_illegal_query) {
		SWRITE(rsock, BADQ_STR, strlen(BADQ_STR));
		return ERROR;
	}
	/*
	 *  Before we return the query results, we perform 2 write's on
	 *  the socket to the client to test whether or not the client
	 *  will be able to receive the query results.
	 *  We have to do two writes because the first will complete
	 *  even though the other side is gone.
	 */
	(void) write(rsock, PIPECHK, strlen(PIPECHK));
	if (write(rsock, PIPECHK, strlen(PIPECHK)) == -1) {
		errorlog("Client is gone -- aborting user query results.\n");
		close(rsock);
		return ERROR;
	}
	memset(opdata, '\0', BUFSIZ * sizeof(char *));	/* zero out opdata */

	while (fgets(inb, BUFSIZ, indexfp) != NULL) {
		if ((fd1 = GR_getfd(inb)) == ERROR) {
			/* not an object line; log it if it is a message */
			if (strncmp(inb, GR_MSG_PREFIX,
				    strlen(GR_MSG_PREFIX)) == 0) {
				size_t len = strlen(inb);

				/* drop the trailing newline, if there is one */
				if (len > 0 && inb[len - 1] == '\n')
					inb[len - 1] = '\0';
				Log("%s\n", inb);
			}
			continue;
		}
		if ((fd1 != fd2) && (fd2 != (fd_t) (-1))) {
			/* return the previous object */
			if (QM_user_object(rsock, fd2, opsize, opdata)
			    == SUCCESS)
				obcnt++;
			/* free the opaque data */
			GR_free_opdata(opdata, BUFSIZ);
			opsize = 0;
		}
		fd2 = fd1;
	}

	/* Get the last object */
	if (fd2 != (fd_t) (-1)) {
		if (QM_user_object(rsock, fd2, opsize, opdata) == SUCCESS)
			obcnt++;
	}
	QM_user_done(rsock, obcnt);

	/* Free memory */
	GR_free_opdata(opdata, BUFSIZ);
	return SUCCESS;
}

/* -----------------------------------------------------------------
 * GR_do_qlist -- Recursive function to build a query from the list.
 *
 * Returns a buffer of SEL_SIZE bytes obtained from xmalloc() that the
 * caller must xfree(), or NULL when the expression uses NOT or an
 * unknown operator, when a sub-expression fails, or when the combined
 * expression would not fit in SEL_SIZE bytes.  AND is written as ';'
 * and OR as ','; with USE_PARENS_FOR_BOOLEAN each boolean
 * sub-expression is wrapped in parentheses.
 * ----------------------------------------------------------------- */
LOCAL char *GR_do_qlist(ql)
     qlist_t *ql;
{
	char *ll, *rl, *nl;
	const char *sep;
	int n;

	if (ql->type != LOGICAL)
		return (GR_build_select(ql));

	if (ql->op == NOT)
		return NULL;
	if ((ll = GR_do_qlist((qlist_t *) ql->llist)) == NULL)
		return NULL;
	if ((rl = GR_do_qlist((qlist_t *) ql->rlist)) == NULL) {
		xfree(ll);
		return NULL;
	}

	switch (ql->op) {
	case AND:
		sep = ";";
		break;
	case OR:
		sep = ",";
		break;
	default:
		xfree(rl);
		xfree(ll);
		return NULL;
	}

	/* bounded join: refuse the query instead of overrunning SEL_SIZE */
	nl = (char *) xmalloc(SEL_SIZE);
	n = snprintf(nl, SEL_SIZE, GR_BOOL_FMT, ll, sep, rl);
	xfree(ll);
	xfree(rl);
	if (n < 0 || n >= (int) SEL_SIZE) {
		xfree(nl);
		return NULL;
	}
	return (nl);
}

/* -----------------------------------------------------------------
 * GR_build_select -- Build the basic GRASS query.
 *
 * Formats "llist=rlist" when the node has an llist, otherwise just
 * "rlist", into a fresh SEL_SIZE buffer that the caller must xfree().
 * The node's strings are released here.  Returns NULL when the
 * selection would not fit in SEL_SIZE bytes.
 * ----------------------------------------------------------------- */
LOCAL char *GR_build_select(ql)
     qlist_t *ql;
{
	char *sel;
	int n;

	sel = (char *) xmalloc(SEL_SIZE);
	sel[0] = '\0';

	if (ql->llist) {
		n = snprintf(sel, SEL_SIZE, "%s=%s", ql->llist, ql->rlist);
		xfree(ql->rlist);
		xfree(ql->llist);
	} else {
		n = snprintf(sel, SEL_SIZE, "%s", ql->rlist);
		/* llist is NULL on this path; rlist is what must be freed */
		xfree(ql->rlist);
	}
	if (n < 0 || n >= (int) SEL_SIZE) {
		xfree(sel);
		return NULL;
	}
	return (sel);
}

/* -----------------------------------------------------------------
 * GR_getfd -- Get the fd of the GRASS return.
 *
 * Returns the number that follows the first "OBJ" in `instr', or
 * ERROR when the line does not contain "OBJ".
 * ----------------------------------------------------------------- */
LOCAL fd_t GR_getfd(instr)
     char *instr;
{
	char *tmp;

	if ((tmp = strstr(instr, "OBJ")) == NULL)
		return ERROR;
	tmp += 3;		/* strlen("OBJ") */
	return ((fd_t) atol(tmp));
}

/*
 *  GR_append_quoted - append " 'pat' " to the NUL-terminated string in
 *  `cmd' (capacity `cmdsize' bytes), quoting `pat' as a single shell
 *  word.  Every single quote inside `pat' is written as '\'' so the
 *  pattern cannot terminate the quoting.
 *
 *  Returns SUCCESS, or ERROR when the result would not fit; after
 *  ERROR the contents of `cmd' must not be used.
 */
LOCAL int GR_append_quoted(cmd, cmdsize, pat)
     char *cmd;
     size_t cmdsize;
     char *pat;
{
	size_t len = strlen(cmd);
	char *p;

	/* each check keeps one byte in reserve for the final NUL */
	if (len + 2 >= cmdsize)
		return ERROR;
	cmd[len++] = ' ';
	cmd[len++] = '\'';

	for (p = pat; *p != '\0'; p++) {
		if (*p == '\'') {
			/* close the quote, add an escaped quote, reopen */
			if (len + 4 >= cmdsize)
				return ERROR;
			memcpy(cmd + len, "'\\''", 4);
			len += 4;
		} else {
			if (len + 1 >= cmdsize)
				return ERROR;
			cmd[len++] = *p;
		}
	}

	if (len + 2 >= cmdsize)
		return ERROR;
	cmd[len++] = '\'';
	cmd[len++] = ' ';
	cmd[len] = '\0';
	return SUCCESS;
}

/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 * PUBLIC FUNCTIONS
 * XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */

/* -----------------------------------------------------------------
 * IND_New_Object -- index a new object
 *
 * Indexes the object immediately when IndexType is I_PER_OBJ and
 * counts it as new on success.
 * ----------------------------------------------------------------- */
int GRASS_IND_New_Object(entry)
     reg_t *entry;
{
	int ret = SUCCESS;

	if (IndexType == I_PER_OBJ)
		ret = GR_Index_Object(entry);
	if (ret == SUCCESS)
		GR_NewObj++;
	return (ret);
}

/* -----------------------------------------------------------------
 * IND_Index_Full() -- perform a complete index of all objects.
 *
 * Returns ERROR only when the command line would not fit in the
 * command buffer.
 * ----------------------------------------------------------------- */
int GRASS_IND_Index_Full()
{
	char comm[BUFSIZ];
	int n;

	Log("Begin GRASS Full Indexing...\n");
	n = snprintf(comm, sizeof(comm), "%s %s/objects", GRASSINDEX, DIRpath);
	if (n < 0 || (size_t) n >= sizeof(comm)) {
		errorlog("GRASS_IND_Index_Full: indexing command too long\n");
		return ERROR;
	}
	/*
	 * NOTE(review): the result of GRASS_Start_Indexing() is ignored,
	 * as before; confirm whether callers expect a failure to start
	 * the indexer to be reported.
	 */
	(void) GRASS_Start_Indexing(comm);
	Log("Finished GRASS Full Indexing.\n");
	return SUCCESS;
}

/* -----------------------------------------------------------------
 * IND_Index_Incremental -- perform an incremental index
 *
 * Not supported by GRASS; falls back to a full index.
 * ----------------------------------------------------------------- */
int GRASS_IND_Index_Incremental()
{
	Log("Sorry, but GRASS incremental indexing is not supported\n");
	Log("Using full indexing instead...\n");
	return GRASS_IND_Index_Full();
}

/* -----------------------------------------------------------------
 * IND_Index_Start -- prepare for indexing a stream of objects.
 * ----------------------------------------------------------------- */
int GRASS_IND_Index_Start()
{
	GR_NewObj = 0;
	return SUCCESS;
}

/* -----------------------------------------------------------------
 * IND_Index_Flush -- finish indexing a stream of objects.
 *
 * Runs the indexing operation selected by IndexType, but only when at
 * least one new object was counted since IND_Index_Start.
 * ----------------------------------------------------------------- */
int GRASS_IND_Index_Flush()
{
	if (GR_NewObj > 0) {
		/* Do the default indexing operation */
		switch (IndexType) {
		case I_FULL:
			return (GRASS_IND_Index_Full());
		case I_INCR:
			return (GRASS_IND_Index_Incremental());
		case I_PER_OBJ:
			/* objects were already indexed one by one */
			break;
		default:
			fatal("GRASS_IND_Index_Flush: Internal error.\n");
		}
	}
	return SUCCESS;
}

/* -----------------------------------------------------------------
 * IND_Destroy_Obj -- remove an object from the indexer.
 * ----------------------------------------------------------------- */
int GRASS_IND_Destroy_Obj(entry)
     reg_t *entry;
{
	/* Nop in GRASS */
	return SUCCESS;
}

/* -----------------------------------------------------------------
 * IND_initialize -- initialize interface to indexer
 * ----------------------------------------------------------------- */
int GRASS_IND_initialize()
{
	IndexType = I_FULL;
	GR_ncalled = 0;
	return SUCCESS;
}

/* -----------------------------------------------------------------
 * IND_config -- configure indexer specific variables
 *
 * No GRASS-specific settings are handled; the arguments are only
 * checked for NULL.
 * ----------------------------------------------------------------- */
int GRASS_IND_config(value, tag)
     char *value;
     char *tag;
{
	if (tag == NULL || value == NULL)
		return ERROR;
#if DEBUG2
	Log("GRASS Configuration: %s %s\n", value, tag);
#endif
	return SUCCESS;
}

/* -----------------------------------------------------------------
 * IND_do_query -- process a query string
 *
 * Builds a grassquery command line from QM_op and the query list,
 * runs it with its output redirected to a temporary file, and then
 * processes that file according to `qflag' (QBULK, UQUERY or
 * QDELETE).  The pattern is shell-quoted and the command is length
 * checked because the query text is not under our control.
 * Returns SUCCESS or ERROR.
 * ----------------------------------------------------------------- */
int GRASS_IND_do_query(ql, rsock, qflag, ptime)
     qlist_t *ql;
     int rsock, qflag;
     time_t ptime;
{
	FILE *indexfp;
	char commandstr[BUFSIZ], *patstr, *tfn = NULL;
	size_t len;
	int n, have_cmd = 0, err = SUCCESS;
#ifdef FORK_ON_BULK
	int bulk_pid;
#endif

	GR_ncalled++;

	if (QM_op == (char *) 0) {
		errorlog("Query operation not set\n");
		return ERROR;
	}

	n = snprintf(commandstr, sizeof(commandstr), "%s %s", GRASSQUERY, QM_op);
	if (n >= 0 && (size_t) n < sizeof(commandstr)) {
		patstr = GR_do_qlist(ql);
		if (patstr != NULL) {
			have_cmd = (GR_append_quoted(commandstr,
				sizeof(commandstr), patstr) == SUCCESS);
			xfree(patstr);
		}
	}
	if (!have_cmd) {
		/* unsupported query, or one too long to run safely */
		if (qflag == QBULK)
			QM_send_bulk_err(rsock);
		else
			(void) write(rsock, ERR_MSG, strlen(ERR_MSG));
		(void) close(rsock);	/* close so that results are sent */
		return ERROR;
	}

	/* Need a tmpfile for grass output */
	if ((tfn = tempnam(NULL, "query")) == NULL) {
		SWRITE(rsock, IND_FAIL, IND_FAIL_S);
		return ERROR;	/* shouldn't really happen */
	}
	len = strlen(commandstr);
	n = snprintf(commandstr + len, sizeof(commandstr) - len, " > %s", tfn);
	if (n < 0 || (size_t) n >= sizeof(commandstr) - len) {
		errorlog("GRASS search command too long\n");
		xfree(tfn);
		SWRITE(rsock, IND_FAIL, IND_FAIL_S);
		return ERROR;
	}

	Log("GRASS search command: %s\n", commandstr);

	/* Run the user query, give only GR_lifetime seconds */
	do_system_lifetime(commandstr, GR_lifetime);

	/* Now process the tempfile that contains the results */
	if ((indexfp = fopen(tfn, "r")) == NULL) {
		log_errno(tfn);
		(void) unlink(tfn);
		xfree(tfn);	/* PURIFY */
		if (qflag == UQUERY) {
			SWRITE(rsock, IND_FAIL, IND_FAIL_S);
		} else {
			QM_send_bulk_err(rsock);
		}
		(void) close(rsock);
		return ERROR;
	}

	/* Process the grass results based on this query type */
	switch (qflag) {
	case QBULK:
#ifdef FORK_ON_BULK
		bulk_pid = fork();
		if (bulk_pid == 0) {	/* child */
			int e[3];

			e[0] = rsock;
			e[1] = fileno(indexfp);
			e[2] = -1;
			close_all_fds_except(3, e);
			(void) GR_bulk_query(rsock, indexfp, ptime);
			(void) fclose(indexfp);
			(void) unlink(tfn);
			(void) close(rsock);
			_exit(0);
		}
		if (bulk_pid < 0) {
			/* no child to do the transfer; tell the client */
			log_errno("fork");
			QM_send_bulk_err(rsock);
			err = ERROR;
		} else {
			err = SUCCESS;
		}
#else
		err = GR_bulk_query(rsock, indexfp, ptime);
#endif
		break;
	case UQUERY:
		err = GR_user_query(rsock, indexfp);
		break;
	case QDELETE:
		err = GR_del_query(rsock, indexfp);
		break;
	default:
		break;
	}

	/* Clean up */
	(void) fclose(indexfp);
	(void) unlink(tfn);
	xfree(tfn);	/* PURIFY */

	/*
	 * NOTE(review): GR_bulk_query and the client-gone path of
	 * GR_user_query have already closed rsock when we get here, so
	 * this close can be a second close of the same descriptor.
	 */
	(void) close(rsock);	/* close so that results are sent */
	return err;
}

/* -----------------------------------------------------------------
 * IND_Init_Flags -- initialize query parser flags
 * ----------------------------------------------------------------- */
void GRASS_IND_Init_Flags()
{
	GR_lifetime = GR_max_lifetime;	/* reset on each query */
	GR_maxresults = 0;	/* Max number of hits in the result set */
	GR_illegal_query = 0;	/* Is GRASS capable of this query */
}

/* -----------------------------------------------------------------
 * IND_Set_Flags -- set query parser flag
 *
 * "timeout" sets the query lifetime, clamped to the range
 * [10, GR_max_lifetime] seconds; "maxresult" sets the result limit
 * (values below 1 become 0).  Any other flag, and "maxresult" without
 * a value, marks the query as illegal.
 * ----------------------------------------------------------------- */
void GRASS_IND_Set_Flags(flag, val)
     char *flag, *val;
{
	if (flag == NULL)
		return;

	if (strcasecmp(flag, "timeout") == 0) {
		if (val != NULL)
			GR_lifetime = atoi(val);
		if (GR_lifetime < 10)
			GR_lifetime = 10;	/* at least 10 seconds */
		if (GR_lifetime > GR_max_lifetime)
			GR_lifetime = GR_max_lifetime;
	} else if ((strcasecmp(flag, "maxresult") == 0) && val != NULL) {
		GR_maxresults = atoi(val);
		if (GR_maxresults < 1)
			GR_maxresults = 0;
	} else {
		GR_illegal_query = 1;
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -