⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 index.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
	 *  Before we return the query results, we perform 2 write's on	 *  the socket to the client to test whether or not the client	 *  will be able to receive the query results.	 *  We have to do two writes because the first will complete	 *  even though the other side is gone.	 */	(void) write(rsock, PIPECHK, strlen(PIPECHK));	if (write(rsock, PIPECHK, strlen(PIPECHK)) == -1) {		errorlog("Client is gone -- aborting user query results.\n");		close(rsock);		return ERROR;	}	opdata = (char **) xmalloc(MAX_OPDATA_SIZE * sizeof(char *));	inb = (char *) xmalloc(BIG_BUFSIZ * sizeof(char));	opb = (char *) xmalloc(BIG_BUFSIZ * sizeof(char));	while (fgets(inb, BIG_BUFSIZ, indexfp) != NULL) {		if ((fd1 = GL_getfd(inb)) == ERROR) {			if (!strncmp(inb, "glimpse:", 8))	/* a msg */				Log("%s", inb);			continue;		}		if ((fd1 != fd2) && (fd2 != (fd_t) (-1))) {			/* return the previous object */			Debug(102, 1, ("GL_user_query: Calling QM_user_object(rsock=%d, fd2=%d, opsize=%d, opdata=%08x)\n", rsock, fd2, opsize, opdata));			switch(QM_user_object(rsock, fd2, opsize, opdata)) {			case SUCCESS:			        obcnt++;                                break;                        case ERROR:                                /* client socket went away */			        /* free the opaque data */			        Debug(102, 1, ("GL_user_query: About to free %d opaque strings\n", opsize));				for (i = 0; i < opsize; i++) {				       Debug(102, 1, ("GL_user_query: freeing opdata[%d]\n", i));                                       xfree(opdata[i]);                                       opdata[i] = (char *) NULL;				}				opsize = 0;				xfree(opdata);				xfree(inb);				xfree(opb);				return ERROR;				/* NOTREACHED */                                break;                        default:                                /* FAIL: wasn't an object */                                break;			}			Debug(102, 1, ("GL_user_query: About to free %d opaque strings\n", opsize));			/* free the opaque data */			for (i = 0; i < 
opsize; i++) {				Debug(102, 1, ("GL_user_query: freeing opdata[%d]\n", i));				xfree(opdata[i]);				opdata[i] = (char *) NULL;			}			opsize = 0;		}		/*		 *  If there's a : in the output, then Glimpse is		 *  displaying a matched line.                 *		 *  URK - glimpse sometimes returns a corrupted matched line		 *  I don't know quite how we can get around this, but the		 *  following seems to work. It is, however, a kludgy hack		 *  which may break spectacularly if the storage manager is		 *  changed (sxw 14/05/97)		 */		if ((tmp2 = strstr(inb, "/OBJ")) !=NULL) {		  if ((tmp = strchr(tmp2, ':')) != NULL) {			if (s = strrchr(tmp, '\n'))				*s = '\0';			sprintf(opb, "Matched line: %s", ++tmp);			Debug(102, 1, ("GL_user_query: got matched line %d\n", opsize));			if (opsize < MAX_OPDATA_SIZE)				opdata[opsize++] = xstrdup(opb);		  }		}		fd2 = fd1;	}	/* Get the last object */	if (fd2 != (fd_t) (-1)) {		Debug(102, 1, ("GL_user_query: Calling QM_user_object(rsock=%d, fd2=%d, opsize=%d, opdata=%08x)\n", rsock, fd2, opsize, opdata));		if (QM_user_object(rsock, fd2, opsize, opdata) == SUCCESS)			obcnt++;	}	QM_user_done(rsock, obcnt);	/* free the opaque data */	Debug(102, 1, ("GL_user_query: About to free %d opaque strings\n", opsize));	for (i = 0; i < opsize; i++) {		Debug(102, 1, ("GL_user_query: freeing opdata[%d]\n", i));		xfree(opdata[i]);		opdata[i] = (char *) NULL;	}	opsize = 0;	xfree(opdata);	xfree(inb);	xfree(opb);	return SUCCESS;}/* ----------------------------------------------------------------- * * GL_do_qlist -- Recursive function to build a query from the list. 
* ----------------------------------------------------------------- */LOCAL char *GL_do_qlist(ql)     qlist_t *ql;{	char *ll = NULL;	char *rl = NULL;	static char *nl = NULL;	if (ql==NULL) return NULL;	if (ql->type == LOGICAL) {	        /* ll can be NULL if we're dealing with NOT */	        ll = GL_do_qlist((qlist_t *) ql->llist);	  	if ((rl = GL_do_qlist((qlist_t *) ql->rlist)) == NULL) {			if (ll!=NULL) xfree(ll);			ll = NULL;			return NULL;		}		nl = (char *) xmalloc(SEL_SIZE);#ifdef USE_PARENS_FOR_BOOLEAN		nl[0] = '{';		nl[1] = '\0';#else		nl[0] = '\0';#endif		if (ll!=NULL) strcat(nl, ll);		switch (ql->op) {		case AND:			strncat(nl, ";", 1);			break;		case OR:			strncat(nl, ",", 1);			break;                case NOT:			strncat(nl, "~", 1);			break;		default:			xfree(nl);			nl = NULL;			xfree(rl);			rl = NULL;			if (ll!=NULL) xfree(ll);			ll = NULL;			return NULL;		}		strcat(nl, rl);#ifdef USE_PARENS_FOR_BOOLEAN		strcat(nl, "}");#endif		if (ll!=NULL) xfree(ll);		ll = NULL;		xfree(rl);		rl = NULL;		return (nl);	}	return (GL_build_select(ql));}/* ----------------------------------------------------------------- * * GL_build_select -- Build the basic Glimpse query. 
* ----------------------------------------------------------------- */LOCAL char *GL_build_select(ql)     qlist_t *ql;{	char *tmp = NULL;	if (ql->op == EXACT) {		GL_regexflag = 1;		tmp = (char *) xmalloc(SEL_SIZE);		tmp[0] = '\0';		GL_errflag = -1;		if (ql->llist) {			sprintf(tmp, "%s=%s", ql->llist, ql->rlist);			xfree(ql->rlist);			ql->rlist = NULL;			xfree(ql->llist);			ql->llist = NULL;			return (tmp);		}		sprintf(tmp, "%s", ql->rlist);		xfree(ql->rlist);		ql->rlist = NULL;		return (tmp);	}	if (ql->op == REGEX) {		tmp = (char *) xmalloc(SEL_SIZE);		tmp[0] = '\0';		if (ql->llist) {			sprintf(tmp, "%s=%s", ql->llist, ql->rlist);			xfree(ql->rlist);			ql->rlist = NULL;			xfree(ql->llist);			ql->llist = NULL;			return (tmp);		}		sprintf(tmp, "%s", ql->rlist);		xfree(ql->rlist);		ql->rlist = NULL;		return (tmp);	}	return NULL;}/* ----------------------------------------------------------------- * * GL_getfd -- Get the fd of the Glimpse return. * ----------------------------------------------------------------- */LOCAL fd_t GL_getfd(instr)     char *instr;{	char *tmp = NULL;	if ((tmp = strstr(instr, "/OBJ")) == NULL)		return ERROR;	tmp += 4;		/* strlen("/OBJ") */	return ((fd_t) atol(tmp));}LOCAL void GL_query_inline_timeout(sig)     int sig;{	Log("Inline query timeout.  Sending SIGUSR1 to glimpseserver\n");	kill(IndexServer_pid, SIGUSR1);}/* ----------------------------------------------------------------- * * GL_do_query_inline -- the broker directly contacts glimpseserver * * has minimal error reporting becuase we use external glimpse * as a fallback. 
* ----------------------------------------------------------------- */LOCAL int GL_do_query_inline(ql, rsock, qflag, ptime)     qlist_t *ql;     int rsock;     int qflag;     time_t ptime;{	Host *h = NULL;	int gl_sock;	struct sockaddr_in sa;	char *argv[64];	int argc;	static char tbuf[64];	int i;	int pid;	FILE *fp = NULL;	int err;	char *patstr = NULL;	void (*alrmfunc) () = NULL;	Debug(102, 1, ("GL_do_query_inline: ql=%p, rsock=%d, qflag=%d, ptime=%d\n", ql, rsock, qflag, ptime));	if (qflag != UQUERY) {	/* only do user queries here */		errorlog("GL_do_query_inline: Only USER queries handled here.\n");		return FAIL;	}	if (!strcasecmp(GL_GlimpseServer, "false")) {		errorlog("GL_do_query_inline: GL_GlimpseServer is 'false'.\n");		return FAIL;	}	/*      Create a stream socket to glimpseserver                 */	if ((h = get_host(GL_GlimpseSrvHost)) == (Host *) NULL) {		errorlog("GL_do_query_inline: Failed to lookup '%s'.\n",		    GL_GlimpseSrvHost);		return FAIL;	}	if ((gl_sock = socket(PF_INET, SOCK_STREAM, 0)) < 0) {		log_errno2(__FILE__, __LINE__, "GL_do_query_inline: socket");		return FAIL;	}	memcpy(&sa.sin_addr, h->ipaddr, h->addrlen);	sa.sin_family = AF_INET;	sa.sin_port = htons(GL_GlimpseSrvPort);	Debug(102, 1, ("GL_do_query_inline: connecting to %s, port %d\n",		inet_ntoa(sa.sin_addr), GL_GlimpseSrvPort));	if (connect(gl_sock, (struct sockaddr *) &sa, sizeof(sa)) < 0) {		log_errno2(__FILE__, __LINE__, "GL_do_query_inline: connect");		close(gl_sock);		return FAIL;	}	/*      Build command line args to pass to glimpseserver        */	argc = 0;	argv[argc++] = xstrdup("glimpse");	argv[argc++] = xstrdup("-a");	argv[argc++] = xstrdup("-H");	argv[argc++] = xstrdup(DIRpath);	argv[argc++] = xstrdup("-C");	argv[argc++] = xstrdup("-J");	argv[argc++] = xstrdup(GL_GlimpseSrvHost);	argv[argc++] = xstrdup("-K");	sprintf(tbuf, "%d", GL_GlimpseSrvPort);	argv[argc++] = xstrdup(tbuf);	argv[argc++] = xstrdup("-y");	argv[argc++] = xstrdup("-W"); /* Hack to make NOTs work */	if 
(GL_errflag > 0) {		sprintf(tbuf, "-%d", GL_errflag);		argv[argc++] = xstrdup(tbuf);		/* We can't do an error match with -i or -w so disable them */		GL_caseflag=0;		GL_wordflag=0;	}	argv[argc++] = xstrdup("-L");	sprintf(tbuf, "%d", GL_maxresults < 1 ? 1000 : GL_maxresults);	if (GL_maxfiles > 0)		sprintf(tbuf, "%s:%d", tbuf, GL_maxfiles);	if (GL_maxlines > 0)		sprintf(tbuf, "%s:%d", tbuf, GL_maxlines);	argv[argc++] = xstrdup(tbuf);	if (GL_caseflag == 1) {		argv[argc++] = xstrdup("-i");	}	if ((patstr = GL_do_qlist(ql)) == (char *) NULL) {		err = ERROR;		goto query_inline_done;	}	if ((strchr(patstr, '*') || strchr(patstr, '.') || strchr(patstr, '|')	     || strchr(patstr, '('))) {	  /* As we can't do regular expression + word match, rather than give	   * an error - just disable word match	   */	  GL_wordflag = 0;	}	if (GL_wordflag == 1) {		argv[argc++] = xstrdup("-w");	}	if (QM_opaqueflag != 1) {#ifdef GLIMPSE_3		if ((GL_caseflag == 1) && (GL_wordflag == 1))			argv[argc++] = xstrdup("-N");		else#endif			argv[argc++] = xstrdup("-l");	}	argv[argc++] = patstr;	/* patstr is already malloc'd */	/*      Write the "request" to glimpseserver                    */	/*      format is pid,argc,argv[0],argv[1],...                  
*/	pid = (int) getpid();	tbuf[0] = (pid >> 24) & 0xFF;	tbuf[1] = (pid >> 16) & 0xFF;	tbuf[2] = (pid >> 8) & 0xFF;	tbuf[3] = pid & 0xFF;	write(gl_sock, tbuf, 4);	tbuf[0] = (argc >> 24) & 0xFF;	tbuf[1] = (argc >> 16) & 0xFF;	tbuf[2] = (argc >> 8) & 0xFF;	tbuf[3] = argc & 0xFF;	write(gl_sock, tbuf, 4);	for (i = 0; i < argc; i++) {		write(gl_sock, argv[i], strlen(argv[i]) + 1);		write(gl_sock, "\n", 1);		Debug(102, 5, ("GL_do_query_inline: Passed argv[%d]: %s\n",			i, argv[i]));	}	shutdown(gl_sock, 1);	/* no more writing to glimpseserver */	/*      Read the query results                                  */	alrmfunc = signal(SIGALRM, GL_query_inline_timeout);	alarm(GL_lifetime);	if ((fp = fdopen(gl_sock, "r")) != (FILE *) NULL) {		err = GL_user_query(rsock, fp);	} else {		err = FAIL;	}      query_inline_done:	alarm(0);	signal(SIGALRM, alrmfunc);	for (i = 0; i < argc; i++) {		xfree(argv[i]);		argv[i] = NULL;	}	if (fp)		fclose(fp);	close(gl_sock);	return err;}/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX * PUBLIC FUNCTIONS * XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX *//* ----------------------------------------------------------------- * * IND_New_Object -- index a new object * ----------------------------------------------------------------- */int Glimpse_IND_New_Object(entry)     reg_t *entry;{	int ret = SUCCESS;	if (IndexType == I_PER_OBJ)		ret = GL_Index_Object(entry);	if (ret == SUCCESS)		GL_NewObj++;	return (ret);}/* ----------------------------------------------------------------- * * IND_Index_Full() -- perform a complete index of all objects. * ----------------------------------------------------------------- */int Glimpse_IND_Index_Full(){	static char comm[BUFSIZ];	/* PURIFY: sez 'uninitialized memory read' */	memset(comm, '\0', BUFSIZ);	Log("Begin Glimpse Full Indexing...\n");	sprintf(comm, "%s %s %s -H %s %s/objects", GL_GlimpseInd,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -