⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 index.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
			xfree(rl);			xfree(nl);			return NULL;		}		strcat(nl, rl);		strcat(nl, ")");		xfree(rl);		return (nl);	}	return (WAIS_build_select(ql));}/* ----------------------------------------------------------------- *   WAIS_build_select -- Build the basic WAIS query   * ----------------------------------------------------------------- */static char *WAIS_build_select(ql)qlist_t *ql;{	char *tmp;	if (ql->op == EXACT) {		tmp = (char *) xmalloc(SEL_SIZE);		tmp[1] = '\0';		if (ql->llist) {			strip_attr(ql->llist);			sprintf(tmp, "( %s = %s )", ql->llist, ql->rlist);			xfree(ql->rlist);			xfree(ql->llist);			return (tmp);		}		sprintf(tmp, "%s", ql->rlist);		xfree(ql->llist);		return (tmp);	} else if (ql->op == REGEX) {		tmp = (char *) xmalloc(SEL_SIZE);		tmp[0] = '\0';		if (ql->llist) {			strip_attr(ql->llist);			sprintf(tmp, "( %s = %s )", ql->llist, ql->rlist);			xfree(ql->rlist);			xfree(ql->llist);			return (tmp);		}		sprintf(tmp, "%s", ql->rlist);		xfree(ql->llist);		return (tmp);	}	return NULL;}/* -----------------------------------------------------------------   WAIS_getfd -- Get the fd of the WAISsearch return    ----------------------------------------------------------------- */static fd_t WAIS_getfd(instr)char *instr;{	char *tmp;	if ((tmp = strstr(instr, "OBJ")) == NULL)		return ERROR;	tmp += 3;		/* strlen("OBJ") */	return ((fd_t) atol(tmp));}static void strip_nl(s)char *s;{	char *p;	for (p = s; *p; p++)		if (isspace((unsigned char) *p))			*p = ' ';}/* ----------------------------------------------------------------- *   WAIS_do_query -- construct a WAIS query based upon the query list    structure Uses the AND and OR keywords for boolean operations and    translates structured queries to unstructured ones.   
* ----------------------------------------------------------------- */static int WAIS_do_query(ql, rsock, qflag, ptime)qlist_t *ql;int rsock;int qflag;time_t ptime;{	FILE *indexfp = NULL;	FILE *fp = NULL;	static char commandstr[2048];	static char hostflag[BUFSIZ];	static char portflag[BUFSIZ];	char *tfn = NULL;	char *tifn = NULL;	char *patstr = NULL;	int err = SUCCESS;	patstr = WAIS_do_qlist(ql);	if (patstr) {		/* /dev/null is the default stdin for the broker */		hostflag[0] = portflag[0] = '\0';		if (!strcasecmp(WAISflavor, "commercial-wais")) {			strip_nl(patstr);			if (((tifn = tempnam(NULL, "waisi")) != NULL) &&			    ((fp = fopen(tifn, "w")) != NULL)) {				fprintf(fp, "h %d\n%s\nq\n",				  WAIS_maxresults ? WAIS_maxresults+1 : 41,					patstr);				fclose(fp);#if DEBUG3				Log("WAISlookup receiving: 'h %d\n%s\nq\n'\n",				     WAIS_maxresults ? WAIS_maxresults+1 : 41,				     patstr);#endif			} else {				return ERROR;			}			sprintf(commandstr, "%s -d \"%s@%s:%d\" < %s",				WAISlookup,				WAISdbname,				WAIShost ? WAIShost : "localhost",				WAISport ? WAISport : 210,				tifn);		} else {			if (WAIShost)				sprintf(hostflag, "-h %s", WAIShost);			if (WAISport > 0)				sprintf(portflag, "-p %d", WAISport);			sprintf(commandstr, "%s %s %s -d %s -m %d \'%s\'",			      WAISsearch, hostflag, portflag, WAISdbname,			 WAIS_maxresults ? WAIS_maxresults : 40, patstr);		}#if DEBUG1		Log("\tcommand :%s:\n", commandstr);#endif		if ((tfn = tempnam(NULL, "query")) != NULL) {			strcat(commandstr, " > ");			strcat(commandstr, tfn);		} else			return ERROR;	/* shouldn't really happen */		/* Perform WAIS, Inc. in-line query if possible */		if (!strcasecmp(WAISflavor, "commercial-wais") &&		    !strcasecmp(WAISlookup, "inline")) {#ifdef USE_WAIS_INLINE			FILE *fp;			if ((fp = fopen(tfn, "w+")) == NULL) {				log_errno(tfn);			} else {				do_wais_inline_search(fp, 					WAIShost, (long) WAISport,					patstr, WAISdbname,					WAIS_maxresults ? 
WAIS_maxresults+1 : 41					);				fclose(fp);			}#else			errorlog("WARNING: inline WAIS support not compiled in!\n");#endif		} else 		{			do_system_lifetime(commandstr, 60 * 4);	/* 4 min */		}		/* clean up some */		xfree(patstr);		/* must do this after inline_search */		if (tifn != NULL) {			(void) unlink(tifn);			xfree(tifn);		}		if ((indexfp = fopen(tfn, "r")) == NULL) {			log_errno(tfn);			(void) unlink(tfn);			xfree(tfn);	/* PURIFY */			if (qflag == UQUERY) {				SWRITE(rsock, IND_FAIL, IND_FAIL_S);			} else {				QM_send_bulk_err(rsock);			}			return ERROR;		}		switch (qflag) {		case QBULK:			err = WAIS_bulk_query(rsock, indexfp, ptime);			break;		case UQUERY:			err = WAIS_user_query(rsock, indexfp);			break;		case QDELETE:			err = WAIS_del_query(rsock, indexfp);			break;		default:			break;		}		(void) fclose(indexfp);		(void) unlink(tfn);		xfree(tfn);	/* PURIFY */		return (err);	}	if (qflag == QBULK)		QM_send_bulk_err(rsock);	else		SWRITE(rsock, ERR_MSG, ERR_SIZE);	return ERROR;}/*    *  ---------------------------------------------------------------------- *  PUBLIC FUNCTIONS  *  ---------------------------------------------------------------------- */int WAIS_IND_New_Object(entry)reg_t *entry;{	static char comm[BUFSIZ];	char *fn = NULL;	int r = SUCCESS;	FILE *fp = NULL;	/* Only index per object if config'ed that way */	if (IndexType == I_PER_OBJ) {		Log("Start WAIS Indexing Object: %d\n", entry->FD);		/* Grab the object file and build the index command */		fn = SM_Get_Obj_Filename(entry->FD);		if (!strcasecmp(WAISflavor, "commercial-wais"))			sprintf(comm, 				"%s -parse soif -r %s | %s -d %s/%s -append",			  	WAISparse, fn, WAISindex, DIRpath, WAISdbname);		else			sprintf(comm, "%s -r -a -d %s/%s %s",				WAISindex, DIRpath, WAISdbname, fn);		xfree(fn);		r = WAIS_Start_Indexing(comm);		Log("Done with WAIS Indexing Object: %d\n", entry->FD);	} else {		/* create/append a file to list all of the object filenames */		if (newobj_fn == NULL) {			newobj_fn = 
strdup(tempnam(NULL, "nobj"));		}		if ((fp = fopen(newobj_fn, "a+")) != NULL) {			fn = SM_Get_Obj_Filename(entry->FD);			fprintf(fp, "%s\n", fn);			xfree(fn);			fclose(fp);		} else {			log_errno(newobj_fn);		}	}	if (r == SUCCESS) {		WAIS_NewObj++;	}	return r;}int WAIS_IND_Index_Start(){	WAIS_NewObj = 0;	return SUCCESS;}int WAIS_IND_Index_Full(){	static char comm[BUFSIZ];	int r;	Log("Starting WAIS Full Indexing.\n");	if (!strcasecmp(WAISflavor, "commercial-wais"))		sprintf(comm, "%s -parse soif -r %s/objects | %s -d %s/%s", 			WAISparse, DIRpath, WAISindex, DIRpath, WAISdbname);	else		sprintf(comm, "%s -r -d %s/%s %s/objects", WAISindex,			DIRpath, WAISdbname, DIRpath);	r = WAIS_Start_Indexing(comm);	Log("Finished WAIS Full Indexing.\n");	return r;}int WAIS_IND_Index_Incremental(){	static char comm[BUFSIZ];	int r;	if (newobj_fn == NULL) {		Log("No incremental indexing to do...\n");		return SUCCESS;	}	Log("Staring WAIS Incremental Indexing.\n");	if (!strcasecmp(WAISflavor, "commercial-wais"))		sprintf(comm, "cat %s | is-readable | %s -parse soif - | %s -d %s/%s -append", newobj_fn, WAISparse, WAISindex, DIRpath, WAISdbname);	else		sprintf(comm, "%s -a -r -d %s/%s %s/objects",			WAISindex, DIRpath, WAISdbname, DIRpath);	r = WAIS_Start_Indexing(comm);	Log("Finished WAIS Incremental Indexing.\n");	return r;}int WAIS_IND_Index_Flush(){	int ret = SUCCESS;	if (WAIS_NewObj > 0) {		switch (IndexType) {		case I_FULL:			ret = WAIS_IND_Index_Full();			break;		case I_INCR:			ret = WAIS_IND_Index_Incremental();			break;		case I_PER_OBJ:			/* we've already done the indexing */			break;		default:			fatal("WAIS_IND_Index_Flush() FATAL ERROR\n");		}	}	/* clean up after the new object list */	if (newobj_fn != NULL) {		(void)unlink(newobj_fn);		xfree(newobj_fn);		newobj_fn = NULL;	}	return (ret);}int WAIS_IND_Destroy_Obj(entry)reg_t *entry;{	static char comm[BUFSIZ];	char *fn = NULL;	/* valid only for WAIS, Inc. 
*/	if (strcasecmp(WAISflavor, "commercial-wais") != 0)		return;	/* this is slow since you need 1 fork per delete */	/* you can run a single waisdelete command and print args to stdin */	if (SM_Exist_Obj(entry->FD) == TRUE) {		Log("Removing WAIS object %d from index.\n", entry->FD);		fn = SM_Get_Obj_Filename(entry->FD);		sprintf(comm, "%s -d %s/%s -x %s", 			WAISdelete, DIRpath, WAISdbname, fn);		do_system(comm);		xfree(fn);	}	return SUCCESS;	}int WAIS_IND_initialize(){	/* default values */	IndexType = I_FULL;	WAISindex = xstrdup("waisindex");	WAISparse = xstrdup("waisparse");	WAISlookup = xstrdup("waislookup");	WAISsearch = xstrdup("waissearch");	WAISserver = xstrdup("waisserver");	WAISdelete = xstrdup("waisdelete");	WAISflavor = xstrdup("freewais");	WAISbin = NULL;	WAIShost = NULL;	WAISlog = NULL;	WAISport = 0;	return SUCCESS;}int WAIS_IND_config(value, tag)char *value;char *tag;{#if DEBUG1	if ((value != NULL) && (tag != NULL)) {		Log("Wais Configuration: %s %s\n", tag, value);	}#endif	if ((tag == NULL) || (value == NULL)) {		return ERROR;	}	if (strcasecmp(tag, S_WAISLOG) == 0) {		FILE *fp;		WAISlog = xstrdup(value);		if ((fp = fopen(WAISlog, "a")) != NULL)			fclose(fp);	/* create it */		verify_path(WAISlog);	} else if (strcasecmp(tag, S_WAISFLAVOR) == 0) {		WAISflavor = xstrdup(value);	} else if (strcasecmp(tag, S_WAISBIN) == 0) {		WAISbin = xstrdup(value);		verify_exe(WAISbin);	} else if (strcasecmp(tag, S_WAISINDEX) == 0) {		WAISindex = xstrdup(value);		verify_exe(WAISindex);	} else if (strcasecmp(tag, S_WAISPARSE) == 0) {		WAISparse = xstrdup(value);		verify_exe(WAISparse);	} else if (strcasecmp(tag, S_WAISSEARCH) == 0) {		WAISsearch = xstrdup(value);		verify_exe(WAISsearch);	} else if (strcasecmp(tag, S_WAISDELETE) == 0) {		WAISdelete = xstrdup(value);		verify_exe(WAISdelete);	} else if (strcasecmp(tag, S_WAISSERVER) == 0) {		WAISserver = xstrdup(value);		verify_exe(WAISserver);	} else if (strcasecmp(tag, S_WAISLOOKUP) == 0) {		WAISlookup = xstrdup(value);		
verify_exe(WAISlookup);	} else if (strcasecmp(tag, S_WAISDB) == 0) {		WAISdbname = xstrdup(value);	} else if (strcasecmp(tag, S_WAISHOST) == 0) {		WAIShost = xstrdup(value);	} else if (strcasecmp(tag, S_WAISPORT) == 0) {		if (sscanf(value, "%d", &WAISport) != 1)			errorlog("Illegal WAIS-Port broker.conf.\n");	}	return SUCCESS;}int WAIS_IND_do_query(ql, rsock, qflag, ptime)qlist_t *ql;int rsock;int qflag;time_t ptime;{	/* Make sure that waisserver is running */	if (WAISport > 0 && IndexServer_pid == 0) {		WAIS_Start_WAISserver();	}	return (WAIS_do_query(ql, rsock, qflag, ptime));}void WAIS_IND_Init_Flags(){	WAIS_gotstructured = 0;	WAIS_maxresults = 0;	WAIS_illegal_query = 0;}void WAIS_IND_Set_Flags(flag, val)char *flag;char *val;{	if (flag == NULL)		return;#if DEBUG2	Log("Query flags %s %s\n", flag, (val == NULL) ? "(null)" : val);#endif	if ((strcasecmp(flag, "maxresult") == 0) && val != NULL) {		WAIS_maxresults = atoi(val);		if (WAIS_maxresults < 0)			WAIS_maxresults = 0;	} else if (strcasecmp(flag, "case") == 0) {		/* IGNORE */ ;	} else {		WAIS_illegal_query = 1;	}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -