⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 main.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
	 *  client might die suddenly via ^C or SIGTERM, but we still	 *  want the results.	 */	char tmpbuf[1024];	int n;	while ((n = read(sockfd, tmpbuf, 1024)) > 0) {		write(fileno(stdout), tmpbuf, n);	}}#endif	/*USE_MSGHDR*/	close(sockfd);	RETURNMAIN(ret);doityourself:#if	DEBUG	printf("doing it myself :-(\n");#endif	/*DEBUG*/#endif	/*CLIENTSERVER*/	setbuf(stdout, NULL);	/* Unbuffered I/O to always get every result */	setbuf(stderr, NULL);	glimpse_call = 0;	glimpse_clientdied = 0;	ret = process_query(oldargc, oldargv, fileno(stdin));	RETURNMAIN(ret);#endif	/*ISSERVER && CLIENTSERVER*/}process_query(argc, argv, newsockfd)int argc; char *argv[];int newsockfd;{	int	searchpercent;	int	num_blocks;	int	num_read;	int	i, j;	int	iii; /* Udi */	int	jjj;	char	c;	char	*p;	int	ret;	int	jj;	int	quitwhile;	char	indexdir[MAX_LINE_LEN];	char	temp_filenames_file[MAX_LINE_LEN];	char	temp_bitfield_file[MAX_LINE_LEN];	char	TEMP_FILE[MAX_LINE_LEN];	char	temp_file[MAX_LINE_LEN];	int	oldargc = argc;	char	**oldargv = argv;	CHAR	dummypat[MAX_PAT];	int	dummylen=0;	int	my_M_index, my_P_index, my_b_index, my_A_index, my_l_index = -1, my_B_index = -1;	char	**outname;	int	gnum_of_matched = 0;	int	gprev_num_of_matched = 0;	int	gfiles_matched = 0;	int	foundpat = 0;	int	wholefilescope=0;	int	nobytelevelmustbeon=0;	long	get_file_time();	if ((argc <= 0) || (argv == NULL)) {		errno = EINVAL;		return -1;	}/* * Macro to destroy EVERYTHING before return since we might want to make this a * library function later on: convention is that after destroy, objects are made * NULL throughout the source code, and are all set to NULL at initialization time. * DO agrep_argv, index_argv and FileOpt my_malloc/my_free optimizations later. * my_free calls have 2nd parameter = 0 if the size is not easily determinable. 
*/#define RETURN(val) \{\	int	q,k;\\	first_search = 0;\	for (k=0; k<MAX_ARGS; k++) {\		if (agrep_argv[k] != NULL) my_free(agrep_argv[k], 0);\		if (index_argv[k] != NULL) my_free(index_argv[k], 0);\		agrep_argv[k] = index_argv[k] = NULL;\	}\	if (FileOpt != NULL) my_free(FileOpt, MAXFILEOPT);\	FileOpt = NULL;\	for (k=0; k<MAXNUM_PAT; k++) {\		if (pat_list[k] != NULL) my_free(pat_list[k], 0);\		pat_list[k] = NULL;\	}\	sprintf(tempfile, "%s/.glimpse_tmp.%d", TEMP_DIR, getpid());\	unlink(tempfile);\	sprintf(outname[0], "%s/.glimpse_apply.%d", TEMP_DIR, getpid());\	unlink(outname[0]);\	my_free(outname[0], 0);\	my_free(outname, 0);\	my_free(TEMP_DIR, MAX_LINE_LEN);\	my_free(filenames_file, MAX_LINE_LEN);\	my_free(bitfield_file, MAX_LINE_LEN);\\	if (ByteLevelIndex) {\		if (src_offset_table != NULL) for (k=0; k<OneFilePerBlock; k++) {\			free_list(&src_offset_table[k]);\		}\		/* Don't make src_offset_table itself NULL: it will be bzero-d below if !NULL */\		for (q=0; q<MAXNUM_PAT; q++) {\		    if (multi_dest_offset_table[q] != NULL) for (k=0; k<OneFilePerBlock; k++) {\			free_list(&multi_dest_offset_table[q][k]);\		    }\		    /* Don't make multi_dest_offset_table[q] itself NULL: it will be bzero-d below if !NULL */\		}\	}\	for (k=0; k<num_terminals;k++)\		free(terminals[k].data.leaf.value);\	if (ComplexBoolean) destroy_tree(&GParse);\	for (k=0; k<GNumfiles; k++) {\		my_free(GTextfiles[k], 0);\		GTextfiles[k] = NULL;\	}\	/* Don't free the GTextfiles buffer itself since it is allocated once in get_filename.c */\	return (val);\}	/*	 * Initialize	 */	strcpy(&GProgname[0], argv[0]);	if (argc <= 1) return(usage());	filenames_file = (char *)my_malloc(MAX_LINE_LEN);	bitfield_file = (char *)my_malloc(MAX_LINE_LEN);	TEMP_DIR = (char *)my_malloc(MAX_LINE_LEN);	strcpy(TEMP_DIR, "/tmp");	D_length = 0;	D = 0;	pattern_index = 0;	first_search = 1;	outname  = (char **)my_malloc(sizeof(char *));	outname[0] = (char *)my_malloc(MAX_LINE_LEN);	NOBYTELEVEL = 0;	OPTIMIZEBYTELEVEL = 0;	GCONSTANT 
= 0;	GLIMITOUTPUT = 0;	GLIMITTOTALFILE = 0;	GBESTMATCH = 0;	GRECURSIVE = 0;	GNOPROMPT = 0;	GBYTECOUNT = 0;	GPRINTFILENUMBER = 0;	GPRINTFILETIME = 0;	GOUTTAIL = 2;	/* stupid fix, but works */	GFILENAMEONLY = 0;	GNOFILENAME = 0;	GPRINTNONEXISTENTFILE = 0;	MATCHFILE = 0;	PRINTATTR = 0;	PRINTINDEXLINE = 0;	Pat_as_is=0;	Only_first = 0;	PRINTAPPXFILEMATCH = 0;	GCOUNT = 0;	HINTSFROMUSER = 0;	FILENAMESINFILE=0;	BITFIELDFILE=0;	BITFIELDOFFSET=0;	BITFIELDLENGTH=0;	BITFIELDENDIAN=0;	GNumDays = 0;	foundattr = 0;	foundnot = 0;	ComplexBoolean = 0;	bestmatcherrors = 0;	patbufpos = -1;	RegionLimit=DEFAULT_REGION_LIMIT;	strcpy(GD_pattern, "\n");	GD_length = strlen(GD_pattern);	indexdir[0] = '\0';	memset(index_argv, '\0', sizeof(char *) * MAX_ARGS);	index_argc = 0;	memset(agrep_argv, '\0', sizeof(char *) * MAX_ARGS);	agrep_argc = 0;	FileOpt = NULL;	fileopt_length = 0;	memset(pat_list, '\0', sizeof(char *) * MAXNUM_PAT);	memset(pat_attr, '\0', sizeof(int) * MAXNUM_PAT);	for (i=0; i<MAX_ARGS; i++)		index_argv[i] = (char *)my_malloc(MaxNameLength + 2);	memset(is_mgrep_pat, '\0', sizeof(int) * MAXNUM_PAT);	memset(mgrep_pat_index, '\0', sizeof(int) *MAXNUM_PAT);	num_mgrep_pat = 0;	memset(pat_buf, '\0', (MAXNUM_PAT + 2)*MAXPAT);	pat_ptr = 0;	sprintf(tempfile, "%s/.glimpse_tmp.%d", TEMP_DIR, getpid());	/* Set WHOLEFILESCOPE for per-request processing at server */	if (StructuredIndex) WHOLEFILESCOPE = 1;	else WHOLEFILESCOPE = 0;	last_Y_filenumber = 0;	if (argc > MAX_ARGS) {#if	ISSERVER	fprintf(stderr, "too many arguments %d obtained on server!\n", argc);#endif	/*ISSERVER*/		i = fileagrep(oldargc, oldargv, 0, stdout);		RETURN(i);	}	/*	 * Process what options you can, then call fileagrep_init() to set	 * options in agrep and get the pattern. Then, call fileagrep_search().	 * Begin by copying options into agrep_argv assuming glimpse was not	 * called as agrep (optimistic :-).	 
*/	agrep_argc = 0;	for (i=0; i<MAX_ARGS; i++) agrep_argv[i] = NULL;	agrep_argv[agrep_argc] = (char *)my_malloc(strlen(argv[0]) + 2);	strcpy(agrep_argv[agrep_argc], argv[0]);	/* copy the name of the program anyway */	agrep_argc ++;	/* In glimpse, you should never output filenames with zero matches */	if (agrep_argc + 1 >= MAX_ARGS) {		fprintf(stderr, "%s: too many options!\n", GProgname);		RETURN(usage());	}	agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));	agrep_argv[agrep_argc][0] = '-';	agrep_argv[agrep_argc][1] = 'z';	agrep_argv[agrep_argc][2] = '\0';	agrep_argc ++;	/* In glimpse, you should always print pattern when using mgrep (user can't do -f or -m)! */	if (agrep_argc + 1 >= MAX_ARGS) {		fprintf(stderr, "%s: too many options!\n", GProgname);		RETURN(usage());	}	agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));	agrep_argv[agrep_argc][0] = '-';	agrep_argv[agrep_argc][1] = 'P';	agrep_argv[agrep_argc][2] = '\0';	my_P_index = agrep_argc;	agrep_argc ++;	/* In glimpse, you should always output multiple when doing mgrep */	if (agrep_argc + 1 >= MAX_ARGS) {		fprintf(stderr, "%s: too many options!\n", GProgname);		RETURN(usage());	}	agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));	agrep_argv[agrep_argc][0] = '-';	agrep_argv[agrep_argc][1] = 'M';	agrep_argv[agrep_argc][2] = '\0';	my_M_index = agrep_argc;	agrep_argc ++;	/* In glimpse, you should print the byte offset if there is a structured query */	if (agrep_argc + 1 >= MAX_ARGS) {		fprintf(stderr, "%s: too many options!\n", GProgname);		RETURN(usage());	}	agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));	agrep_argv[agrep_argc][0] = '-';	agrep_argv[agrep_argc][1] = 'b';	agrep_argv[agrep_argc][2] = '\0';	my_b_index = agrep_argc;	agrep_argc ++;	/* In glimpse, you should always have space for doing -m if required */	if (agrep_argc + 2 >= MAX_ARGS) {		fprintf(stderr, "%s: too many options!\n", GProgname);		RETURN(usage());	}	agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char 
*));	agrep_argv[agrep_argc][0] = '-';	agrep_argv[agrep_argc][1] = 'm';	agrep_argv[agrep_argc][2] = '\0';	agrep_argc ++;	agrep_argv[agrep_argc] = (char *)my_malloc(2);	/* no op */	agrep_argv[agrep_argc][0] = '\0';	agrep_argc ++;	/* Add -A option to print filenames as default */	if (agrep_argc + 1 >= MAX_ARGS) {		fprintf(stderr, "%s: too many options!\n", GProgname);		RETURN(usage());	}	agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));	agrep_argv[agrep_argc][0] = '-';	agrep_argv[agrep_argc][1] = 'A';	agrep_argv[agrep_argc][2] = '\0';	my_A_index = agrep_argc;	agrep_argc ++;	while((agrep_argc < MAX_ARGS) && (--argc > 0) && (*++argv)[0] == '-' ) {		p = argv[0] + 1;	/* ptr to first character after '-' */		c = *(argv[0]+1);		quitwhile = OFF;		while (!quitwhile && (*p != '\0')) {			c = *p;			switch(c) {			case 'F' : 				MATCHFILE = ON;				FileOpt = (CHAR *)my_malloc(MAXFILEOPT);				if (*(p + 1) == '\0') {/* space after - option */					if(argc <= 1) {						fprintf(stderr, "%s: a file pattern must follow the -F option\n", GProgname);						RETURN(usage());					}					argv++;					if ((dummylen = strlen(argv[0])) > MAXFILEOPT) {						fprintf(stderr, "%s: -F option list too long\n", GProgname);						RETURN(usage());					}					strcpy(FileOpt, argv[0]);					argc--;				} else {					if ((dummylen = strlen(p+1)) > MAXFILEOPT) {						fprintf(stderr, "%s: -F option list too long\n", GProgname);						RETURN(usage());					}					strcpy(FileOpt, p+1);				} /* else */				quitwhile = ON;				break;			/* search the index only and output the number of blocks */			case 'N' :				Only_first = ON;				break ;			/* also keep track of the matches in each file */			case 'Q' :				PRINTAPPXFILEMATCH = ON;				break ;			case 'U' :				InfoAfterFilename = ON;				break;						case '!' 
:				HINTSFROMUSER = ON;				break;			/* go to home directory to find the index: even if server overwrites indexdir here, it won't overwrite INDEX_DIR until read_index() */			case 'H' :				if (*(p + 1) == '\0') {/* space after - option */					if (argc <= 1) {						fprintf(stderr, "%s: a directory name must follow the -H option\n", GProgname);						RETURN(usage());					}					argv ++;#if	!ISSERVER					strcpy(indexdir, argv[0]);#endif	/*!ISSERVER*/					argc --;				}#if	!ISSERVER				else {					strcpy(indexdir, p+1);				}				agrep_argv[agrep_argc] = (char *)my_malloc(4);				strcpy(agrep_argv[agrep_argc], "-H");				agrep_argc ++;				agrep_argv[agrep_argc] = (char *)my_malloc(strlen(indexdir) + 2);				strcpy(agrep_argv[agrep_argc], indexdir);				agrep_argc ++;#endif	/*!ISSERVER*/				quitwhile = ON;				break;#if	ISSERVER && SFS_COMPAT			/* INDEX_DIR will be already set since this is the server, so we can direclty xfer the .glimpse_* files */			case '.' :				strcpy(TEMP_FILE, INDEX_DIR);				strcpy(temp_file, ".");				strcat(TEMP_FILE, "/.");				if (*(p + 1) == '\0') {/* space after - option */					if (argc <= 1) {						fprintf(stderr, "%s: a file name must follow the -. 
option\n", GProgname);						RETURN(usage());					}					argv ++;					strcat(TEMP_FILE, argv[0]);					strcat(temp_file, argv[0]);					argc --;				}				else {					strcat(TEMP_FILE, p+1);					strcat(temp_file, p+1);				}				if (!strcmp(temp_file, INDEX_FILE) || !strcmp(temp_file, FILTER_FILE) ||				    !strcmp(temp_file, ATTRIBUTE_FILE) || !strcmp(temp_file, MINI_FILE) ||				    !strcmp(temp_file, P_TABLE) || !strcmp(temp_file, PROHIBIT_LIST) ||				    !strcmp(temp_file, INCLUDE_LIST) || !strcmp(temp_file, NAME_LIST) ||				    !strcmp(temp_file, NAME_LIST_INDEX) || !strcmp(temp_file, NAME_HASH) ||				    !strcmp(temp_file, NAME_HASH_INDEX) || !strcmp(temp_file, DEF_STAT_FILE) ||				    !strcmp(temp_file, DEF_MESSAGE_FILE) || !strcmp(temp_file, DEF_TIME_FILE)) {					if ((ret = open(TEMP_FILE, O_RDONLY, 0)) <= 0) RETURN(ret);					while ((num_read = read(ret, matched_region, MAX_REGION_LIMIT*2)) > 0) {						write(1 /* NOT TO newsockfd since that was got by a syscall!!! */, matched_region, num_read);					}					close(ret);				}				quitwhile = ON;				RETURN(0);#endif	/* ISSERVER */			/* go to temp directory to create temp files */			case 'T' :				if (*(p + 1) == '\0') {/* space after - option */					if (argc <= 1) {						fprintf(stderr, "%s: a directory name must follow the -T option\n", GProgname);						RETURN(usage());					}					argv ++;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -