⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 main.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
					strcpy(TEMP_DIR, argv[0]);					argc --;				}				else {					strcpy(TEMP_DIR, p+1);				}				sprintf(tempfile, "%s/.glimpse_tmp.%d", TEMP_DIR, getpid());				quitwhile = ON;				break;			/* To get files within some number of days before indexing was done */			case 'Y':				if (*(p + 1) == '\0') {/* space after - option */					if (argc <= 1) {						fprintf(stderr, "%s: the number of days must follow the -Y option\n", GProgname);						RETURN(usage());					}					argv ++;					GNumDays = atoi(argv[0]);					argc --;				}				else {					GNumDays = atoi(p+1);				}				if (GNumDays <= 0) {					fprintf(stderr, "%s: the number of days %d must be > 0\n", GProgname, GNumDays);					RETURN(usage());				}				quitwhile = ON;				break;			case 'R' :				if (*(p + 1) == '\0') {/* space after - option */					if (argc <= 1) {						fprintf(stderr, "%s: the record size must follow the -R option\n", GProgname);						RETURN(usage());					}					argv ++;					RegionLimit = atoi(argv[0]);					argc --;				}				else {					RegionLimit = atoi(p+1);				}				if ((RegionLimit <= 0) || (RegionLimit > MAX_REGION_LIMIT)) {					fprintf(stderr, "Bad record size %d: must be in [%d, %d]: using default %d\n",						RegionLimit, 1, MAX_REGION_LIMIT, DEFAULT_REGION_LIMIT);					RegionLimit = DEFAULT_REGION_LIMIT;				}				quitwhile = ON;				break;			/* doesn't matter if we overwrite the value in the client since the same value would have been picked up in main() anyway */			case 'J' :				if (*(p + 1) == '\0') {/* space after - option */					if (argc <= 1) {						fprintf(stderr, "%s: the server host name must follow the -J option\n", GProgname);						RETURN(usageS());					}					argv ++;#if	!ISSERVER					strcpy(SERV_HOST, argv[0]);#endif	/*!ISSERVER*/					argc --;				}#if	!ISSERVER				else {					strcpy(SERV_HOST, p+1);				}#endif	/*!ISSERVER*/				quitwhile = ON;				break;			/* doesn't matter if we overwrite the value in the client since the same value would have been picked up in main() anyway */			case 'K' :				if (*(p 
+ 1) == '\0') {/* space after - option */					if (argc <= 1) {						fprintf(stderr, "%s: the server port must follow the -C option\n", GProgname);						RETURN(usage());					}					argv ++;#if	!ISSERVER					SERV_PORT = atoi(argv[0]);#endif	/*!ISSERVER*/					argc --;				}#if	!ISSERVER				else {					SERV_PORT = atoi(p+1);				}				if ((SERV_PORT < MIN_SERV_PORT) || (SERV_PORT > MAX_SERV_PORT)) {					fprintf(stderr, "Bad server port %d: must be in [%d, %d]: using default %d\n",						SERV_PORT, MIN_SERV_PORT, MAX_SERV_PORT, DEF_SERV_PORT);					SERV_PORT = DEF_SERV_PORT;				}#endif	/*!ISSERVER*/				quitwhile = ON;				break;			/* Based on contribution From ada@mail2.umu.se Fri Jul 12 01:56 MST 1996; Christer Holgersson, Sen. SysNet Mgr, Umea University/SUNET, Sweden */			/* the bit-mask corresponding to the set of filenames within which the pattern should be searched is explicitly provided in a filename (absolute path name) */			case 'p' :				if (*(p + 1) == '\0') {/* space after - option */					if (argc <= 1) {						fprintf(stderr, "%s: the bitfield file [and an offset/length/endian separated by :] must follow the -p option\n", GProgname);						RETURN(usage());					}					argv ++;					strcpy(bitfield_file, argv[0]);					argc --;				}				else {					strcpy(bitfield_file, p+1);				}				/* Find offset and length into bitfield file */				{					int iiii = 0;					BITFIELDOFFSET=0;					BITFIELDLENGTH=0;					BITFIELDENDIAN=0;					iiii = 0;					while (bitfield_file[iiii] != '\0') {						if (bitfield_file[iiii] == '\\') {							iiii ++;							if (bitfield_file[iiii] == '\0') break;							if (bitfield_file[iiii] == ':') {								strcpy(&bitfield_file[iiii-1], &bitfield_file[iiii]);							}							else iiii ++;							continue;						}						if (bitfield_file[iiii] == ':') {							bitfield_file[iiii] = '\0';							sscanf(&bitfield_file[iiii+1], "%d:%d:%d", &BITFIELDOFFSET, &BITFIELDLENGTH, &BITFIELDENDIAN);							if ((BITFIELDOFFSET < 0) || (BITFIELDLENGTH < 0) || (BITFIELDENDIAN < 0)) {				
				fprintf(stderr, "Wrong offset %d or length %d or endian %d of bitfield file\n", BITFIELDOFFSET, BITFIELDLENGTH, BITFIELDENDIAN);								RETURN(usage());							}							break;						}						iiii++;					}#if	BG_DEBUG					fprintf(debug, "BITFIELD %s : %d : %d : %d\n", BITFIELDFILE, BITFIELDOFFSET, BITFIELDLENGTH, BITFIELDENDIAN);#endif				}				if (bitfield_file[0] != '/') {					getcwd(temp_bitfield_file, MAX_LINE_LEN-1);					strcat(temp_bitfield_file, "/");					strcat(temp_bitfield_file, bitfield_file);					strcpy(bitfield_file, temp_bitfield_file);				}				BITFIELDFILE = 1;				quitwhile = ON;				break;			/* the set of filenames within which the pattern should be searched is explicitly provided in a filename (absolute path name) */			case 'f' :				if (*(p + 1) == '\0') {/* space after - option */					if (argc <= 1) {						fprintf(stderr, "%s: the filenames file must follow the -f option\n", GProgname);						RETURN(usage());					}					argv ++;					strcpy(filenames_file, argv[0]);					argc --;				}				else {					strcpy(filenames_file, p+1);				}				if (filenames_file[0] != '/') {					getcwd(temp_filenames_file, MAX_LINE_LEN-1);					strcat(temp_filenames_file, "/");					strcat(temp_filenames_file, filenames_file);					strcpy(filenames_file, temp_filenames_file);				}				FILENAMESINFILE = 1;				quitwhile = ON;				break;			case 'C' :				CONTACT_SERVER = 1;				break;			case 'a' :				PRINTATTR = 1;				break;			case 'E':				PRINTINDEXLINE = 1;				break;			case 'W':				wholefilescope = 1;				break;			case 'z' :				UseFilters = 1;				break;			case 'r' :				GRECURSIVE = 1;				break;			case 'V' :				printf("\nThis is glimpse version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);				RETURN(0);			/* Must let 'm' fall thru to default once multipatterns are done in agrep */			case 'm' :			case 'v' :				fprintf(stderr, "%s: illegal option: '-%c'\n", GProgname, c);				RETURN(usage());			case 'I' :			case 'D' :			case 'S' :				/* There is no space after these options */				
agrep_argv[agrep_argc] = (char *)my_malloc(strlen(argv[0]) + 2);				agrep_argv[agrep_argc][0] = '-';				strcpy(agrep_argv[agrep_argc] + 1, p);				agrep_argc ++;				quitwhile = ON;				break;			case 'l':				GFILENAMEONLY = 1;				my_l_index = agrep_argc;				agrep_argv[agrep_argc] = (char *)my_malloc(4);				agrep_argv[agrep_argc][0] =  '-';				agrep_argv[agrep_argc][1] = c;				agrep_argv[agrep_argc][2] = '\0';				agrep_argc ++;				break;			/*			 * Copy the set of options for agrep: put them in separate argvs			 * even if they are together after one '-' (easier to process).			 * These are agrep options which glimpse has to peek into.			 */			default:				agrep_argv[agrep_argc] = (char *)my_malloc(16);				agrep_argv[agrep_argc][0] =  '-';				agrep_argv[agrep_argc][1] = c;				agrep_argv[agrep_argc][2] = '\0';				agrep_argc ++;				if (c == 'n') {					nobytelevelmustbeon=1;				}				else if (c == 'X') GPRINTNONEXISTENTFILE = 1;				else if (c == 'j') GPRINTFILETIME = 1;				else if (c == 'b') GBYTECOUNT = 1;				else if (c == 'g') GPRINTFILENUMBER = 1;				else if (c == 't') GOUTTAIL = 1;				else if (c == 'y') GNOPROMPT = 1;				else if (c == 'h') GNOFILENAME = 1;				else if (c == 'c') GCOUNT = 1;				else if (c == 'B') {					GBESTMATCH = 1;					my_B_index = agrep_argc - 1;				}				/* the following options are followed by a parameter */				else if ((c == 'e') || (c == 'd') || (c == 'L') || (c == 'k')) {					if (*(p + 1) == '\0') {/* space after - option */						if(argc <= 1) {							fprintf(stderr, "%s: the '-%c' option must have an argument\n", GProgname, c);							RETURN(usage());						}						argv++;						if ( (c == 'd') && ((D_length = strlen(argv[0])) > MAX_NAME_SIZE) ) {							fprintf(stderr, "%s: delimiter pattern too long (has > %d chars)\n", GProgname, MAX_NAME_SIZE);							RETURN(usage());							/* Should this be RegionLimit if ByteLevelIndex? 
*/						}						else if (c == 'L') {							GLIMITOUTPUT = GLIMITTOTALFILE = GLIMITPERFILE = 0;							sscanf(argv[0], "%d:%d:%d", &GLIMITOUTPUT, &GLIMITTOTALFILE, &GLIMITPERFILE);							if ((GLIMITOUTPUT < 0) || (GLIMITTOTALFILE < 0) || (GLIMITPERFILE < 0)) {								fprintf(stderr, "%s: invalid output limit %s\n", GProgname, argv[0]);								RETURN(usage());							}						}						agrep_argv[agrep_argc] = (char *)my_malloc(strlen(argv[0]) + 2);						strcpy(agrep_argv[agrep_argc], argv[0]);						if (c == 'd') {							preprocess_delimiter(argv[0], D_length, GD_pattern, &GD_length);							if (GOUTTAIL == 2) GOUTTAIL = 0;							/* Should this be RegionLimit if ByteLevelIndex? */						}						if (c == 'k') GCONSTANT = 1;						argc--;					} else {						if ( (c == 'd') && ((D_length = strlen(p+1)) > MAX_NAME_SIZE) ) {							fprintf(stderr, "%s: delimiter pattern too long (has > %d chars)\n", GProgname, MAX_NAME_SIZE);							RETURN(usage());							/* Should this be RegionLimit if ByteLevelIndex? */						}						else if (c == 'L') {							GLIMITOUTPUT = GLIMITTOTALFILE = GLIMITPERFILE = 0;							sscanf(p+1, "%d:%d:%d", &GLIMITOUTPUT, &GLIMITTOTALFILE, &GLIMITPERFILE);							if ((GLIMITOUTPUT < 0) || (GLIMITTOTALFILE < 0) || (GLIMITPERFILE < 0)) {								fprintf(stderr, "%s: invalid output limit %s\n", GProgname, p+1);								RETURN(usage());							}						}						agrep_argv[agrep_argc] = (char *)my_malloc(strlen(p+1) + 2);						strcpy(agrep_argv[agrep_argc], p+1);						if (c == 'd') {							preprocess_delimiter(p+1, D_length-2, GD_pattern, &GD_length);							if (GOUTTAIL == 2) GOUTTAIL = 0;							/* Should this be RegionLimit if ByteLevelIndex? 
*/						}						if (c == 'k') GCONSTANT = 1;					}					agrep_argc ++;#if	DEBUG					fprintf(stderr, "%d = %s\n", agrep_argc, agrep_argv[agrep_argc - 1]);#endif	/*DEBUG*/					quitwhile = ON;					if ((c == 'e') || (c == 'k')) foundpat = 1;				}				/* else it is something that glimpse doesn't know and agrep needs to look at */				break;	/* from default: */			} /* switch(c) */			p ++;		}	} /* while (--argc > 0 && (*++argv)[0] == '-') *//* exitloop: */	if ((GBESTMATCH == ON) && (MATCHFILE == ON) && (Only_first == ON))		fprintf(stderr, "%s: Warning: the number of matches may be incorrect when -B is used with -F.\n", HARVEST_PREFIX);	if (GOUTTAIL) GOUTTAIL = 1;	if (GNOFILENAME) {		agrep_argv[my_A_index][1] = 'Z';	/* ignore the -A option */	}#if	ISSERVER	if (RemoteFiles) {	/* force -NQ so that won't start looking for files! */		Only_first = ON;		PRINTAPPXFILEMATCH = ON;	}#endif	if (argc > 0) {		/* copy the rest of the options the pattern and the filenames if any verbatim */		for (i=0; i<argc; i++) {			if (agrep_argc >= MAX_ARGS) break;			agrep_argv[agrep_argc] = (char *)my_malloc(strlen(argv[0]) + 2);			strcpy(agrep_argv[agrep_argc], argv[0]);			agrep_argc ++;			argv ++; }		if (!foundpat) argc --;	}#if	0	for (j=0; j<agrep_argc; j++) printf("agrep_argv[%d] = %s\n", j, agrep_argv[j]);	printf("argc = %d\n", argc);#endif	/*0*/	/*	 * Now perform the search by first looking at the index	 * and obtaining the files to search; and then search	 * them and output the result. If argc > 0, glimpse	 * runs as agrep: otherwise, it searches index, etc.	 */	if (argc <= 0) {		if (RecordLevelIndex) {	/* based on work done for robint@zedcor.com Robin Thomas, Art Today, Tucson, AZ */			/*			if ((D_length > 0) && strcmp(GD_pattern, rdelim)) {				fprintf(stderr, "Index created for delimiter `%s': cannot search with delimiter `%s'\n", rdelim, GD_pattern);				RETURN(-1);			}			SHOULD I HAVE THIS CHECK? MAYBE GD_pattern is a SUBSTRING OF rdelim??? But this is safest thing to do... 
robint@zedcor.com			*/			RegionLimit = 0;	/* region is EXACTLY the same record number, not a portion of a file within some offset+length */		}		glimpse_call = 1;		/* Initialize some data structures, read the index */		if (GRECURSIVE == 1) {			fprintf(stderr, "illegal option: '-r'\n");			RETURN(usage());		}		num_terminals = 0;		GParse = NULL;		memset(terminals, '\0', sizeof(ParseTree) * MAXNUM_PAT);#if	!ISSERVER		if (-1 == read_index(indexdir)) RETURN(-1);#endif	/*!ISSERVER*//*This handles the -n option with ByteLevelIndex: disabled as of now, else should go into file search...*/		if (nobytelevelmustbeon && (ByteLevelIndex && !RecordLevelIndex)) {	/* with RecordLevelIndex, we'll do search, so don't set NOBYTELEVEL */			/* fprintf(stderr, "Warning: -n option used with byte-level index: must SEARCH the files\n"); */			NOBYTELEVEL=ON;		}		WHOLEFILESCOPE = (WHOLEFILESCOPE || wholefilescope);		if (ByteLevelIndex) {			/* Must zero them here in addition to index search so that RETURN macro runs correctly */			if ((src_offset_table == NULL) &&			    ((src_offset_table = (struct offsets **)my_malloc(sizeof(struct offsets *) * OneFilePerBlock)) == NULL)) exit(2);			memset(src_offset_table, '\0', sizeof(struct offsets *) * OneFilePerBlock);			for (i=0; i<MAXNUM_PAT; i++) {				if ((multi_dest_offset_

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -