⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 main.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
			agrep_inpointer = 0;			if ((ret = exec(-1, NULL)) > 0)				totalret ++; /* += ret; */ 			else if ((ret < 0) && (errno == AGREP_ERROR)) {				fclose(infp);				return -1;			}		}		if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||		    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) break;	/* done */		if ((totalret > 0) && FILENAMEONLY) break;	} /* while *p1 != NULL */	SetCurrentByteOffset = 0;	fclose(infp);	if (totalret > 0) {	/* dirty solution: must handle part of agrep here */		if (COUNT && !FILEOUT && !SILENT) {			if(!NOFILENAME) fprintf(outfp, "%s: %d\n", CurrentFileName, totalret);			else fprintf(outfp, "%d\n", totalret);		}		else if (FILEOUT) {			file_out(realfilename);		}	}	return totalret;}/* Sets lastfilenumber that needs to be searched: rest must be discarded */intprocess_Y_option(num_files, num_days, fp)	int	num_files, num_days;	FILE	*fp;{	CHAR	arrayend[4];	last_Y_filenumber = 0;	if ((num_days <= 0) || (fp == NULL) || (timesindexsize <= 0)) return 0;	last_Y_filenumber = num_files;	if (num_days * sizeof(int) >= timesindexsize) return 0;	/* everything will be within so many days */	if (fseek(fp, num_days*sizeof(int), 0) == -1) return -1;	fread(arrayend, 1, 4, fp);	if ((last_Y_filenumber = (arrayend[0] << 24) | (arrayend[1] << 16) | (arrayend[2] << 8) | arrayend[3]) > num_files) last_Y_filenumber = num_files;	if (last_Y_filenumber == 0) {		last_Y_filenumber = 1;		printf("Warning: no files modified in the last %d days were found in the index.\nSearching only the most recently modified file...\n", num_days);	}	return 0;}read_index(indexdir)char	indexdir[MAXNAME];{	char	*home;	char	s[MAXNAME];	int	ret;	if (indexdir[0] == '\0') {		if ((home = (char *)getenv("HOME")) == NULL) {			getcwd(indexdir, MAXNAME-1);			fprintf(stderr, "using working-directory '%s' to locate index\n", indexdir);		}		else strncpy(indexdir, home, MAXNAME);	}	ret = chdir(indexdir);	if (getcwd(INDEX_DIR, MAXNAME-1) == NULL) strcpy(INDEX_DIR, 
indexdir);	if (ret < 0) {		fprintf(stderr, "using working-directory '%s' to locate index\n", INDEX_DIR);	}	sprintf(s, "%s", INDEX_FILE);	indexfp = fopen(s, "r");	if(indexfp == NULL) {		fprintf(stderr, "can't open glimpse index-file %s/%s\n", INDEX_DIR, INDEX_FILE);		fprintf(stderr, "(use -H to give an index-directory or run 'glimpseindex' to make an index)\n");		return -1;	}	if (stat(s, &index_stat_buf) == -1) {		fprintf(stderr, "can't stat %s/%s\n", INDEX_DIR, s);		fclose(indexfp);		return -1;	}	sprintf(s, "%s", P_TABLE);	partfp = fopen(s, "r");	if(partfp == NULL) {		fprintf(stderr, "can't open glimpse partition-table %s/%s\n", INDEX_DIR, P_TABLE);		fprintf(stderr, "(use -H to specify an index-directory or run glimpseindex to make an index)\n");		fclose(indexfp);		return -1;	}	sprintf(s, "%s", DEF_TIME_FILE);	timesfp = fopen(s, "r");	sprintf(s, "%s.index", DEF_TIME_FILE);	timesindexfp = fopen(s, "r");	if (timesindexfp != NULL) {		struct stat st;		fstat(fileno(timesindexfp), &st);		timesindexsize = st.st_size;	}	/* Get options */#if	BG_DEBUG	debug = fopen(DEBUG_FILE, "w+");	if(debug == NULL) {		fprintf(stderr, "can't open file %s/%s, errno=%d\n", INDEX_DIR, DEBUG_FILE, errno);		return(-1);	}#endif	/*BG_DEBUG*/	fgets(indexnumberbuf, 256, indexfp);	if(strstr(indexnumberbuf, "1234567890")) IndexNumber = ON;	else IndexNumber = OFF;	fscanf(indexfp, "%%%d\n", &OneFilePerBlock);	if (OneFilePerBlock < 0) {		ByteLevelIndex = ON;		OneFilePerBlock = -OneFilePerBlock;	}	else if (OneFilePerBlock == 0) {		GNumpartitions = get_table(P_TABLE, p_table, MAX_PARTITION, 0);	}	fscanf(indexfp, "%%%d%s\n", &StructuredIndex, old_rdelim);	/* Set WHOLEFILESCOPE for do-it-yourself request processing at client */	WHOLEFILESCOPE = 1;	if (StructuredIndex <= 0) {		if (StructuredIndex == -2) {			RecordLevelIndex = 1;			strcpy(rdelim, old_rdelim);			rdelim_len = strlen(rdelim);			preprocess_delimiter(rdelim, rdelim_len, rdelim, &rdelim_len);		}		WHOLEFILESCOPE = 0;		StructuredIndex = 0;		PRINTATTR 
= 0;	/* doesn't make sense: must not go into filter_output */	}	else if (-1 == (StructuredIndex = attr_load_names(ATTRIBUTE_FILE))) {		fprintf(stderr, "error in reading attribute file %s/%s\n", INDEX_DIR, ATTRIBUTE_FILE);		return(-1);	}#if	BG_DEBUG	fprintf(debug, "buf = %s OneFilePerBlock=%d StructuredIndex=%d\n", indexnumberbuf, OneFilePerBlock, StructuredIndex);#endif	/*BG_DEBUG*/	sprintf(s, "%s", MINI_FILE);	minifp = fopen(s, "r");	/* if (minifp==NULL && OneFilePerBlock) fprintf(stderr, "Can't open for reading: %s/%s --- cannot do very fast search\n", INDEX_DIR, MINI_FILE); */	if (OneFilePerBlock && glimpse_isserver && (minifp != NULL)) read_mini(indexfp, minifp);	read_filenames();	/* Once IndexNumber info is available */	set_indexable_char(indexable_char);	set_indexable_char(test_indexable_char);	set_special_char(indexable_char);	return 0;}#define CLEANUP \{\	int	q, k;\	if (timesfp != NULL) fclose(timesfp);\	if (timesindexfp != NULL) fclose(timesindexfp);\	if (indexfp != NULL) fclose(indexfp);\	if (partfp != NULL) fclose(partfp);\	if (minifp != NULL) fclose(minifp);\	if (nullfp != NULL) fclose(nullfp);\	indexfp = partfp = minifp = nullfp = NULL;\	if (ByteLevelIndex) {\		if (src_offset_table != NULL) for (k=0; k<OneFilePerBlock; k++) {\			free_list(&src_offset_table[k]);\		}\		for (q=0; q<MAXNUM_PAT; q++) {\		    if (multi_dest_offset_table[q] != NULL) for (k=0; k<OneFilePerBlock; k++) {\			free_list(&multi_dest_offset_table[q][k]);\		    }\		}\	}\	if (StructuredIndex) {\		attr_free_table();\	}\	destroy_filename_hashtable();\	my_free(SERV_HOST, MAXNAME);\}/* Called whenever we get SIGUSR2/SIGHUP (at the end of process_query()) */reinitialize_server(argc, argv)	int	argc;	char	**argv;{	int	i, fd;	CLEANUP;#if	0	init_filename_hashtable();	region_initialize();	indexfp = partfp = minifp = nullfp = NULL;	if ((nullfp = fopen("/dev/null", "w")) == NULL) {		return(-1);	}	src_offset_table = NULL;	for (i=0; i<MAXNUM_PAT; i++) multi_dest_offset_table[i] = NULL;	if (-1 == 
read_index(INDEX_DIR)) return(-1);#if	0#ifndef	LOCK_UN#define LOCK_UN	8#endif	if ((fd = open(INDEX_DIR, O_RDONLY)) == -1) return -1;	flock(fd, LOCK_UN);	close(fd);#endif	return 0;#else	return execve(argv[0], argv, environ);#endif}/* MUST CARE IF PIPE/SOCKET IS BROKEN! ALSO SIGUSR1 (hardy@cs.colorado.edu) => QUIT CURRENT REQUEST. */int ignore_signal[32] = {	0,			0, 0, 1, 1, 1, 1, 1, 1,	/* all the tracing stuff: since default action is to dump core */			0, 0, 0, 0, 0, 0, 0, 0,			0, 0, 0, 0, 0, 0, 0, 0,			0, 0, 0, 0, 1, 0, 0 };	/* resource lost: since default action is to dump core *//* S.t. sockets don't persist: they sometimes have a bad habit of doing so */voidcleanup(){	int	i;	/* ^C in the middle of a client call */	if (svstderr != 2) {		close(2);		dup(svstderr);	}	fprintf(stderr, "server cleaning up...\n");	CLEANUP;	for (i=0; i<64; i++) close(i);	exit(3);}void reinitialize(s)int s;{	/* To force main-while loop call reinitialize_server() after do_select() */	glimpse_reinitialize = 1;#ifdef __svr4__	/* Solaris 2.3 insists that you reset the signal handler */	(void)signal(s, reinitialize);#endif}#define QUITREQUESTMSG "glimpseserver: aborting request...\n"/* S.t. one request doesn't keep server occupied too long, when client already quits */void quitrequest(s)int s;{	/*	 * Don't write onto stderr, since 2 is duped to sockfd => can cause recursive signal!	 * Also, don't print error message more than once for quitting one request. The	 * server receives signals for EVERY write it attempts when it finds a match: I could	 * not find a way to prevent it, but agrep/bitap.c/fill_buf() was fixed to limit it.	 
* -- bg on 16th Feb 1995	 */	if (!glimpse_clientdied && (s != SIGUSR1))	/* USR1 is a "friendly" cleanup message */		write(svstderr, QUITREQUESTMSG, strlen(QUITREQUESTMSG));	glimpse_clientdied = 1;#ifdef __svr4__	/* Solaris 2.3 insists that you reset the signal handler */	(void)signal(s, quitrequest);#endif}/* The client receives this signal when an output/input pipe is broken, etc. It simply exits from the current request */void exitrequest(){	glimpse_clientdied = 1;}main(argc, argv)int argc;char *argv[];{	int	ret, tried = 0;	char	indexdir[MAXNAME];	char	**oldargv = argv;	int	oldargc = argc;#if	CLIENTSERVER	int	sockfd, newsockfd, clilen, len, clpid;	int	clout;#if	USE_UNIXDOMAIN	struct sockaddr_un cli_addr, serv_addr;#else	/*USE_UNIXDOMAIN*/	struct sockaddr_in cli_addr, serv_addr;	struct hostent *hp;#endif	/*USE_UNIXDOMAIN*/	int	cli_len;	int	clargc;	char	**clargv;	int	clstdin, clstdout, clstderr;	int	i;	char	array[4];	char	*p, c;#endif	/*CLIENTSERVER*/	int	quitwhile;#if	ISO_CHAR_SET	setlocale(LC_ALL,"");       /* support for 8bit character set: ew@senate.be, Henrik.Martin@eua.ericsson.se */#endif#if	CLIENTSERVER && ISSERVER	glimpse_isserver = 1;	/* I am the server */#else	/*CLIENTSERVER && ISSERVER*/	if (argc <= 1) {		usage();	/* Client nees at least 1 argument */		exit(1);	}#endif	/*CLIENTSERVER && ISSERVER*/#define RETURNMAIN(val)\{\        CLEANUP;\        if (val < 0) exit (2);\	 else if (val == 0) exit (1);\        else exit (0);\}	SERV_HOST = (CHAR *)my_malloc(MAXNAME);#if	!SYSCALLTESTING	/* once-only initialization */	init_filename_hashtable();	src_offset_table = NULL;	for (i=0; i<MAXNUM_PAT; i++) multi_dest_offset_table[i] = NULL;#endif	gethostname(SERV_HOST, MAXNAME - 2);	SERV_PORT = DEF_SERV_PORT;	srand(getpid());	umask(077);	strcpy(&GProgname[0], argv[0]);#if	!SYSCALLTESTING	region_initialize();#endif	indexfp = partfp = minifp = nullfp = NULL;	if ((nullfp = fopen("/dev/null", "w")) == NULL) {		fprintf(stderr, "%s: cannot open for writing: /dev/null, 
errno=%d\n", argv[0], errno);		RETURNMAIN(-1);	}	InterpretSpecial = ON;	GMAX_WORD_SIZE = MAXPAT;#if	CLIENTSERVER#if	!ISSERVER	/* Install signal handlers so that glimpse doesn't continue to run when pipes get broken, etc. */	if (((void (*)())-1 == signal(SIGPIPE, exitrequest))#ifndef	SCO	    || ((void (*)())-1 == signal(SIGURG, exitrequest))#endif	)	{		/* Check for return values here since they ensure reliability */		fprintf(stderr, "glimpse: Unable to install signal-handlers.\n");		RETURNMAIN(-1);	}	/* Check if client has too many arguments: then it is surely running as agrep since I have < half those options! */	if (argc > MAX_ARGS) goto doityourself;#endif	/*!ISSERVER*/#if	!SYSCALLTESTING	while((--argc > 0) && (*++argv)[0] == '-' ) {		p = argv[0] + 1;	/* ptr to first character after '-' */		c = *(argv[0]+1);		quitwhile = OFF;		while (!quitwhile && (*p != '\0')) {			c = *p;			switch(c) {			/* Look for -H option at server (only one that makes sense); if client has a -H, then it goes to doityourself */			case 'H' :				if (*(p + 1) == '\0') {/* space after - option */					if (argc <= 1) {						fprintf(stderr, "%s: a directory name must follow the -H option\n", GProgname);						RETURNMAIN(usageS());					}					argv ++;					strcpy(indexdir, argv[0]);					argc --;				}				else {					strcpy(indexdir, p+1);				}				quitwhile = ON;				break;			/* Recognized by both client and server */			case 'J' :				if (*(p + 1) == '\0') {/* space after - option */					if (argc <= 1) {						fprintf(stderr, "%s: the server host name must follow the -J option\n", GProgname);#if	ISSERVER						RETURNMAIN(usageS());#else	/*ISSERVER*/						RETURNMAIN(usage());#endif	/*ISSERVER*/					}					argv ++;					strcpy(SERV_HOST, argv[0]);					argc --;				}				else {					strcpy(SERV_HOST, p+1);				}				quitwhile = ON;				break;			/* Recognized by both client and server */			case 'K' :				if (*(p + 1) == '\0') {/* space after - option */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -