⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dir.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
	}	/* Else lstat has all the requisite information *//* Removed on 16/Feb/1996 becuase changed type returned by lib_fstat to S_IFLNK#if	SFS_COMPAT	if ((stbuf.st_spare1 & FS_TYPEMASK) == FS_LINK) return 0;#endif*/	if ((stbuf.st_mode & S_IFMT) == S_IFLNK)  {		/* if (IndexableFile) return 0; ---> not correct! must process include/exclude with -I too */		PROCESS_INCLUDE;		if (!force_include) {#if	BG_DEBUG			fprintf(LOGFILE, "%s is a symbolic link -- not indexing\n", name);#endif	/*BG_DEBUG*/			PROCESS_EXIT;			return 0;		}		if (-1 == my_stat(name, &stbuf)) {#if	BG_DEBUG			fprintf(LOGFILE, "cannot find target of symbolic link %s -- not indexing\n", name);#endif	/*BG_DEBUG*/			PROCESS_EXIT;			return 0;		}	}	else /* if (!IndexableFile) ---> not correct! must process include/exclude with -I too */ {		/* Put exclude include processing here... stat all the time: that is faster than former! */		if (FastIndex && ((fileindex = get_filename_index(name, name_list, file_num)) != -1)) {			/* Don't process exclude/include if the file `name' is older then the index AND the exclude/include file is older then the index */			if (IncludeHigherPriority) {				if (!((stbuf.st_ctime <= istbuf.st_ctime) && (incstbuf.st_ctime <= istbuf.st_ctime)))					PROCESS_INCLUDE;				if (!force_include && !((stbuf.st_ctime <= istbuf.st_ctime) && (excstbuf.st_ctime <= istbuf.st_ctime)))					PROCESS_EXCLUDE;			}			else {				if (!((stbuf.st_ctime <= istbuf.st_ctime) && (excstbuf.st_ctime <= istbuf.st_ctime)))					PROCESS_EXCLUDE;				if (!((stbuf.st_ctime <= istbuf.st_ctime) && (incstbuf.st_ctime <= istbuf.st_ctime)))					PROCESS_INCLUDE;			}			if (!((stbuf.st_ctime <= istbuf.st_ctime) && (filstbuf.st_ctime <= istbuf.st_ctime)))				PROCESS_FILTER;		}		else {	/* Either AddToIndex or fresh indexing or previously excluded file: process exclude and include */			if (IncludeHigherPriority) {				PROCESS_INCLUDE;				if (!force_include)					PROCESS_EXCLUDE;			}			else {				PROCESS_EXCLUDE;				PROCESS_INCLUDE;			}			
PROCESS_FILTER;		}	}	/* Here, the file exists and has not been excluded -- possibly has been included */index_everything:	if ((stbuf.st_mode & S_IFMT) == S_IFDIR) {		if (-1 == fsize_directory(name, pat, pat_len, num_pat, inc, inc_len, num_inc)) return -1;	}        else if ((stbuf.st_mode & S_IFMT) == S_IFREG) {	/* regular file */	    if (IndexableFile) {		if (!filetype(name, IndexEverything?2:1, NULL, NULL)) printf("%s\n", name);		return 0;	    }	    if (DeleteFromIndex) {		if ((fileindex = get_filename_index(name, name_list, file_num)) != -1) {		    remove_filename(fileindex, new_partition);		}		/* else doesn't exist in index, so doesn't matter */		return 0;	    }	    file_id ++;	    if (BuildDictionaryExisting) {		/* Don't even store the names of the files that are not uncompressible */		if (file_num >= MaxNum24bPartition) {		    fprintf(stderr, "Too many files in index: indexing the first %d only.\n", MaxNum24bPartition);		    return -1;		}	        if (tuncompress_file(name, outname, TC_EASYSEARCH | TC_OVERWRITE | TC_NOPROMPT) <= 0) return 0;		file_num++;		t1 = (char *) my_malloc(strlen(outname) + 2);		strcpy(t1, outname);		/* name_list[ndx] = t1; */		LIST_ADD(name_list, ndx, t1, char*);		/* size_list[ndx] = stbuf.st_size;*/		LIST_ADD(size_list, ndx, stbuf.st_size, int);		ndx ++;		return 0;	    }#ifdef SW_DEBUG	    printf("%s: ", name);#endif	    if (AddToIndex || FastIndex) {		if ((fileindex = get_filename_index(name, name_list, file_num)) != -1) {		    LIST_ADD(size_list, fileindex, stbuf.st_size, int);		    if (FastIndex && (stbuf.st_ctime <= istbuf.st_ctime))			disable_list[block2index(fileindex)] |= mask_int[fileindex % (8*sizeof(int))];		    else { /* AddToIndex or file was modified (=> its type might have changed!) 
*/			if (filetype(name, IndexEverything?2:1, &xinfo_len, xinfo)) {			    if (!force_include) {				remove_filename(fileindex, new_partition);				return 0;			    }			    else {#if	BG_DEBUG				fprintf(LOGFILE, "overriding and indexing: %s\n", name);#endif	/*BG_DEBUG*/			    }			}			if (ExtractInfo && (xinfo_len > 0)/* && (special_get_name(name, name_len, temp) != -1) NOT NEEDED since name is got from UNIX */) {			    my_free(LIST_SUREGET(name_list, fileindex));			    t1 = (char *)my_malloc(strlen(name) + xinfo_len + 3);			    strcpy(t1, name);			    strcat(t1, " ");			    strcat(t1, xinfo);			    LIST_ADD(name_list, fileindex, t1, char*);			    change_filename(name, name_len, fileindex, t1);			}			disable_list[block2index(fileindex)] &= ~(mask_int[fileindex % (8*sizeof(int))]);		    }		}		else {	/* new file not in filenames so no point in checking */		    if(filetype(name, IndexEverything?2:1, &xinfo_len, xinfo)) {			if (!force_include) return 0;			else {#if	BG_DEBUG				fprintf(LOGFILE, "overriding and indexing: %s\n", name);#endif	/*BG_DEBUG*/			}		    }		    if (file_num >= MaxNum24bPartition) {			fprintf(stderr, "Too many files in index: indexing the first %d only.\n", MaxNum24bPartition);			return -1;		    }		    if (ExtractInfo && (xinfo_len > 0)) {			t1 = (char *)my_malloc(strlen(name) + xinfo_len + 3);			strcpy(t1, name);			strcat(t1, " ");			strcat(t1, xinfo);		    }		    else {			t1 = (char *)my_malloc(strlen(name) + 2);			strcpy(t1, name);		    }		    /* name_list[file_num] = t1; */		    LIST_ADD(name_list, file_num, t1, char*);		    /* size_list[file_num] = stbuf.st_size; */		    LIST_ADD(size_list, file_num, stbuf.st_size, int);		    insert_filename(LIST_GET(name_list, file_num), file_num);		    file_num ++;		    if (!OneFilePerBlock) {		        if (files_in_partition + 1 > files_per_partition) {			    if (new_partition + 1 > MaxNumPartition) {				if (!printed_warning) {				    printed_warning = 1;				    if (AddToIndex) {					fprintf(MESSAGEFILE, 
"Warning: partition-table overflow! Fresh indexing recommended.n");				    }				    else {					fprintf(MESSAGEFILE, "Warning: partition-table overflow! Commencing fresh indexing...\n");					return -1;				    }				}			    }			    else new_partition++;			    files_in_partition = 0;			    /* so that we don't get into this if-branch until another files_per_partition new files are seen */			}			p_table[new_partition] = file_num;			files_in_partition ++;		    }		}	    }	    else { /* Fresh indexing: very simple -- add everything */		if(filetype(name, IndexEverything?2:1, &xinfo_len, xinfo)) {		    if (!force_include) return 0;		    else {#if	BG_DEBUG			fprintf(LOGFILE, "overriding and indexing: %s\n", name);#endif	/*BG_DEBUG*/		    }		}		if (file_num >= MaxNum24bPartition) {		    fprintf(stderr, "Too many files in index: indexing the first %d only.\n", MaxNum24bPartition);		    return -1;		}		if (SortByTime) fprintf(TIMEFILE, "%ld %d\n", stbuf.st_mtime, file_num);		file_num++;		if (ExtractInfo && (xinfo_len > 0)) {		    t1 = (char *)my_malloc(strlen(name) + xinfo_len + 3);		    strcpy(t1, name);		    strcat(t1, " ");		    strcat(t1, xinfo);		}		else {		    t1 = (char *) my_malloc(strlen(name) + 2);		    strcpy(t1, name);		}		/* name_list[ndx] = t1; */		LIST_ADD(name_list, ndx, t1, char*);		/* size_list[ndx] = stbuf.st_size; */		LIST_ADD(size_list, ndx, stbuf.st_size, int);		ndx++;	    }        }	return 0;}/* uses the space in the same "name" to get names of files in that directory and calls fsize *//* pat, pat_len, num_pat, inc, inc_len, num_inc are just used for recursive calls to fsize *//* special_get_name() doesn't have to be done since glimpseindex indexes just files, not directories, so dir's have no URL information, etc. 
*/

/*
 * fsize_directory: call fsize() on every entry of the directory `name'.
 *
 * Entry paths are built in place: a '/' followed by each entry's d_name is
 * appended to the caller's `name' buffer and fsize() is called on the result.
 * The entries "." and ".." (and empty names) are skipped.  Before returning
 * from a scan, the buffer is cut back to the original directory name.
 *
 * name      directory path, held in a writable buffer that is extended in place
 * pat, pat_len, num_pat, inc, inc_len, num_inc
 *           not examined here; passed through unchanged to fsize()
 *
 * Returns 0 when the directory was scanned, or when it was skipped (NULL or
 * empty name; name too long or opendir() failure, both reported on stderr).
 * Returns -1 as soon as fsize() returns -1 for an entry; the remaining
 * entries are not visited.  The directory stream is closed and `name' is
 * restored on that path as well, so an abort does not leak an open DIR per
 * recursion level.
 */
int
fsize_directory(name, pat, pat_len, num_pat, inc, inc_len, num_inc)
char *name;
char **pat;
int *pat_len;
int  num_pat;
char **inc;
int *inc_len;
int  num_inc;
{
	struct dirent *dp;
	char *nbp, *nep;
	int i;
	int ret = 0;
	DIR *dirp;

	if ((name == NULL) || (*name == '\0')) return 0;

	nbp = name + strlen(name);	/* position where '/' and the entry names are appended */
	if (nbp+DIRSIZE+2 >= name+BUFSIZE) {	/* name too long */
		fprintf(stderr, "name too long: %s\n", name);
		return 0;
	}
	if ((dirp = opendir(name)) == NULL) {
		fprintf(stderr, "permission denied or non-existent directory: %s\n", name);
		return 0;
	}

	*nbp++ = '/';
	for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) {
		if (dp->d_name[0] == '\0' || strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0) continue;

		/*
		 * Copy the entry name after the '/', stopping early if the buffer bound is hit.
		 * NOTE(review): this bound uses stdio's BUFSIZ while the guard above uses
		 * BUFSIZE -- confirm which one matches the size of the caller's buffer.
		 */
		for (i=0, nep=nbp; (dp->d_name[i] != '\0') && (nep < name+BUFSIZ-1); i++)
			*nep++ = dp->d_name[i];
		*nep = '\0';
		if (dp->d_name[i] != '\0') {	/* copy was cut short: skip this entry */
			fprintf(stderr, "name too long: %s\n", name);
			continue;
		}

		if (-1 == fsize(name, pat, pat_len, num_pat, inc, inc_len, num_inc, 0)) {
			/* leave through the common exit so that dirp is closed */
			ret = -1;
			break;
		}
	}

	closedir(dirp);
	*--nbp = '\0'; /* restore name */
	return ret;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -