⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dir.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal.  All Rights Reserved. *//* ./glimpse/index/dir.c *//* The function of the program is to traverse the   directory tree and print the size of the files in the tree.   This program is derived from the C-programming language book   It opens a directory file using opendir system call, and uses readdir()   to read each entry of the directory.*/#include "autoconf.h"	/* ../libtemplate/include */#include <stdio.h>#include <sys/types.h>#if HAVE_DIRENT_H# include <dirent.h># define NAMLEN(dirent) strlen((dirent)->d_name)#else# define dirent direct# define NAMLEN(dirent) (dirent)->d_namlen# if HAVE_SYS_NDIR_H#  include <sys/ndir.h># endif# if HAVE_SYS_DIR_H#  include <sys/dir.h># endif# if HAVE_NDIR_H#  include <ndir.h># endif#endif#include <sys/stat.h>#include <fcntl.h>#define BUFSIZE 256#define DIRSIZE 14#include "glimpse.h"#undef MAX_LIST#define MAX_LIST 100000/* Removed on 16/Feb/1996 because changed type returned by lib_fstat to S_IFLNK#if	SFS_COMPAT#define FS_TYPEMASK	0x700000#define FS_LINK		0x300000#endif*/extern FILE *TIMEFILE;#if	BG_DEBUGextern FILE *LOGFILE;#endif	/*BG_DEBUG*/extern FILE *MESSAGEFILE;int ndx = 0;                    /* file index */extern char **name_list[MAXNUM_INDIRECT];  /* store the file names */extern int  *size_list[MAXNUM_INDIRECT];   /* store the sizes of the files */extern unsigned int *disable_list;   /* store whether to DISABLE indexing or not: only with FastIndex or AddToIndex */extern int  file_num;extern int  file_id;	/* borrowed from filetype.c */extern char INDEX_DIR[MAX_LINE_LEN];extern int AddToIndex;extern int DeleteFromIndex;extern int FastIndex;extern int OneFilePerBlock;extern int IncludeHigherPriority;extern int BuildDictionaryExisting;extern int IndexEverything;extern int printed_warning;extern int SortByTime;extern int p_table[];extern FILE *STATFILE;extern int ExtractInfo;extern int IndexableFile;extern int files_per_partition;extern int new_partition;extern int 
files_in_partition;extern struct stat istbuf;	/* imported from glimpse.c */extern int memory_usage;extern int mask_int[];extern char exin_argv[8];extern int exin_argc;extern char current_dir_buf[2*MAX_LINE_LEN + 4];	/* must have space to store pattern after directory name */extern unsigned char dummypat[MAX_PAT];extern int dummylen;extern FILE *dummyout;extern struct stat excstbuf;extern struct stat incstbuf;extern struct stat filstbuf;extern int num_filter;extern int filter_len[MAX_FILTER];extern CHAR *filter[MAX_FILTER];extern CHAR *filter_command[MAX_FILTER];/* * Exclude/Include priorities with exclude > include (IncludeHigherPriority = OFF = default): * 1. Command line arguments (inclusion --> exclude list is never applied) * 2. Exclude list (exclusion) * 3. Include list (inclusion) * 4. Symbolic links (exclusion) * 5. filter processing (inclusion --> so that binary files that can be filtered are not excluded) * 6. filetype (exclusion) *  * Exclude/Include priorities with include > exclude (IncludeHigherPriority = ON = -i): * 1. Command line arguments (inclusion --> exclude list is never applied) * 2. Include list (inclusion) * 3. Symbolic links (exclusion --> applying exclude list is unnecessary: optimization) * 4. Exclude list (exclusion) * 5. filter processing (inclusion --> so that binary files that can be filtered are not excluded) * 6. 
filetype (exclusion) */char outname[MAX_LINE_LEN];char inname[MAX_LINE_LEN];fsize(name, pat, pat_len, num_pat, inc, inc_len, num_inc, toplevel)char *name;char **pat;int *pat_len;int num_pat;char **inc;int *inc_len;int num_inc;int toplevel;{	struct stat stbuf;	int i;	int fileindex = -1;	int force_include = 0;	int len_current_dir_buf = strlen(current_dir_buf) + 1;	/* includes the '\0' which is going to be replaced by '\n' below */	int name_len;	char *t1;	char xinfo[MAX_LINE_LEN], temp[MAX_LINE_LEN];	int xinfo_len = 0;	if ((name == NULL) || (*name == '\0')) return 0;	name_len = strlen(name);	/* name[name_len] is '\0' */#ifdef	SW_DEBUG	printf("num_pat= %d num_inc= %d\n", num_pat, num_inc);	printf("name= %s\n", name);#endif	/*	 * Find out what to exclude, what to include and skip	 * over symbolic links that don't HAVE to be included.	 * Some Extra get_filename_index calls are done but	 * that won't cost you anything (just #ing twice).	 */	/* Check if cache set in glimpse.c is correct */	if (!IndexableFile && !DeleteFromIndex && FastIndex && ((fileindex = get_filename_index(name, name_list, file_num)) != -1) && (disable_list[block2index(fileindex)] & mask_int[fileindex % (8*sizeof(int))])) {	    if (num_pat <= 0) {		if (num_inc <= 0) return 0;		else if (incstbuf.st_ctime <= istbuf.st_ctime) return 0;	    }	    else {		if (num_inc <= 0) {			if (excstbuf.st_ctime <= istbuf.st_ctime) return 0;		}		else if ((excstbuf.st_ctime <= istbuf.st_ctime) && (incstbuf.st_ctime <= istbuf.st_ctime)) return 0;	    }	}#define PROCESS_EXIT \{\	if (AddToIndex || FastIndex || DeleteFromIndex) {\		if ((fileindex = get_filename_index(name, name_list, file_num)) != -1) \			remove_filename(fileindex, new_partition);\	}\}#define PROCESS_EXCLUDE \{\	if (!toplevel) for(i=0; i<num_pat; i++) {	/* bg: 15/mar/94 */\		if (pat_len[i] > 0) {\			name[name_len] = '\0';\			if (strstr(name, pat[i]) != NULL) {\				PROCESS_EXIT;\				return 0;\			}\		}\		else {	/* must call memagrep */\			int	ret;\			
name[name_len] = '\n';	/* memagrep wants names to end with '\n': '\0' is not necessary */\			/* printf("i=%d patlen=%d pat=%s inlen=%d input=%s\n", i, -pat_len[i], pat[i], len_current_dir_buf, current_dir_buf); */\			if (((pat_len[i] == -2) && (pat[i][0] == '.') && (pat[i][1] == '*')) ||\			    ((ret = memagrep_search(-pat_len[i], pat[i], len_current_dir_buf, current_dir_buf, 0, dummyout)) > 0))\			{\				/* printf("excluding with %d %s\n", ret, name); */\				name[name_len] = '\0';	/* restore */\				PROCESS_EXIT;\		  		return 0; \			}\			/* else printf("ret=%d\n");*/\		}\	}\	name[name_len] = '\0';\}#define PROCESS_INCLUDE \{\ 	/*\	 * When include has higher priority, we want to include directories\	 * by default and match the include patterns only against filenames.\	 * Based on bug reports for glimpse-2.1. bg: 2/mar/95.\ 	 */\ 	if (IncludeHigherPriority && ((stbuf.st_mode & S_IFMT) == S_IFDIR)) force_include = 1;\ 	else for (i=0; i<num_inc; i++) {	/* bg: 15/mar/94 */\		if (inc_len[i] > 0) {\			name[name_len] = '\0';\			if (strstr(name, inc[i]) != NULL) {\				force_include = 1;\				break;\			}\		}\		else {	/* must call memagrep */\			name[name_len] = '\n';	/* memagrep wants names to end with '\n': '\0' is not necessary */\			/* printf("pat=%s input=%s\n", pat[i], current_dir_buf); */\			if (((inc_len[i] == -2) && (inc[i][0] == '.') && (inc[i][1] == '*')) ||\			    (memagrep_search(-inc_len[i], inc[i], len_current_dir_buf, current_dir_buf, 0, dummyout) > 0))\			{\				force_include = 1;\				break;\			}\		}\	}\	name[name_len] = '\0';	/* restore */\	if (toplevel) force_include = 1;\}#define PROCESS_FILTER \{\	/*\	 * Filters should be processed independent of .include since they might have to be\	 * excluded first. However, they must be processed before filetype since legitimate\	 * files like *.Z might be excluded by it. 
Based on bug reports for glimpse-3.5: bg: 11/Apr/96.\	 */\	if (!force_include) for (i=0; i<num_filter; i++) {	/* bg: 16/sep/94 */\		if (filter_len[i] > 0) {\			name[name_len] = '\0';\			if (strstr(name, filter[i]) != NULL) {\				force_include = 1;\				break;\			}\		}\		else {	/* must call memagrep */\			name[name_len] = '\n';	/* memagrep wants names to end with '\n': '\0' is not necessary */\			/* printf("pat=%s input=%s\n", pat[i], current_dir_buf); */\			if (((filter_len[i] == -1) && (filter[i][0] == '.') && (filter[i][1] == '*')) ||\			    (memagrep_search(-filter_len[i], filter[i], len_current_dir_buf, current_dir_buf, 0, dummyout) > 0))\			{\				force_include = 1;\				break;\			}\		}\	}\	name[name_len] = '\0';	/* restore */\}        if(my_lstat(name, &stbuf) == -1) {		if (IndexableFile) return 0;		/* Can happen for command line arguments, not stuff obtained from fsize_directory() */#if	BG_DEBUG		fprintf(LOGFILE, "cannot find %s -- not indexing\n", name);#endif	/*BG_DEBUG*/		PROCESS_EXIT;		return 0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -