📄 io.c

📁 linux下阅读源码的好工具
💻 C
📖 第 1 页 / 共 4 页
字号:
					patlen += 2;				}				else if (filterbuf[commandpos] != '\'') {					commandpos ++;					patlen ++;				}				else break;			}			if ((commandpos >= len) || (patlen <= 0)) continue;			commandpos ++;		}		else {			patpos = commandpos;			patlen = 0;			while ((commandpos < len) && (filterbuf[commandpos] != ' ') && (filterbuf[commandpos] != '\t')) {				commandpos ++;				patlen ++;			}			while ((commandpos < len) && ((filterbuf[commandpos] == ' ') || (filterbuf[commandpos] == '\t'))) commandpos ++;			if (commandpos >= len) continue;		}		memcpy(tempbuf, &filterbuf[patpos], patlen);		tempbuf[patlen] = '\0';		if ((filter_len[num_filter] = convert2agrepregexp(tempbuf, patlen)) == 0) continue;	/* inplace conversion */		filter[num_filter] = (unsigned char *) strdup(tempbuf);		filter_command[num_filter] = (unsigned char *)strdup(&filterbuf[commandpos]);		num_filter ++;	}	fclose(filterfile);    }    load_dyn_filters(); /* load filters in shared libraries -- CV 9/14/99 */}/* 1 if filter application was successful and the output (>1B) is in outname, 2 if some pattern matched but there is no output, 0 otherwise: sep 15-18 '94 *//* memagrep is initialized in partition.c for calls from dir.c, and it is already done by the time we call this function from main.c */apply_filter(inname, outname)	char	*inname, *outname;	/* outname is in-out, inname is in */{	int	i;	char	name[MAX_LINE_LEN], es1[MAX_LINE_LEN], es2[MAX_LINE_LEN];	int	name_len = strlen(inname);	char	s[MAX_LINE_LEN];	FILE	*dummyout;	FILE	*dummyin;	char	dummybuf[4];	char	prevoutname[MAX_LINE_LEN];	char	newoutname[MAX_LINE_LEN];	char	tempoutname[MAX_LINE_LEN];	char	tempinname[MAX_LINE_LEN];	int	ret = 0;	int	unlink_prevoutname = 0;	if (num_filter <= 0) return 0;	if ((dummyout = fopen("/dev/null", "w")) == NULL) return 0;	/* ready for memgrep */	name[0] = '\n';	special_get_name(inname, name_len, tempinname);	name_len = strlen(tempinname);	strcpy(name+1, tempinname);	strcpy(prevoutname, tempinname);	strcpy(newoutname, outname);	/* Current properly filtered output is always in prevoutname */	for(i=0; i<num_filter; i++) {		if (filter_len[i] > 0) {			char *suffix;			name[name_len + 1] = '\0';			if ((suffix = strstr(name+1, filter[i])) != NULL) {	/* Chris Dalton */				if (ret == 0) ret = 2;				/* yes, it matched: now apply the command and get the output */				/* printf("filtering %s\n", name); */				/* new filter function -- CV 9/14/99 */				apply_one_filter(i, prevoutname, newoutname);				if (((dummyin = my_fopen(newoutname, "r")) == NULL) || (fread(dummybuf, 1, 1, dummyin) <= 0)) {					if (dummyin != NULL) fclose(dummyin);					unlink(newoutname);					continue;				}				/* Filter was successful: output exists and has atleast 1 byte in it */				fclose(dummyin);				if (unlink_prevoutname) {					unlink(prevoutname);					strcpy(tempoutname, prevoutname);					strcpy(prevoutname, newoutname);					strcpy(newoutname, tempoutname);				}				else {					strcpy(prevoutname, newoutname);					sprintf(newoutname, "%s.o", prevoutname);				}				ret = 1;				unlink_prevoutname = 1;#if	1				/* if the matched text was a proper suffix of the name, */				/* remove the suffix just processed before examining the */				/* name again. Chris Dalton */				/* And I don't know what the equivalent thing is with */				/* memagrep_search: since it doesn't return a pointer to */				/* the place where the match occured. Burra Gopal */				if (strcmp(filter[i], suffix) == 0) {					name_len -= strlen(suffix);					*suffix= '\0';				}#endif	/*1*/				if (strlen(newoutname) >= MAX_LINE_LEN - 1) break;			}		}		else {	/* must call memagrep */			name[name_len + 1] = '\n';	/* memagrep wants names to end with '\n': '\0' is not necessary */			/* printf("i=%d filterlen=%d filter=%s inlen=%d input=%s\n", i, -filter_len[i], filter[i], len_current_dir_buf, current_dir_buf); */			if (((filter_len[i] == -2) && (filter[i][0] == '.') && (filter[i][1] == '*')) ||			    (memagrep_search(-filter_len[i], filter[i], name_len + 2, name, 0, dummyout) > 0)) {				if (ret == 0) ret = 2;				/* yes, it matched: now apply the command and get the output */				/* printf("filtering %s\n", name); */				/* new filter function -- CV 9/14/99 */ 				apply_one_filter(i, prevoutname, newoutname);				if (((dummyin = my_fopen(newoutname, "r")) == NULL) || (fread(dummybuf, 1, 1, dummyin) <= 0)) {					if (dummyin != NULL) fclose(dummyin);					unlink(newoutname);					continue;				}				/* Filter was successful: output exists and has atleast 1 byte in it */				fclose(dummyin);				if (unlink_prevoutname) {					unlink(prevoutname);					strcpy(tempoutname, prevoutname);					strcpy(prevoutname, newoutname);					strcpy(newoutname, tempoutname);				}				else {					strcpy(prevoutname, newoutname);					sprintf(newoutname, "%s.o", prevoutname);				}		  		ret = 1;				unlink_prevoutname = 1;				if (strlen(newoutname) >= MAX_LINE_LEN - 1) break;			}		}	}	if (ret == 1) strcpy(outname, prevoutname);	else {	/* dummy filter that copies input to output: caller can use tempinname but this has easy interface */	    /* replaced system() call with a simple copy function. -- CV 9/14/99 */	    copy_file(tempinname, outname);	}	fclose(dummyout);	return ret;}/* Use a modified wais stoplist to do this with simple strcmp's in a for loop */static_stop_list(word)	char	*word;{	return 0;}/* This is the stuff that used to be present in the old build_in.c *//* Some variables used throughout */FILE *TIMEFILE;		/* file descriptor for sorting .glimpse_filenames by time */#if	BG_DEBUGFILE  *LOGFILE; 	/* file descriptor for LOG output */#endif	/*BG_DEBUG*/FILE  *STATFILE;	/* file descriptor for statistical data about indexed files */FILE  *MESSAGEFILE;	/* file descriptor for important messages meant for the user */char  INDEX_DIR[MAX_LINE_LEN];char  sync_path[MAX_LINE_LEN];struct stat istbuf;struct stat excstbuf;struct stat incstbuf;int ICurrentFileOffset;int NextICurrentFileOffset;/* Some options used throughout */int GenerateHash = OFF;int KeepFilenames = OFF;int OneFilePerBlock = OFF;int total_size = 0;int total_deleted = 0;int MAXWORDSPERFILE = 0;int NUMERICWORDPERCENT = DEF_NUMERIC_WORD_PERCENT;int AddToIndex = OFF;int DeleteFromIndex = OFF;int PurgeIndex = ON;int FastIndex = OFF;int BuildDictionary = OFF;int BuildDictionaryExisting = OFF;int CompressAfterBuild = OFF;int IncludeHigherPriority = OFF;int FilenamesOnStdin = OFF;int ExtractInfo = OFF;int InfoAfterFilename = OFF;int FirstWordOfInfoIsKey = OFF;int UseFilters = OFF;int ByteLevelIndex = OFF;int RecordLevelIndex = OFF;	/* When we want a -o like index but want to do booleans on a per-record basis directly from index: robint@zedcor.com */				/* This type of index doesn't make sense with attributes since they span > 1 record; hence StructuredIndex == -2 => this = ON */int StoreByteOffset = OFF;	/* In RecordLevelIndex, store record # for each word or byte offset of the record: record # is the default (12/12/96) */char rdelim[MAX_LINE_LEN];char old_rdelim[MAX_LINE_LEN];int rdelim_len = 0;/* int IndexUnderscore = OFF; */int IndexableFile = OFF;int MAX_INDEX_PERCENT = DEF_MAX_INDEX_PERCENT;int MAX_PER_MB = DEF_MAX_PER_MB;int I_THRESHOLD = DEF_I_THRESHOLD;int BigHashTable = OFF;int IndexEverything = OFF;int HashTableSize = MAX_64K_HASH;int BuildTurbo = OFF;int SortByTime = OFF;int AddedMaxWordsMessage = OFF;int AddedMixedWordsMessage = OFF;int  icount=0; /* count the number of my_malloc for indices structure */int  hash_icount=0; /* to see how much was added to the current hash table */int  save_icount=0; /* to see how much was added to the index by the current file */int  numeric_icount=0; /* to see how many numeric words were there in the current file */int mask_int[32] = MASK_INT;int p_table[MAX_PARTITION];int memory_usage = 0;char *my_malloc(len)    int len;{    char *s;    static int i=100;    if ((s = malloc(len)) != NULL) memory_usage += len;    else fprintf(stderr, "malloc failed after memory_usage = %x Bytes\n", memory_usage);    /* Don't exit since might do traverse here: exit in glimpse though */#if	BG_DEBUG    printf("m:%x ", memory_usage);    i--;    if (i==0) {	printf("\n");	i = 100;    }#endif	/*BG_DEBUG*/    return s;}my_free(ptr, size)	void *ptr;	int size;{    if (ptr) free(ptr);    memory_usage -= size;#if	BG_DEBUG    printf("f:%x ", memory_usage);#endif	/*BG_DEBUG*/}int file_num = 0;int old_file_num = 0;	/* upto what file number should disable list be accessed: < file_num if incremental indexing */int new_file_num = -1;	/* after purging index, how many files are left: for save_data_structures() */int  bp=0;                          /* buffer pointer */unsigned char word[MAX_WORD_BUF];int FirstTraverse1 = ON;struct  indices *ip;/* Globals used in merge, and also in glimpse's get_index.c */unsigned int *src_index_set = NULL;unsigned int *dest_index_set = NULL;unsigned char *src_index_buf = NULL;unsigned char *dest_index_buf = NULL;unsigned char *merge_index_buf = NULL;/* * Routines for zonal memory allocation for glimpseindex and very fast search in glimpse. */int next_free_token = 0;struct token *free_token = NULL; /*[I_THRESHOLD/AVG_OCCURRENCES]; */int next_free_indices = 0;struct indices *free_indices = NULL; /*[I_THRESHOLD]; */int next_free_word = 0;char *free_word = NULL; /*[I_THRESHOLD/AVG_OCCURRENCES * AVG_WORD_LEN]; */extern int usemalloc;/* * The beauty of this allocation scheme is that "free" does not need to be implemented! */tokenallfree(){	next_free_token = 0;}tokenfree(e, len)struct token *e;int len;{	if (usemalloc) my_free(e, sizeof(struct token));}struct token *tokenalloc(len)int	len;{	struct token *e;	if (usemalloc) (e) = (struct token *)my_malloc(sizeof(struct token));	else {		if (free_token == NULL) free_token = (struct token *)my_malloc(sizeof(struct token) * I_THRESHOLD / INDICES_PER_TOKEN);		if (free_token == NULL) {fprintf(stderr, "malloc failure in tokenalloc()\n"); exit(2);}		else (e) = ((next_free_token >= I_THRESHOLD / INDICES_PER_TOKEN) ? (NULL) : (&(free_token[next_free_token ++])));	}	return e;}indicesallfree(){	next_free_indices = 0;}indicesfree(e, len)struct indices *e;int len;{	if (usemalloc) my_free(e, sizeof(struct indices));}struct indices *indicesalloc(len)int	len;{	struct indices *e;	if (usemalloc) (e) = (struct indices *)my_malloc(sizeof(struct indices));	else {		if (free_indices == NULL) free_indices = (struct indices *)my_malloc(sizeof(struct indices) * I_THRESHOLD);		if (free_indices == NULL) {fprintf(stderr, "malloc failure in indicesalloc()\n"); exit(2);}		else (e) = ((next_free_indices >= I_THRESHOLD) ? (NULL) : (&(free_indices[next_free_indices ++])));	}	return e;}/* For words in a token structure */wordallfree(){	next_free_word = 0;}wordfree(s, len)char *s;int len;{	if (usemalloc) my_free(s, len);}char *wordalloc(len)int	len;{	char *s;	if (usemalloc) (s) = (char *)my_malloc(len);	else {		if (free_word == NULL) free_word = (char *)my_malloc(AVG_WORD_LEN * I_THRESHOLD/INDICES_PER_TOKEN);		if (free_word == NULL) {fprintf(stderr, "malloc failure in wordalloc()\n"); exit(2); }		else (s) = ((next_free_word + len + 2 >= AVG_WORD_LEN * I_THRESHOLD/INDICES_PER_TOKEN) ? (NULL) : (&(free_word[next_free_word])));		if (s != NULL) next_free_word += (len);		/* 2 for 1 char word with '\0' */	}	return s;}struct mini *mini_array = NULL;int mini_array_len = 0;#if	WORD_SORTED/* * Routines that operate on the index using the mini-index. * * The index is a list of words+delim+attr+offset+\n sorted * by the word (using strcmp). * * The mini-index keeps track of the offsets in the index * where every WORDS_PER_REGION-th word in the index occurs. * There is no direct way for glimpse to seek into the mini * file for the exact offset of this word since unlike hash * values words are of variable length. * * This is small enough to be kept in memory and searched * directly with full word case insensitive string compares * with binary search. For 256000 words in index there will be * 256000/128 = 2000 words in mini-index that will occupy * 2000*32 (avgword + off + delim/attr + sizeof(struct mini)), * which is less than 16 pages (can always be resident in mem). * * We just need to string search log_2(2000) + 128 words of * length 12B each in the worst case ===> VERY FAST. This is * not the best possible but space is the limit. If we hash the * whole index/regions in the index, we need TOO MUCH memory. *//*
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -