📄 io.c

📁 linux下阅读源码的好工具
💻 C
📖 第 1 页 / 共 4 页
字号:
 * Binary search mini_array[beginindex..endindex); return 1 if success, 0 if failure. * Sets begin and end offsets for direct search; initially beginindex=0, endindex=mini_array_len */intget_mini(word, len, beginoffset, endoffset, beginindex, endindex, minifp)	unsigned char *word;	int	len;	long	*beginoffset, *endoffset;	int	beginindex, endindex;	FILE	*minifp;{	int	cmp, midindex;	if ((mini_array == NULL) || (mini_array_len <= 0)) return 0;	midindex = beginindex + (endindex - beginindex)/2;	cmp = strcmp(word, mini_array[midindex].word);	if (cmp < 0) {	/* word DEFINITELY BEFORE midindex (but still at or after beginindex) */		if (beginindex >= midindex) {	/* range of search is just ONE element in array */			*beginoffset = mini_array[midindex].offset;			if (midindex + 1 < mini_array_len) {				*endoffset = mini_array[midindex + 1].offset;			}			else *endoffset = -1;	/* go till end of file */			return 1;		}		else return get_mini(word, len, beginoffset, endoffset, beginindex, midindex);	}	else {	/* word DEFINITELY AT OR AFTER midindex (but still before endindex) */		if ((cmp == 0) || (endindex <= midindex + 1)) {	/* range of search is just ONE element in array */			*beginoffset = mini_array[midindex].offset;			if (midindex + 1 < mini_array_len) {				*endoffset = mini_array[midindex + 1].offset;			}			else *endoffset = -1;	/* go till end of file */			return 1;		}		else return get_mini(word, len, beginoffset, endoffset, midindex, endindex);	}}/* Returns: #of words in mini_array if success or already read, -1 if failure */intread_mini(indexfp, minifp)	FILE	*indexfp, *minifp;	/* indexfp pointing right to first line of word+... */{	unsigned char	s[MAX_LINE_LEN], word[MAX_NAME_LEN];	int	wordnum = 0, wordlen;	long	offset;	struct stat st;	if ((mini_array != NULL) && (mini_array_len > 0)) return mini_array_len;	if (minifp == NULL) return 0;	if (fstat(fileno(minifp), &st) == -1) {		fprintf(stderr, "Can't stat: %s\n", s);		return -1;	}	rewind(minifp);	fscanf(minifp, "%d\n", &mini_array_len);	if ((mini_array_len <= 0) || (mini_array_len > (st.st_size / 4 /* \n, space, 1char offset, 1char word */))) {		fprintf(stderr, "Error in format of: %s\n", s);		return -1;	}	mini_array = (struct mini *)my_malloc(sizeof(struct mini) * mini_array_len);	memset(mini_array, '\0', sizeof(struct mini) * mini_array_len);	while ((wordnum < mini_array_len) && (fscanf(minifp, "%s %ld\n", word, &offset) != EOF)) {		wordlen = strlen((char *)word);		mini_array[wordnum].word = (char *)my_malloc(wordlen + 2);		strcpy((char *)mini_array[wordnum].word, (char *)word);		mini_array[wordnum].offset = offset;		wordnum ++;	}	return mini_array_len;}dump_mini(indexfile)	char	*indexfile;{	unsigned char	s[MAX_LINE_LEN], word[MAX_NAME_LEN];	FILE	*indexfp;	FILE	*minifp;	int	wordnum = 0, j, attr_num;	long	offset;	/* offset if offset of beginning of word */	char	temp_rdelim[MAX_LINE_LEN];	temp_rdelim[0] = '\0';  /* Initialize just in case. 10/25/99 --GV */	if ((indexfp = fopen(indexfile, "r")) == NULL) {		fprintf(stderr, "Can't open for reading: %s\n", indexfile);		return;	}	sprintf(s, "%s/%s.tmp", INDEX_DIR, MINI_FILE);	if ((minifp = fopen(s, "w")) == NULL) {		fprintf(stderr, "Can't open for writing: %s\n", s);		fclose(indexfp);		return;	}	fgets(s, 256, indexfp);	/* indexnumbers */	fgets(s, 256, indexfp);	/* onefileperblock */	fscanf(indexfp, "%%%d%s\n", &attr_num, temp_delim);	/* structured index */	offset = ftell(indexfp);	while (fgets(s, MAX_LINE_LEN, indexfp) != NULL) {		if ((wordnum % WORDS_PER_REGION) == 0) {			j = 0;			while ((j < MAX_LINE_LEN) && (s[j] != WORD_END_MARK) && (s[j] != ALL_INDEX_MARK) && (s[j] != '\n')) j++;			if ((j >= MAX_LINE_LEN) || (s[j] == '\n')) {				wordnum ++;				offset = ftell(indexfp);				continue;			}			/* else it is WORD_END_MARK or ALL_INDEX_MARK */			s[j] = '\0';			strcpy((char *)word, (char *)s);			if (fprintf(minifp, "%s %ld\n", word, offset) == EOF) {				fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__);				break;			}			mini_array_len ++;		}		wordnum ++;		offset = ftell(indexfp);	}	fclose(indexfp);	fflush(minifp);	fclose(minifp);	/*	 * Add amount of space needed for mini_array at the beginning	 */	sprintf(s, "%s/%s", INDEX_DIR, MINI_FILE);	if ((minifp = fopen(s, "w")) == NULL) {		fprintf(stderr, "Can't open for writing: %s\n", s);		goto end;	}	sprintf(s, "%s/%s.tmp", INDEX_DIR, MINI_FILE);	if ((indexfp = fopen(s, "r")) == NULL) {		fprintf(stderr, "Can't open for reading: %s\n", s);		fclose(minifp);		goto end;	}	fprintf(minifp, "%d\n", mini_array_len);	while (fgets(s, MAX_LINE_LEN, indexfp) != NULL) {		fputs(s, minifp);	}	fflush(minifp);	fclose(minifp);end:	sprintf(s, "%s/%s.tmp", INDEX_DIR, MINI_FILE);	unlink(s);	return;}#else	/* WORD_SORTED */intget_mini(word, len, beginoffset, endoffset, beginindex, endindex, minifp)	unsigned char *word;	int	len;	long	*beginoffset, *endoffset;	int	beginindex, endindex;	FILE	*minifp;{	int	index;	unsigned char array[sizeof(int)];	extern int	glimpse_isserver;	/* in agrep/agrep.c */	index = hash64k(word, len);	if ((mini_array == NULL) || (mini_array_len <= 0) || !glimpse_isserver) {		if (minifp == NULL) return 0;		fseek(minifp, (long)(index*sizeof(int)), 0);		if (fread((void *)array, sizeof(int), 1, minifp) != 1) return 0;		*beginoffset = decode32b((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3]);		if (fread((void *)array, sizeof(int), 1, minifp) != 1)			*endoffset = -1;		else *endoffset = decode32b((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3]);		return 1;	}	*beginoffset = mini_array[index].offset;	if (index + 1 < endindex)		*endoffset = mini_array[index + 1].offset;	else *endoffset = -1;	return 1;}/* Returns: #of words in mini_array if success or already read, -1 if failure */intread_mini(indexfp, minifp)	FILE	*indexfp, *minifp;	/* indexfp pointing right to first line of word+... */{	unsigned char	s[MAX_LINE_LEN], array[sizeof(int)];	int	offset, hash_value;	if ((mini_array != NULL) && (mini_array_len > 0)) return mini_array_len;	if (minifp == NULL) return 0;	rewind(minifp);	mini_array_len = MINI_ARRAY_LEN;	mini_array = (struct mini *)my_malloc(sizeof(struct mini) * mini_array_len);	memset(mini_array, '\0', sizeof(struct mini) * mini_array_len);	hash_value = 0;	/* line# I am going to scan */	offset = 0;	while ((hash_value < MINI_ARRAY_LEN) && (fread((void *)array, sizeof(int), 1, minifp) == 1)) {		offset = (array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3];		mini_array[hash_value++].offset = decode32b(offset);	}	for (; hash_value<MINI_ARRAY_LEN; hash_value++)		mini_array[hash_value].offset = -1;	/* end of index file */	return mini_array_len;}/* * 1. Find hash64k values of each word. Then fprintf it before the word and put it *    in another file. Sort it and put that as the real index. * 2. Then in the new index, dump offsets after stripping the hash value out, and *    dump the offset at the hash_value-th line into the mini file. * 3. The only problem is that the offsets obtained from the index into the parti- *    tions won't be in increasing order, but who cares? get_block_numbers() works! * 4. In merge_splits(), we have to re-sort everything by word for add-to-index *    and fast-index to work properly. */dump_mini(indexfile)	char	*indexfile;{	unsigned char	s[MAX_LINE_LEN], *t, word[MAX_NAME_LEN], c;	unsigned char	indexnumber[MAX_LINE_LEN], onefileperblock[MAX_LINE_LEN];	int	attr_num, linelen;	FILE	*indexfp;	FILE	*newindexfp;	FILE	*minifp;	long	offset;	/* offset if offset of beginning of word */	int	eoffset, j, hash_value, prev_hash_value;	/* NOT shorts!! */	int	rc;	/* return code from system(3) */	char	es1[MAX_LINE_LEN], es2[MAX_LINE_LEN], es3[MAX_LINE_LEN], temp_rdelim[MAX_LINE_LEN];	temp_rdelim[0] = '\0';  /* Initialize in case not read. 10/25/99 --GV */	/*	 * First change the sorting order of the index file.	 */	if ((indexfp = fopen(indexfile, "r")) == NULL) {		fprintf(stderr, "Can't open for reading: %s\n", indexfile);		exit(2);	}	sprintf(s, "%s.tmp", indexfile);	if ((newindexfp = fopen(s, "w")) == NULL) {		fprintf(stderr, "Can't open for writing: %s\n", s);		fclose(indexfp);		exit(2);	}	/* Must store since sort -n can screw it up */	fgets(indexnumber, 256, indexfp);	fgets(onefileperblock, 256, indexfp);	if ( !fscanf(indexfp, "%%%d%s\n", &attr_num, temp_rdelim)) 		fscanf(indexfp, "%%%d\n", &attr_num);	while (fgets(s, MAX_LINE_LEN, indexfp) != NULL) {		j = 0;		linelen = strlen(s);		while ((j < linelen) && (s[j] != WORD_END_MARK) && (s[j] != ALL_INDEX_MARK) && (s[j] != '\n') && (s[j] != '\0')) j++;		if ((j >= linelen) || (s[j] == '\n') || (s[j] == '\0')) {			continue;		}		/* else it is WORD_END_MARK or ALL_INDEX_MARK */		c = s[j];		s[j] = '\0';		hash_value = hash64k(s, j);		s[j] = c;		fprintf(newindexfp, "%d ", hash_value);		if (fputs(s, newindexfp) == EOF) {			fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__);			exit(2);		}	}	fclose(indexfp);	fflush(newindexfp);	fclose(newindexfp);#if	SFS_COMPAT	unlink(indexfile);#else	sprintf(s, "exec %s '%s'", SYSTEM_RM, escapesinglequote(indexfile, es1));	system(s);#endif#if	DONTUSESORT_T_OPTION || SFS_COMPAT	sprintf(s, "exec %s -n '%s.tmp' > '%s'\n", SYSTEM_SORT, escapesinglequote(indexfile, es1), escapesinglequote(indexfile, es2));#else	sprintf(s, "exec %s -n -T '%s' '%s.tmp' > '%s'\n", SYSTEM_SORT, escapesinglequote(INDEX_DIR, es1), escapesinglequote(indexfile, es2), escapesinglequote(indexfile, es3));#endif	rc = system(s);	if (rc >> 8) {		fprintf (stderr, "'sort' command:\n");		fprintf (stderr, "    %s\n", s);		fprintf (stderr, "failed with exit status %d\n", rc>>8);		exit(2);	}#if	SFS_COMPAT	sprintf(s, "%s.tmp", indexfile);	unlink(s);#else	sprintf(s, "exec %s '%s.tmp'", SYSTEM_RM, escapesinglequote(indexfile, es1));	system(s);#endif	system(sync_path);	/* sync() has a BUG */	/*	 * Now dump the mini-file's offsets and create the stripped index file	 */	if ((indexfp = fopen(indexfile, "r")) == NULL) {		fprintf(stderr, "Can't open for reading: %s\n", indexfile);		exit(2);	}	sprintf(s, "%s.tmp", indexfile);	if ((newindexfp = fopen(s, "w")) == NULL) {		fprintf(stderr, "Can't open for writing: %s\n", s);		fclose(indexfp);		exit(2);	}	sprintf(s, "%s/%s", INDEX_DIR, MINI_FILE);	if ((minifp = fopen(s, "w")) == NULL) {		fprintf(stderr, "Can't open for writing: %s\n", s);		fclose(indexfp);		fclose(newindexfp);		exit(2);	}	fputs(indexnumber, newindexfp);	fputs(onefileperblock, newindexfp);	if (attr_num != -2) fprintf(newindexfp, "%%%d\n", attr_num);	else fprintf(newindexfp, "%%%d %s\n", attr_num, temp_rdelim);	prev_hash_value = -1;	hash_value = 0;	offset = ftell(newindexfp);	while (fgets(s, MAX_LINE_LEN, indexfp) != NULL) {		linelen = strlen(s);		t = s;		while ((*t != ' ') && (t < s + linelen)) t++;		if (t >= s + linelen) continue;		*t = '\0';		sscanf(s, "%d", &hash_value);		t ++;	/* points to first character of the beginning of s */		fputs(t, newindexfp);		if (hash_value != prev_hash_value) {			for (j=prev_hash_value + 1; j<=hash_value; j++) {				eoffset = encode32b((int)offset);				putc((eoffset & 0xff000000) >> 24, minifp);				putc((eoffset & 0xff0000) >> 16, minifp);				putc((eoffset & 0xff00) >> 8, minifp);				if (putc((eoffset & 0xff), minifp) == EOF) {					fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__);					exit(2);				}			}			prev_hash_value = hash_value;		}		offset = ftell(newindexfp);	}	for (hash_value = prev_hash_value + 1; hash_value<MINI_ARRAY_LEN; hash_value++) {		eoffset = encode32b((int)offset);	/* end of index file */		putc((eoffset & 0xff000000) >> 24, minifp);		putc((eoffset & 0xff0000) >> 16, minifp);		putc((eoffset & 0xff00) >> 8, minifp);		if (putc((eoffset & 0xff), minifp) == EOF) {			fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__);			exit(2);		}	}	fclose(indexfp);	fflush(newindexfp);	fclose(newindexfp);	fflush(minifp);	fclose(minifp);#if	SFS_COMPAT	unlink(indexfile);#else	sprintf(s, "exec %s '%s'", SYSTEM_RM, escapesinglequote(indexfile, es1));	system(s);#endif#if	SFS_COMPAT	sprintf(s, "%s.tmp", indexfile);	rename(s, indexfile);#else	sprintf(s, "exec %s '%s.tmp' '%s'\n", SYSTEM_MV, escapesinglequote(indexfile, es1), escapesinglequote(indexfile, es2));	system(s);#endif	system(sync_path);	/* sync() has a BUG */}#endif	/* WORD_SORTED *//* Creates data structures that are related to the number of files present in * ".glimpse_filenames". These data structures are: * 1. index sets	-- use my_malloc * 2. index bufs	-- use my_malloc * Once this is done, this function can be called directly from glimpse/get_filenames() * and that can use all sets/bufs data structures directly. * This doesn't care how name_list() is created to be an array of arrays to be able to * add/delete dynamically from it: this uses malloc completely. * But: *	disable_list (which is used only inside glimpse_index) must be malloced separately.
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -