📄 io.c
字号:
* Binary search mini_array[beginindex..endindex); return 1 if success, 0 if failure. * Sets begin and end offsets for direct search; initially beginindex=0, endindex=mini_array_len */intget_mini(word, len, beginoffset, endoffset, beginindex, endindex, minifp) unsigned char *word; int len; long *beginoffset, *endoffset; int beginindex, endindex; FILE *minifp;{ int cmp, midindex; if ((mini_array == NULL) || (mini_array_len <= 0)) return 0; midindex = beginindex + (endindex - beginindex)/2; cmp = strcmp(word, mini_array[midindex].word); if (cmp < 0) { /* word DEFINITELY BEFORE midindex (but still at or after beginindex) */ if (beginindex >= midindex) { /* range of search is just ONE element in array */ *beginoffset = mini_array[midindex].offset; if (midindex + 1 < mini_array_len) { *endoffset = mini_array[midindex + 1].offset; } else *endoffset = -1; /* go till end of file */ return 1; } else return get_mini(word, len, beginoffset, endoffset, beginindex, midindex); } else { /* word DEFINITELY AT OR AFTER midindex (but still before endindex) */ if ((cmp == 0) || (endindex <= midindex + 1)) { /* range of search is just ONE element in array */ *beginoffset = mini_array[midindex].offset; if (midindex + 1 < mini_array_len) { *endoffset = mini_array[midindex + 1].offset; } else *endoffset = -1; /* go till end of file */ return 1; } else return get_mini(word, len, beginoffset, endoffset, midindex, endindex); }}/* Returns: #of words in mini_array if success or already read, -1 if failure */intread_mini(indexfp, minifp) FILE *indexfp, *minifp; /* indexfp pointing right to first line of word+... */{ unsigned char s[MAX_LINE_LEN], word[MAX_NAME_LEN]; int wordnum = 0, wordlen; long offset; struct stat st; if ((mini_array != NULL) && (mini_array_len > 0)) return mini_array_len; if (minifp == NULL) return 0; if (fstat(fileno(minifp), &st) == -1) { fprintf(stderr, "Can't stat: %s\n", s); return -1; } rewind(minifp); fscanf(minifp, "%d\n", &mini_array_len); if ((mini_array_len <= 0) || (mini_array_len > (st.st_size / 4 /* \n, space, 1char offset, 1char word */))) { fprintf(stderr, "Error in format of: %s\n", s); return -1; } mini_array = (struct mini *)my_malloc(sizeof(struct mini) * mini_array_len); memset(mini_array, '\0', sizeof(struct mini) * mini_array_len); while ((wordnum < mini_array_len) && (fscanf(minifp, "%s %ld\n", word, &offset) != EOF)) { wordlen = strlen((char *)word); mini_array[wordnum].word = (char *)my_malloc(wordlen + 2); strcpy((char *)mini_array[wordnum].word, (char *)word); mini_array[wordnum].offset = offset; wordnum ++; } return mini_array_len;}dump_mini(indexfile) char *indexfile;{ unsigned char s[MAX_LINE_LEN], word[MAX_NAME_LEN]; FILE *indexfp; FILE *minifp; int wordnum = 0, j, attr_num; long offset; /* offset if offset of beginning of word */ char temp_rdelim[MAX_LINE_LEN]; temp_rdelim[0] = '\0'; /* Initialize just in case. 10/25/99 --GV */ if ((indexfp = fopen(indexfile, "r")) == NULL) { fprintf(stderr, "Can't open for reading: %s\n", indexfile); return; } sprintf(s, "%s/%s.tmp", INDEX_DIR, MINI_FILE); if ((minifp = fopen(s, "w")) == NULL) { fprintf(stderr, "Can't open for writing: %s\n", s); fclose(indexfp); return; } fgets(s, 256, indexfp); /* indexnumbers */ fgets(s, 256, indexfp); /* onefileperblock */ fscanf(indexfp, "%%%d%s\n", &attr_num, temp_delim); /* structured index */ offset = ftell(indexfp); while (fgets(s, MAX_LINE_LEN, indexfp) != NULL) { if ((wordnum % WORDS_PER_REGION) == 0) { j = 0; while ((j < MAX_LINE_LEN) && (s[j] != WORD_END_MARK) && (s[j] != ALL_INDEX_MARK) && (s[j] != '\n')) j++; if ((j >= MAX_LINE_LEN) || (s[j] == '\n')) { wordnum ++; offset = ftell(indexfp); continue; } /* else it is WORD_END_MARK or ALL_INDEX_MARK */ s[j] = '\0'; strcpy((char *)word, (char *)s); if (fprintf(minifp, "%s %ld\n", word, offset) == EOF) { fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__); break; } mini_array_len ++; } wordnum ++; offset = ftell(indexfp); } fclose(indexfp); fflush(minifp); fclose(minifp); /* * Add amount of space needed for mini_array at the beginning */ sprintf(s, "%s/%s", INDEX_DIR, MINI_FILE); if ((minifp = fopen(s, "w")) == NULL) { fprintf(stderr, "Can't open for writing: %s\n", s); goto end; } sprintf(s, "%s/%s.tmp", INDEX_DIR, MINI_FILE); if ((indexfp = fopen(s, "r")) == NULL) { fprintf(stderr, "Can't open for reading: %s\n", s); fclose(minifp); goto end; } fprintf(minifp, "%d\n", mini_array_len); while (fgets(s, MAX_LINE_LEN, indexfp) != NULL) { fputs(s, minifp); } fflush(minifp); fclose(minifp);end: sprintf(s, "%s/%s.tmp", INDEX_DIR, MINI_FILE); unlink(s); return;}#else /* WORD_SORTED */intget_mini(word, len, beginoffset, endoffset, beginindex, endindex, minifp) unsigned char *word; int len; long *beginoffset, *endoffset; int beginindex, endindex; FILE *minifp;{ int index; unsigned char array[sizeof(int)]; extern int glimpse_isserver; /* in agrep/agrep.c */ index = hash64k(word, len); if ((mini_array == NULL) || (mini_array_len <= 0) || !glimpse_isserver) { if (minifp == NULL) return 0; fseek(minifp, (long)(index*sizeof(int)), 0); if (fread((void *)array, sizeof(int), 1, minifp) != 1) return 0; *beginoffset = decode32b((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3]); if (fread((void *)array, sizeof(int), 1, minifp) != 1) *endoffset = -1; else *endoffset = decode32b((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3]); return 1; } *beginoffset = mini_array[index].offset; if (index + 1 < endindex) *endoffset = mini_array[index + 1].offset; else *endoffset = -1; return 1;}/* Returns: #of words in mini_array if success or already read, -1 if failure */intread_mini(indexfp, minifp) FILE *indexfp, *minifp; /* indexfp pointing right to first line of word+... */{ unsigned char s[MAX_LINE_LEN], array[sizeof(int)]; int offset, hash_value; if ((mini_array != NULL) && (mini_array_len > 0)) return mini_array_len; if (minifp == NULL) return 0; rewind(minifp); mini_array_len = MINI_ARRAY_LEN; mini_array = (struct mini *)my_malloc(sizeof(struct mini) * mini_array_len); memset(mini_array, '\0', sizeof(struct mini) * mini_array_len); hash_value = 0; /* line# I am going to scan */ offset = 0; while ((hash_value < MINI_ARRAY_LEN) && (fread((void *)array, sizeof(int), 1, minifp) == 1)) { offset = (array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3]; mini_array[hash_value++].offset = decode32b(offset); } for (; hash_value<MINI_ARRAY_LEN; hash_value++) mini_array[hash_value].offset = -1; /* end of index file */ return mini_array_len;}/* * 1. Find hash64k values of each word. Then fprintf it before the word and put it * in another file. Sort it and put that as the real index. * 2. Then in the new index, dump offsets after stripping the hash value out, and * dump the offset at the hash_value-th line into the mini file. * 3. The only problem is that the offsets obtained from the index into the parti- * tions won't be in increasing order, but who cares? get_block_numbers() works! * 4. In merge_splits(), we have to re-sort everything by word for add-to-index * and fast-index to work properly. */dump_mini(indexfile) char *indexfile;{ unsigned char s[MAX_LINE_LEN], *t, word[MAX_NAME_LEN], c; unsigned char indexnumber[MAX_LINE_LEN], onefileperblock[MAX_LINE_LEN]; int attr_num, linelen; FILE *indexfp; FILE *newindexfp; FILE *minifp; long offset; /* offset if offset of beginning of word */ int eoffset, j, hash_value, prev_hash_value; /* NOT shorts!! */ int rc; /* return code from system(3) */ char es1[MAX_LINE_LEN], es2[MAX_LINE_LEN], es3[MAX_LINE_LEN], temp_rdelim[MAX_LINE_LEN]; temp_rdelim[0] = '\0'; /* Initialize in case not read. 10/25/99 --GV */ /* * First change the sorting order of the index file. */ if ((indexfp = fopen(indexfile, "r")) == NULL) { fprintf(stderr, "Can't open for reading: %s\n", indexfile); exit(2); } sprintf(s, "%s.tmp", indexfile); if ((newindexfp = fopen(s, "w")) == NULL) { fprintf(stderr, "Can't open for writing: %s\n", s); fclose(indexfp); exit(2); } /* Must store since sort -n can screw it up */ fgets(indexnumber, 256, indexfp); fgets(onefileperblock, 256, indexfp); if ( !fscanf(indexfp, "%%%d%s\n", &attr_num, temp_rdelim)) fscanf(indexfp, "%%%d\n", &attr_num); while (fgets(s, MAX_LINE_LEN, indexfp) != NULL) { j = 0; linelen = strlen(s); while ((j < linelen) && (s[j] != WORD_END_MARK) && (s[j] != ALL_INDEX_MARK) && (s[j] != '\n') && (s[j] != '\0')) j++; if ((j >= linelen) || (s[j] == '\n') || (s[j] == '\0')) { continue; } /* else it is WORD_END_MARK or ALL_INDEX_MARK */ c = s[j]; s[j] = '\0'; hash_value = hash64k(s, j); s[j] = c; fprintf(newindexfp, "%d ", hash_value); if (fputs(s, newindexfp) == EOF) { fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__); exit(2); } } fclose(indexfp); fflush(newindexfp); fclose(newindexfp);#if SFS_COMPAT unlink(indexfile);#else sprintf(s, "exec %s '%s'", SYSTEM_RM, escapesinglequote(indexfile, es1)); system(s);#endif#if DONTUSESORT_T_OPTION || SFS_COMPAT sprintf(s, "exec %s -n '%s.tmp' > '%s'\n", SYSTEM_SORT, escapesinglequote(indexfile, es1), escapesinglequote(indexfile, es2));#else sprintf(s, "exec %s -n -T '%s' '%s.tmp' > '%s'\n", SYSTEM_SORT, escapesinglequote(INDEX_DIR, es1), escapesinglequote(indexfile, es2), escapesinglequote(indexfile, es3));#endif rc = system(s); if (rc >> 8) { fprintf (stderr, "'sort' command:\n"); fprintf (stderr, " %s\n", s); fprintf (stderr, "failed with exit status %d\n", rc>>8); exit(2); }#if SFS_COMPAT sprintf(s, "%s.tmp", indexfile); unlink(s);#else sprintf(s, "exec %s '%s.tmp'", SYSTEM_RM, escapesinglequote(indexfile, es1)); system(s);#endif system(sync_path); /* sync() has a BUG */ /* * Now dump the mini-file's offsets and create the stripped index file */ if ((indexfp = fopen(indexfile, "r")) == NULL) { fprintf(stderr, "Can't open for reading: %s\n", indexfile); exit(2); } sprintf(s, "%s.tmp", indexfile); if ((newindexfp = fopen(s, "w")) == NULL) { fprintf(stderr, "Can't open for writing: %s\n", s); fclose(indexfp); exit(2); } sprintf(s, "%s/%s", INDEX_DIR, MINI_FILE); if ((minifp = fopen(s, "w")) == NULL) { fprintf(stderr, "Can't open for writing: %s\n", s); fclose(indexfp); fclose(newindexfp); exit(2); } fputs(indexnumber, newindexfp); fputs(onefileperblock, newindexfp); if (attr_num != -2) fprintf(newindexfp, "%%%d\n", attr_num); else fprintf(newindexfp, "%%%d %s\n", attr_num, temp_rdelim); prev_hash_value = -1; hash_value = 0; offset = ftell(newindexfp); while (fgets(s, MAX_LINE_LEN, indexfp) != NULL) { linelen = strlen(s); t = s; while ((*t != ' ') && (t < s + linelen)) t++; if (t >= s + linelen) continue; *t = '\0'; sscanf(s, "%d", &hash_value); t ++; /* points to first character of the beginning of s */ fputs(t, newindexfp); if (hash_value != prev_hash_value) { for (j=prev_hash_value + 1; j<=hash_value; j++) { eoffset = encode32b((int)offset); putc((eoffset & 0xff000000) >> 24, minifp); putc((eoffset & 0xff0000) >> 16, minifp); putc((eoffset & 0xff00) >> 8, minifp); if (putc((eoffset & 0xff), minifp) == EOF) { fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__); exit(2); } } prev_hash_value = hash_value; } offset = ftell(newindexfp); } for (hash_value = prev_hash_value + 1; hash_value<MINI_ARRAY_LEN; hash_value++) { eoffset = encode32b((int)offset); /* end of index file */ putc((eoffset & 0xff000000) >> 24, minifp); putc((eoffset & 0xff0000) >> 16, minifp); putc((eoffset & 0xff00) >> 8, minifp); if (putc((eoffset & 0xff), minifp) == EOF) { fprintf(stderr, "Error: write failed at %s:%d\n", __FILE__, __LINE__); exit(2); } } fclose(indexfp); fflush(newindexfp); fclose(newindexfp); fflush(minifp); fclose(minifp);#if SFS_COMPAT unlink(indexfile);#else sprintf(s, "exec %s '%s'", SYSTEM_RM, escapesinglequote(indexfile, es1)); system(s);#endif#if SFS_COMPAT sprintf(s, "%s.tmp", indexfile); rename(s, indexfile);#else sprintf(s, "exec %s '%s.tmp' '%s'\n", SYSTEM_MV, escapesinglequote(indexfile, es1), escapesinglequote(indexfile, es2)); system(s);#endif system(sync_path); /* sync() has a BUG */}#endif /* WORD_SORTED *//* Creates data structures that are related to the number of files present in * ".glimpse_filenames". These data structures are: * 1. index sets -- use my_malloc * 2. index bufs -- use my_malloc * Once this is done, this function can be called directly from glimpse/get_filenames() * and that can use all sets/bufs data structures directly. * This doesn't care how name_list() is created to be an array of arrays to be able to * add/delete dynamically from it: this uses malloc completely. * But: * disable_list (which is used only inside glimpse_index) must be malloced separately.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -