📄 get_index.c
字号:
fprintf(stderr, "%s: run out of file descriptors!\n", GProgname); return -1; } errno = 0; if ((ret = fileagrep(index_argc, index_argv, 0, f_in)) < 0) { fprintf(stderr, "%s: error in searching index\n", HARVEST_PREFIX); fclose(f_in); return -1; } fflush(f_in); fclose(f_in); f_in = NULL; index_argv[patbufpos] = NULL; /* For index-search with memgrep and get-filenames */ dummypat[0] = '\0'; if ((dummylen = memagrep_init(index_argc, index_argv, MAX_PAT, dummypat)) <= 0) { fclose(f_in); return -1; } /* Interpret the result */ if((f_in = fopen(infile, "r")) == NULL) { fprintf(stderr, "%s: can't open for reading: %s/%s\n", GProgname, INDEX_DIR, infile); return -1; } if (OneFilePerBlock) { for (patnum=0; patnum<num_mgrep_pat; patnum ++) { for(i=0; i<round(OneFilePerBlock, 8*sizeof(int)); i++) { multi_dest_index_set[patnum][i] = 0; } if (ByteLevelIndex) for(i=0; i<OneFilePerBlock; i++) { free_list(&multi_dest_offset_table[patnum][i]); /* multi_dest_offset_table[patnum][i] = NULL; bg, 28/9/1995 */ } multi_dest_index_set[patnum][REAL_PARTITION - 1] = 0; multi_dest_index_set[patnum][REAL_PARTITION - 2] = 0; } } else { for (patnum=0; patnum<num_mgrep_pat; patnum ++) for(i=0; i<MAX_PARTITION; i++) { multi_dest_index_set[patnum][i] = 0; } } dest_index_buf[0] = '\n'; /* memagrep needs buffer to begin with '\n' */ memset(allindexmark, '\0', num_mgrep_pat); min = (index_tab[REAL_PARTITION - 1] == 1) ? 0 : index_tab[REAL_PARTITION - 2]; while(fgets(dest_index_buf+1, REAL_INDEX_BUF, f_in)) { patnum=0; sscanf(&dest_index_buf[1], "%d-", &patnum);#if BG_DEBUG fprintf(debug, "patnum=%d len=%d pat=%s attr=%d index-line: %s\n", patnum, pat_lens[mgrep_pat_index[patnum-1]], pat_list[mgrep_pat_index[patnum-1]], pat_attr[mgrep_pat_index[patnum-1]], dest_index_buf+1);#endif /*BG_DEBUG*/ if ((patnum < 1) || (patnum > num_mgrep_pat)) continue; /* error! */ setptr = multi_dest_index_set[patnum - 1]; offsetptr = multi_dest_offset_table[patnum - 1]; for(k=0; dest_index_buf[k] != ' '; k++); dest_index_buf[k] = '\n'; if (!allindexmark[patnum - 1]) allindexmark[patnum - 1] = (char)get_set(&dest_index_buf[k], setptr, offsetptr, pat_lens[mgrep_pat_index[patnum-1]], pat_list[mgrep_pat_index[patnum-1]], pat_attr[mgrep_pat_index[patnum-1]], outfile, partfp, &setptr[REAL_PARTITION - 2], min); /* To test the maximum disparity to stop unions within above */ if (!allindexmark[patnum-1]) min = setptr[REAL_PARTITION - 2]; for (patnum=0; patnum<num_mgrep_pat; patnum ++) { if ((multi_dest_index_set[patnum][REAL_PARTITION - 2] < min) && (multi_dest_index_set[patnum][REAL_PARTITION - 1] != 1)) min = multi_dest_index_set[patnum][REAL_PARTITION - 2]; } min += (index_tab[REAL_PARTITION - 1] == 1) ? 0 : index_tab[REAL_PARTITION - 2]; }#if 0 for (patnum=0; patnum<num_mgrep_pat; patnum++) printf("%d=%d,%d\n", patnum, multi_dest_index_set[patnum][REAL_PARTITION - 1], multi_dest_index_set[patnum][REAL_PARTITION - 2]);#endif /*0*/ for (patnum=0; patnum<num_mgrep_pat; patnum++) sorted[patnum] = patnum; if (ByteLevelIndex && !NOBYTELEVEL && (RecordLevelIndex || !(Only_first && !PRINTAPPXFILEMATCH))) { max = 0; for (patnum=1; patnum<num_mgrep_pat; patnum++) { if (multi_dest_index_set[patnum][REAL_PARTITION - 2] > multi_dest_index_set[max][REAL_PARTITION - 2]) max = patnum; } /* Sort them according to the lengths of the lists in increasing order: min first */ for (patnum=0; patnum<num_mgrep_pat; patnum++) { min = patnum; for (j=patnum+1; j<num_mgrep_pat; j++) if (multi_dest_index_set[sorted[j]][REAL_PARTITION - 2] < multi_dest_index_set[sorted[min]][REAL_PARTITION - 2]) min = j; if (min != patnum) { temp = sorted[patnum]; sorted[patnum] = sorted[min]; sorted[min] = temp; } }#if USEFREQUENCIES if (!RecordLevelIndex && (multi_dest_index_set[sorted[max]][REAL_PARTITION - 2] > MAX_DISPARITY * multi_dest_index_set[sorted[0]][REAL_PARTITION - 2])) { NOBYTELEVEL = 1; /* printf("4 "); */ for (iii=0; iii<OneFilePerBlock; iii++) { for (jjj=0; jjj<num_mgrep_pat; jjj++) free_list(&multi_dest_offset_table[jjj][iii]); free_list(&offset_tab[iii]); } }#endif } else if (!RecordLevelIndex && NOBYTELEVEL) { for (iii=0; iii<OneFilePerBlock; iii++) { for (jjj=0; jjj<num_mgrep_pat; jjj++) free_list(&multi_dest_offset_table[jjj][iii]); free_list(&offset_tab[iii]); } } /* Take intersection if parse=ANDPAT or 0 (one terminal pattern), union if OR_EXP; Take care of universal sets in offset_tab[REAL_PARTITION - 1] */ for (patnum=0; patnum<num_mgrep_pat; patnum++) { if (OneFilePerBlock) { if (parse & OR_EXP) { if (allindexmark[sorted[patnum]]) { ret_is_1: index_tab[REAL_PARTITION - 1] = 1; for(i=0; i<round(OneFilePerBlock, 8*sizeof(int)) - 1; i++) { index_tab[i] = 0xffffffff; } index_tab[i] = 0; for (j=0; j<8*sizeof(int); j++) { if (i*8*sizeof(int) + j >= OneFilePerBlock) break; index_tab[i] |= mask_int[j]; } if (ByteLevelIndex && !RecordLevelIndex && !NOBYTELEVEL && !(Only_first && !PRINTAPPXFILEMATCH)) /* collect as many offsets as possible with RecordLevelIndex: free offset_tables at the end of process_query() */ for (i=0; i<OneFilePerBlock; i++) { for (patnum=0;patnum<num_mgrep_pat;patnum++) free_list(&multi_dest_offset_table[sorted[patnum]][i]); free_list(&offset_tab[i]); } if (ByteLevelIndex && !RecordLevelIndex) NOBYTELEVEL = 1; fclose(f_in); return 0; } index_tab[REAL_PARTITION - 1] = 0; for (i=0; i<round(OneFilePerBlock, 8*sizeof(int)); i++) index_tab[i] |= multi_dest_index_set[sorted[patnum]][i]; if (ByteLevelIndex && !NOBYTELEVEL && (RecordLevelIndex || !(Only_first && !PRINTAPPXFILEMATCH))) { for (i=0; i<OneFilePerBlock; i++) { sorted_union(&offset_tab[i], &multi_dest_offset_table[sorted[patnum]][i], &index_tab[REAL_PARTITION - 2], multi_dest_index_set[sorted[patnum]][REAL_PARTITION - 2], 0); if (!RecordLevelIndex && NOBYTELEVEL) { /* collect as many offsets as possible with RecordLevelIndex: free offset_tables at the end of process_query() */ for (iii=0; iii<OneFilePerBlock; iii++) { for (jjj=0; jjj<num_mgrep_pat; jjj++) free_list(&multi_dest_offset_table[jjj][iii]); free_list(&offset_tab[iii]); } break; } } } } else { if (((index_tab[REAL_PARTITION - 1] == 1) || first_time) && (allindexmark[sorted[patnum]])) { both_are_1: if (first_time) { index_tab[REAL_PARTITION - 1] = 1; for(i=0; i<round(OneFilePerBlock, 8*sizeof(int)) - 1; i++) { index_tab[i] = 0xffffffff; } index_tab[i] = 0; for (j=0; j<8*sizeof(int); j++) { if (i*8*sizeof(int) + j >= OneFilePerBlock) break; index_tab[i] |= mask_int[j]; } } first_time = 0; if (ByteLevelIndex && !RecordLevelIndex && !NOBYTELEVEL && !(Only_first && !PRINTAPPXFILEMATCH)) /* collect as many offsets as possible with RecordLevelIndex: free offset_tables at the end of process_query() */ for (i=0; i<OneFilePerBlock; i++) { for (patnum=0;patnum<num_mgrep_pat;patnum++) free_list(&multi_dest_offset_table[sorted[patnum]][i]); free_list(&offset_tab[i]); } if (ByteLevelIndex && !RecordLevelIndex) NOBYTELEVEL = 1; /* fclose(f_in); return 0; */ } else if ((index_tab[REAL_PARTITION - 1] == 1) || first_time) { first_time = 0; index_tab[REAL_PARTITION - 1] = 0; for (i=0; i<round(OneFilePerBlock, 8*sizeof(int)); i++) index_tab[i] = multi_dest_index_set[sorted[patnum]][i]; if (ByteLevelIndex && !NOBYTELEVEL && (RecordLevelIndex || !(Only_first && !PRINTAPPXFILEMATCH))) { for (i=0; i<OneFilePerBlock; i++) { free_list(&offset_tab[i]); offset_tab[i] = multi_dest_offset_table[sorted[patnum]][i]; multi_dest_offset_table[sorted[patnum]][i] = NULL; } } } else if (allindexmark[sorted[patnum]]) { if (ByteLevelIndex && !RecordLevelIndex && !NOBYTELEVEL && !(Only_first && !PRINTAPPXFILEMATCH)) /* collect as many offsets as possible with RecordLevelIndex: free offset_tables at the end of process_query() */ for (i=0; i<OneFilePerBlock; i++) free_list(&multi_dest_offset_table[sorted[patnum]][i]); } else { for (i=0; i<round(OneFilePerBlock, 8*sizeof(int)); i++) index_tab[i] &= multi_dest_index_set[sorted[patnum]][i]; if (ByteLevelIndex && !NOBYTELEVEL && (RecordLevelIndex || !(Only_first && !PRINTAPPXFILEMATCH))) { if (first_time || WHOLEFILESCOPE) { first_time = 0; for (i=0; i<OneFilePerBlock; i++) { sorted_union(&offset_tab[i], &multi_dest_offset_table[sorted[patnum]][i], &index_tab[REAL_PARTITION - 2], multi_dest_index_set[sorted[patnum]][REAL_PARTITION - 2], 0); if (!RecordLevelIndex && NOBYTELEVEL) { for (iii=0; iii<OneFilePerBlock; iii++) { for (jjj=0; jjj<num_mgrep_pat; jjj++) free_list(&multi_dest_offset_table[jjj][iii]); free_list(&offset_tab[iii]); } break; } } } else { for (i=0; i<OneFilePerBlock; i++) { if ((index_tab[block2index(i)] & mask_int[i % (8*sizeof(int))])) sorted_intersection(i, &offset_tab[i], &multi_dest_offset_table[sorted[patnum]][i], &index_tab[REAL_PARTITION - 2]); else free_list(&multi_dest_offset_table[sorted[patnum]][i]); /* if (index_tab[REAL_PARTITION - 2] < MIN_OCCURRENCES) { if (!NOBYTELEVEL) { for (iii=0; iii<OneFilePerBlock; iii++) { for (jjj=0; jjj<num_mgrep_pat; jjj++) free_list(&multi_dest_offset_table[jjj][iii]); free_list(&offset_tab[iii]); } } NOBYTELEVEL = 1; OPTIMIZEBYTELEVEL = 1; break; } */ } } } } } } else { if (parse & OR_EXP) { for (patnum=0; patnum<num_mgrep_pat; patnum++) for(i=0; i<MAX_PARTITION; i++) index_tab[i] |= multi_dest_index_set[patnum][i]; } else { for (patnum=0; patnum<num_mgrep_pat; patnum++) for(i=0; i<MAX_PARTITION; i++) index_tab[i] &= multi_dest_index_set[patnum][i]; } } }#if BG_DEBUG fprintf(debug, "get_index(): the following partitions are ON\n"); for(i=0; i<((OneFilePerBlock > 0) ? round(OneFilePerBlock, 8*sizeof(int)) : MAX_PARTITION); i++) { if(index_tab[i]) fprintf(debug, "%d,%x\n", i, index_tab[i]); }#endif /*BG_DEBUG*/ fclose(f_in); return 0;}/* All borrowed from main.c and are needed for searching the index */extern CHAR *pat_list[MAXNUM_PAT]; /* complete words within global pattern */extern int pat_lens[MAXNUM_PAT]; /* their lengths */extern int pat_attr[MAXNUM_PAT]; /* set of attributes */extern int num_pat;extern CHAR pat_buf[(MAXNUM_PAT + 2)*MAXPAT];extern int pat_ptr;extern int is_mgrep_pat[MAXNUM_PAT];extern int mgrep_pat_index[MAXNUM_PAT];extern int num_mgrep_pat;extern unsigned int *src_index_set;extern struct offsets **src_offset_table;extern char tempfile[];extern int patindex;extern int patbufpos;extern ParseTree terminals[MAXNUM_PAT];extern int GBESTMATCH; /* Should I change -B to -# where # = no. of errors? */extern int bestmatcherrors; /* set during index search, used later on */extern FILE *partfp; /* glimpse partitions */extern FILE *nullfp; /* to discard output: agrep -s doesn't work properly */extern int ComplexBoolean;extern int num_terminals;#if 0extern struct token *hash_table[MAX_64K_HASH];#else /*0*/extern int mini_array_len;#endif /*0*/extern int WORDBOUND, NOUPPER, D, LINENUM;intveryfastsearch(argc, argv, num_pat, pat_list, pat_lens, minifp) int argc; char *argv[]; int num_pat; CHAR *pat_list[MAXNUM_PAT]; int pat_lens[MAXNUM_PAT]; FILE *minifp;{ /* * Figure out from options if very fast search is possible. */ if (minifp == NULL) return 0; if (!OneFilePerBlock) return 0; /* you did not build index for speed anyway */ if (!(WORDBOUND && NOUPPER && (D<=0))) return 0; if (LINENUM) return 0; return 1; /* if ((num_mgrep_pat == num_pat) || ((1 == num_pat) && (1 == checksg(pat_list[0], D, 0)))) return 1; */ /* either all >= 2 patterns are mgrep-able (simple) or there is just one simple pattern: i.e., "cast" can be used! */ /* return 0; */}intmini_agrep(inword, inlen, outfp) CHAR *inword; int inlen; FILE *outfp;{ static struct stat st; static int statted = 0; unsigned char s[MAX_LINE_LEN], word[MAX_NAME_LEN]; long beginoffset, endoffset, curroffset; unsigned char c; int j, num = 0, cmp, len; if (!statted) { sprintf((char*)s, "%s/%s", INDEX_DIR, INDEX_FILE); if (stat(s, &st) == -1) { fprintf(stderr, "Can't stat file: %s\n", s); exit(2); } statted = 1; } j = 0; while (*inword) { if (*inword == '\\') { inword++; continue; } if (isupper(*(unsigned char *)inword)) word[j] = tolower(*(unsigned char *)inword); else word[j] = *inword; j++; inword ++; } word[j] = '\0'; len = j; if (!get_mini(word, len, &beginoffset, &endoffset, 0, mini_array_len, minifp)) return 0; if (endoffset == -1) endoffset = st.st_size; if (endoffset <= beginoffset) return 0; /* We must find all occurrences of the word (in all attributes) so can't quit when we find the first match */ fseek(indexfp, beginoffset, 0); curroffset = ftell(indexfp); /* = beginoffset */ while ((curroffset < endoffset) && (fgets(s, MAX_LINE_LEN, indexfp) != NULL)) { j = 0; while ((j < MAX_LINE_LEN) && (s[j] != WORD_END_MARK) && (s[j] != ALL_INDEX_MARK) && (s[j] != '\0') && (s[j] != '\n')) j++;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -