📄 get_index.c
字号:
else if (OneFilePerBlock <= MaxNum16bPartition) { x = (buffer[bdx2] << 8) | buffer[bdx2+1]; x = decode16b(x); bdx2 += 2; } else { x = (buffer[bdx2] << 16) | (buffer[bdx2+1] << 8) | buffer[bdx2+2]; x = decode24b(x); bdx2 += 3; } if ((last_Y_filenumber > 0) && (x >= last_Y_filenumber)) continue; set[block2index(x)] |= block2mask(x); if (PRINTINDEXLINE) { printf("%d [", x); } prevy = 0; if (ByteLevelIndex) { heado = tailo = NULL; curfreq = 0; while ((bdx2<REAL_INDEX_BUF) && (buffer[bdx2] != '\n') && (buffer[bdx2] != '\0')) { y = decode8b(buffer[bdx2]); if ((y & 0x000000c0) == 0) { /* one byte offset */ diff = y&0x0000003f; y = prevy + diff; bdx2 ++; } else if ((y & 0x000000c0) == 0x40) { /* two byte offset */ diff = decode8b(buffer[bdx2+1]); y = prevy + (((y & 0x0000003f) * MaxNum8bPartition) + diff); bdx2 += 2; } else if ((y & 0x000000c0) == 0x80) { /* three byte offset */ diff = decode16b((buffer[bdx2+1] << 8) | buffer[bdx2+2]); y = prevy + (((y & 0x0000003f) * MaxNum16bPartition) + diff); bdx2 += 3; } else { /* four byte offset */ diff = decode24b((buffer[bdx2+1] << 16) | (buffer[bdx2+2] << 8) | buffer[bdx2+3]); y = prevy + (((y & 0x0000003f) * MaxNum24bPartition) + diff); bdx2 += 4; } prevy = y; if (PRINTINDEXLINE) { printf(" %d", y); } curfreq ++; if(RecordLevelIndex || (!(Only_first && !PRINTAPPXFILEMATCH) && !NOBYTELEVEL && /* below borrowed from sorted_union */#if USEFREQUENCIES !(((prevfreq>MIN_OCCURRENCES)&&(curfreq+*frequency > MAX_UNION*prevfreq)) || (curfreq+*frequency > MAX_ABSOLUTE))#else 1#endif ) ) { /* These o's will be in sorted order. Just collect all of them and merge with &offset_table[x]. 
*/ o = (struct offsets *)my_malloc(sizeof(struct offsets)); o->offset = y; o->next = NULL; o->sign = o->done = 0; if (heado == NULL) { heado = o; tailo = o; } else { tailo->next = o; tailo = o; } } else if (!RecordLevelIndex) { if (heado != NULL) free_list(&heado); /* printf("1 "); */ NOBYTELEVEL = 1; /* can't return since have to or the bitmasks */ } if ((bdx2<REAL_INDEX_BUF) && (buffer[bdx2] == delim)) { /* look at offsets corr. to a new file now */ bdx2 ++; break; } } if (heado == NULL) *frequency += curfreq; else if (RecordLevelIndex || (!(Only_first && !PRINTAPPXFILEMATCH) && !NOBYTELEVEL)) { sorted_union(&offset_table[x], &heado, frequency, prevfreq, curfreq); /* this will free heado's elements and ++ *frequency */ if (!RecordLevelIndex && NOBYTELEVEL) *frequency += curfreq; /* can't return since have to or the bitmasks */ if (heado != NULL) free_list(&heado); } } if (PRINTINDEXLINE) { printf("] "); } } } else { while((bdx2<MAX_INDEX_BUF) && (buffer[bdx2] != '\n') && (buffer[bdx2] != '\0') && (buffer[bdx2] < MAX_PARTITION)) { if ((last_Y_filenumber > 0) && (p_table[buffer[bdx2]] >= last_Y_filenumber)) { bdx2 ++; continue; } if (PRINTINDEXLINE) { for (j=p_table[buffer[bdx2]]; j<p_table[buffer[bdx2] + 1]; j++) if ((last_Y_filenumber > 0) && (j >= last_Y_filenumber)) break; else printf("%d [] ", j); } set[buffer[bdx2]] = 1; bdx2++; } } if (PRINTINDEXLINE) { printf("\n"); } return 0;}/* * This is a very simple function: it gets the list of matched lines from the index, * and sets the block numbers corr. to files that need to be searched in "index_tab". * It also sets the file-offsets that have to be searched in "offset_tab" (byte-level). 
*/

/*
 * get_index: read the index file `infile` line by line, build the set of
 * matching blocks/partitions in the global dest_index_set, and merge that set
 * into the caller's index_tab (and, for byte-level indexes, merge the
 * per-file offset lists into offset_tab).
 *
 * Parameters:
 *   infile      name of the file to read; NULL or "" is rejected.
 *   index_tab   in/out set being accumulated across calls.
 *               index_tab[REAL_PARTITION - 1] is used as a "universal set"
 *               flag; index_tab[REAL_PARTITION - 2] is handed to get_set(),
 *               sorted_union() and sorted_intersection() (presumably an
 *               occurrence/frequency counter -- confirm against those helpers).
 *   offset_tab  in/out per-file offset lists, indexed 0..OneFilePerBlock-1.
 *   pattern, patlen, patattr, outfile, partfp
 *               passed through unchanged to get_set().
 *   index_argv, index_argc
 *               not referenced in this function.
 *   parse       if the OR_EXP bit is set the new set is unioned into
 *               index_tab, otherwise it is intersected.
 *   first_time  non-zero makes the AND path overwrite index_tab instead of
 *               intersecting with it.  It is passed by value, so the
 *               assignments to it below do not reach the caller.
 *
 * Returns 0 on success (including the early-out paths) and -1 when infile is
 * NULL/empty or cannot be opened.
 *
 * Side effects: overwrites dest_index_set and dest_index_buf, may set the
 * global NOBYTELEVEL, and may free or re-link the lists in offset_tab and
 * multi_dest_offset_table[0].
 */
get_index(infile, index_tab, offset_tab, pattern, patlen, patattr, index_argv, index_argc, outfile, partfp, parse, first_time)
char *infile;
unsigned int *index_tab;
struct offsets **offset_tab;
char *pattern;
int patlen;
int patattr;
char *index_argv[];
int index_argc;
FILE *outfile;
FILE *partfp;
int parse;
int first_time;
{
	int i=0, j, iii;
	FILE *f_in;
	struct offsets **offsetptr = multi_dest_offset_table[0]; /* cannot be NULL if ByteLevelIndex: main.c takes care of that */
	int ret=0;	/* last get_set() result; non-zero is treated below as "matches everything" */

	/* index_tab is already flagged as the universal set: a union cannot add anything, so skip the file entirely */
	if (OneFilePerBlock && (parse & OR_EXP) && (index_tab[REAL_PARTITION - 1] == 1)) return 0;
	if (((infile == NULL) || !strcmp(infile, "")) /* || (index_tab == NULL) || (offset_tab == NULL) || (pattern == NULL)*/) return -1;
	if((f_in = fopen(infile, "r")) == NULL) {
		/* NOTE(review): the message prints INDEX_DIR/infile although the path opened is infile alone */
		fprintf(stderr, "%s: can't open for reading: %s/%s\n", GProgname, INDEX_DIR, infile);
		return -1;
	}

	/*
	 * Start from an empty destination set.  With OneFilePerBlock the set is
	 * cleared for round(OneFilePerBlock, 8*sizeof(int)) entries (presumably the
	 * number of int-sized bit words needed -- confirm against round()'s
	 * definition); otherwise there is one entry per partition.
	 */
	if (OneFilePerBlock)
		for(i=0; i<round(OneFilePerBlock, 8*sizeof(int)); i++) {
			dest_index_set[i] = 0;
		}
	else
		for(i=0; i<MAX_PARTITION; i++) {
			dest_index_set[i] = 0;
		}
	dest_index_buf[0] = '\n'; /* memagrep needs buffer to begin with '\n' */
	dest_index_set[REAL_PARTITION - 2] = 0;	/* reset the slot whose address is handed to get_set() below */

	/* Each line is read after the leading '\n' and decoded by get_set() into dest_index_set / offsetptr */
	while(fgets(dest_index_buf+1, REAL_INDEX_BUF-1, f_in)) {
#if BG_DEBUG
		fprintf(debug, "index-line: %s", dest_index_buf+1);
#endif /*BG_DEBUG*/
		if ((ret = get_set(&dest_index_buf[0], dest_index_set, offsetptr, patlen, pattern, patattr, outfile, partfp, &dest_index_set[REAL_PARTITION - 2], index_tab[REAL_PARTITION - 2])) != 0)
			break; /* all index mark touched */
	}

	/* Byte-level matching has been switched off (possibly during the scan above): drop every offset list collected so far */
	if (!RecordLevelIndex && NOBYTELEVEL) {
		for (iii=0; iii<OneFilePerBlock; iii++) {
			free_list(&offset_tab[iii]);
			free_list(&offsetptr[iii]);
		}
	}

	/* Inverted match: complement dest_index_set */
	if (INVERSE) {
		if (OneFilePerBlock) {
			if (ByteLevelIndex && !RecordLevelIndex) NOBYTELEVEL = 1; /* can't collect all offsets where pattern DOES NOT occur!
*/
			/* Complement every word except the last one ... */
			for (i=0; i<round(OneFilePerBlock, 8*sizeof(int)) - 1; i++) dest_index_set[i] = ~dest_index_set[i];
			/* ... then, with i left at the last word, toggle only the mask_int[j] entries that map to a file number below OneFilePerBlock */
			for (j=0; j<8*sizeof(int); j++) {
				if (i*8*sizeof(int) + j >= OneFilePerBlock) break;
				if (dest_index_set[i] & mask_int[j]) dest_index_set[i] &= ~mask_int[j];
				else dest_index_set[i] |= mask_int[j];
			}
		}
		else {
			for(i=0; i<MAX_PARTITION; i++) {
				if (i>=GNumpartitions-1) break; /* STUPID: get_table returns 1 + part_num, where part_num was no. of partitions glimpseindex found */
				if ((i == 0) || (i == '\n')) continue;	/* entries 0 and '\n' are never toggled */
				if (dest_index_set[i]) dest_index_set[i] = 0;
				else dest_index_set[i] = 1;
			}
		}
	}

	/* Take intersection if parse=ANDPAT or 0 (one terminal pattern), union if OR_EXP; Take care of universal sets in index_tab[REAL_PARTITION - 1] */
	if (OneFilePerBlock) {
		if (parse & OR_EXP) {
			if (ret) {
				/* Union with "everything": flag index_tab as universal and turn on every file bit */
			ret_is_1:
				index_tab[REAL_PARTITION - 1] = 1;
				for(i=0; i<round(OneFilePerBlock, 8*sizeof(int)) - 1; i++) {
					index_tab[i] = 0xffffffff;
				}
				/* last word: set only the entries that correspond to existing files */
				index_tab[i] = 0;
				for (j=0; j<8*sizeof(int); j++) {
					if (i*8*sizeof(int) + j >= OneFilePerBlock) break;
					index_tab[i] |= mask_int[j];
				}
				if (ByteLevelIndex && !RecordLevelIndex && !NOBYTELEVEL && !(Only_first && !PRINTAPPXFILEMATCH))
					for (i=0; i<OneFilePerBlock; i++) {
						/* collect as many offsets as possible with RecordLevelIndex: free offset_tables at the end of process_query() */
						free_list(&offsetptr[i]);
						free_list(&offset_tab[i]);
					}
				if (ByteLevelIndex && !RecordLevelIndex) NOBYTELEVEL = 1;
				fclose(f_in);
				return 0;
			}
			/* Ordinary union: OR the bit words, then merge the offset lists file by file */
			index_tab[REAL_PARTITION - 1] = 0;
			for (i=0; i<round(OneFilePerBlock, 8*sizeof(int)); i++) index_tab[i] |= dest_index_set[i];
			if (ByteLevelIndex && !NOBYTELEVEL && (RecordLevelIndex || !(Only_first && !PRINTAPPXFILEMATCH))) {
				for (i=0; i<OneFilePerBlock; i++) {
					sorted_union(&offset_tab[i], &offsetptr[i], &index_tab[REAL_PARTITION - 2], dest_index_set[REAL_PARTITION - 2], 0);
					/* NOBYTELEVEL was false on entry to this block, so it can only be true here if sorted_union() set it */
					if (!RecordLevelIndex && NOBYTELEVEL) {
						/* collect as many offsets as possible with RecordLevelIndex: free offset_tables at the end of process_query() */
						for (iii=0; iii<OneFilePerBlock; iii++) {
							free_list(&offset_tab[iii]);
							free_list(&offsetptr[iii]);
						}
						break;
					}
				}
			}
		}
		else {
			/*
			 * Intersection.  Four cases, tested in this order:
			 *   1. index_tab is universal (or first_time) and the new set is universal
			 *   2. index_tab is universal (or first_time) and the new set is not
			 *   3. only the new set is universal
			 *   4. neither is universal
			 */
			if (((index_tab[REAL_PARTITION - 1] == 1) || first_time) && (ret)) {
			both_are_1:
				if (first_time) {
					/* nothing accumulated yet: initialise index_tab to the universal set */
					index_tab[REAL_PARTITION - 1] = 1;
					for(i=0; i<round(OneFilePerBlock, 8*sizeof(int)) - 1; i++) {
						index_tab[i] = 0xffffffff;
					}
					index_tab[i] = 0;
					for (j=0; j<8*sizeof(int); j++) {
						if (i*8*sizeof(int) + j >= OneFilePerBlock) break;
						index_tab[i] |= mask_int[j];
					}
				}
				first_time = 0;
				if (ByteLevelIndex && !RecordLevelIndex && !NOBYTELEVEL && !(Only_first && !PRINTAPPXFILEMATCH))
					for (i=0; i<OneFilePerBlock; i++) {
						/* collect as many offsets as possible with RecordLevelIndex: free offset_tables at the end of process_query() */
						free_list(&offsetptr[i]);
						free_list(&offset_tab[i]);
					}
				if (ByteLevelIndex && !RecordLevelIndex) NOBYTELEVEL = 1;
				/*
				fclose(f_in);
				return 0;
				*/
			}
			else if ((index_tab[REAL_PARTITION - 1] == 1) || first_time) {
				/* universal AND new == new: copy the new set and take over its offset lists */
				first_time = 0;
				index_tab[REAL_PARTITION - 1] = 0;
				for (i=0; i<round(OneFilePerBlock, 8*sizeof(int)); i++) index_tab[i] = dest_index_set[i];
				if (ByteLevelIndex && !NOBYTELEVEL && (RecordLevelIndex || !(Only_first && !PRINTAPPXFILEMATCH))) {
					for (i=0; i<OneFilePerBlock; i++) {
						free_list(&offset_tab[i]);
						offset_tab[i] = offsetptr[i];
						offsetptr[i] = NULL;	/* the list now belongs to offset_tab */
					}
				}
			}
			else if (ret) {
				/* old AND universal == old: index_tab is left as it is, only the new offset lists are discarded */
				if (ByteLevelIndex && !RecordLevelIndex && !NOBYTELEVEL && !(Only_first && !PRINTAPPXFILEMATCH))
					for (i=0; i<OneFilePerBlock; i++) free_list(&offsetptr[i]);
				/* collect as many offsets as possible with RecordLevelIndex: free offset_tables at the end of process_query() */
			}
			else {
				for (i=0; i<round(OneFilePerBlock, 8*sizeof(int)); i++) index_tab[i] &= dest_index_set[i];
				if (ByteLevelIndex && !NOBYTELEVEL && (RecordLevelIndex || !(Only_first && !PRINTAPPXFILEMATCH))) {
					/* first_time is always 0 in this branch (the earlier tests catch it), so this is taken only when WHOLEFILESCOPE is set */
					if (first_time || WHOLEFILESCOPE) {
						first_time = 0;
						for (i=0; i<OneFilePerBlock; i++) {
							sorted_union(&offset_tab[i], &offsetptr[i], &index_tab[REAL_PARTITION - 2], dest_index_set[REAL_PARTITION - 2], 0);
							if (!RecordLevelIndex && NOBYTELEVEL) {
								/* collect as many offsets as possible with RecordLevelIndex: free offset_tables at the end of process_query() */
								for (iii=0; iii<OneFilePerBlock; iii++) {
									free_list(&offset_tab[iii]);
									free_list(&offsetptr[iii]);
								}
								break;
							}
						}
					}
					else {
						for (i=0; i<OneFilePerBlock; i++) {
							/* intersect offsets only for files that survived the bitwise AND above; drop the new list otherwise */
							if ((index_tab[block2index(i)] & mask_int[i % (8*sizeof(int))]))
								sorted_intersection(i, &offset_tab[i], &offsetptr[i], &index_tab[REAL_PARTITION - 2]);
							else free_list(&offsetptr[i]);
							/*
							if (index_tab[REAL_PARTITION - 2] < MIN_OCCURRENCES) {
								if (!NOBYTELEVEL) {
									for (iii=0; iii<OneFilePerBlock; iii++) {
										free_list(&offset_tab[iii]);
										free_list(&offsetptr[iii]);
									}
								}
								NOBYTELEVEL = 1;
								OPTIMIZEBYTELEVEL = 1;
								break;
							}
							*/
						}
					}
				}
			}
		}
	}
	else {
		/* Partition mode: one entry per partition, combined element-wise */
		if (parse & OR_EXP)
			for(i=0; i<MAX_PARTITION; i++) index_tab[i] |= dest_index_set[i];
		else
			for(i=0; i<MAX_PARTITION; i++) index_tab[i] &= dest_index_set[i];
	}

#if BG_DEBUG
	fprintf(debug, "get_index(): the following partitions are ON\n");
	for(i=0; i<((OneFilePerBlock > 0) ? round(OneFilePerBlock, 8*sizeof(int)) : MAX_PARTITION); i++) {
		if(index_tab[i]) fprintf(debug, "%d,%x\n", i, index_tab[i]);
	}
#endif /*BG_DEBUG*/

	fclose(f_in);
	return 0;
}

/*
 * Same as above, but uses mgrep to search the index for many patterns at one go,
 * and interprets the output obtained from the -M and -P options (set in main.c).
*/mgrep_get_index(infile, index_tab, offset_tab, pat_list, pat_lens, pat_attr, mgrep_pat_index, num_mgrep_pat, patbufpos, index_argv, index_argc, outfile, partfp, parse, first_time)char *infile;unsigned int *index_tab;struct offsets **offset_tab;char *pat_list[];int pat_lens[];int pat_attr[];int mgrep_pat_index[];int num_mgrep_pat;int patbufpos;char *index_argv[];int index_argc;FILE *outfile;FILE *partfp;int parse;int first_time;{ int i=0, j, temp, iii, jjj; FILE *f_in; int ret; int x=0, y=0, even_words=1; int patnum; unsigned int *setptr; struct offsets **offsetptr; CHAR dummypat[MAX_PAT]; int dummylen=0; char allindexmark[MAXNUM_PAT]; int k; int sorted[MAXNUM_PAT], min, max; if (OneFilePerBlock && (parse & OR_EXP) && (index_tab[REAL_PARTITION - 1] == 1)) return 0; /* Do the mgrep() */ if ((f_in = fopen(infile, "w")) == NULL) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -