📄 get_index.c
字号:
/* Copyright (c) 1994 Burra Gopal, Udi Manber.  All Rights Reserved. */
#include "glimpse.h"
#include "defs.h"
#include <errno.h>	/* declares errno portably; never declare it by hand */

#if	BG_DEBUG
extern FILE *debug;
#endif	/*BG_DEBUG*/
extern char INDEX_DIR[MAX_LINE_LEN];
extern int Only_first;
extern int PRINTAPPXFILEMATCH;
extern int OneFilePerBlock;
extern int StructuredIndex;
extern int WHOLEFILESCOPE;
extern unsigned int *dest_index_set;
extern unsigned char *dest_index_buf;
extern int mask_int[32];
extern int ByteLevelIndex;
extern int RecordLevelIndex;
extern int rdelim_len;
extern char rdelim[MAX_LINE_LEN];
extern char old_rdelim[MAX_LINE_LEN];
extern int NOBYTELEVEL;
extern int OPTIMIZEBYTELEVEL;
extern int RegionLimit;
extern int PRINTINDEXLINE;
extern struct offsets **src_offset_table;
extern unsigned int *multi_dest_index_set[MAXNUM_PAT];
extern struct offsets **multi_dest_offset_table[MAXNUM_PAT];
extern char *index_argv[MAX_ARGS];
extern int index_argc;
extern CHAR GProgname[MAXNAME];
extern FILE *indexfp, *minifp;
extern int REAL_PARTITION, REAL_INDEX_BUF, MAX_ALL_INDEX, FILEMASK_SIZE;
extern int p_table[MAX_PARTITION];
extern int GNumpartitions;
extern int INVERSE;	/* agrep's global: need here to implement ~ in index-search */
extern int last_Y_filenumber;

#define USEFREQUENCIES	0	/* set to one if we want to stop collecting offsets sometimes since words "look" like they are in the stop list... */

/*
 * Frees every node of the offset list headed by *p1 (through my_free)
 * and leaves *p1 == NULL.
 * Always returns 0; the value carries no information.
 */
int
free_list(struct offsets **p1)
{
	struct offsets *tp1;

	while (*p1 != NULL) {
		tp1 = *p1;
		*p1 = (*p1)->next;
		my_free(tp1, sizeof(struct offsets));
	}
	return 0;
}

/*
 * Unions offset list list2 into list1, both sorted in increasing order of
 * offset.  Nodes are moved out of list2 (duplicates are freed), so both
 * lists are changed and *list2 is NULL on a normal return.
 *
 *	f	in/out count of elements in list1.  It grows by one for every
 *		node spliced into the middle of list1.  When the remainder of
 *		list2 is appended wholesale at the tail, the count becomes
 *		(*f + cf) if cf > 0; otherwise the appended nodes are not
 *		counted.  *f is left untouched when NOBYTELEVEL gets set.
 *	pf	only consulted by the USEFREQUENCIES heuristics.
 *	cf	see f above.
 *
 * If !RecordLevelIndex && NOBYTELEVEL on entry, both lists are simply freed.
 * Always returns 0; the value carries no information.
 */
int
sorted_union(struct offsets **list1, struct offsets **list2, int *f, int pf, int cf)
{
	struct offsets **p1, *p2;
	int count = *f;	/* don't update *f if setting NOBYTELEVEL */

	if (!RecordLevelIndex && NOBYTELEVEL) {	/* cannot come here! */
		free_list(list1);
		free_list(list2);
		return 0;
	}

#if	USEFREQUENCIES
	if (!RecordLevelIndex && (
	    ((pf > MIN_OCCURRENCES) && (count > MAX_UNION * pf)) ||
	    (count > MAX_ABSOLUTE) ||
	    ((count > MIN_OCCURRENCES) && (pf > MAX_UNION * count)) ||
	    (pf > MAX_ABSOLUTE)
	   )) {
		/* enough if we check the second condition at the beginning since it won't surely be satisfied after this when count ++ */
		NOBYTELEVEL = 1;
		return 0;
	}
#endif

	while (*list2 != NULL) {
		/* extract 1st element, update list2 */
		p2 = *list2;
		*list2 = (*list2)->next;
		p2->next = NULL;

		/* find position to insert p2, and do so */
		p1 = list1;
		while (((*p1) != NULL) && ((*p1)->offset < p2->offset))
			p1 = &(*p1)->next;

		if (*p1 == NULL) {	/* end of list1: append list2 to it and return */
			*p1 = p2;
			p2->next = *list2;
			*list2 = NULL;
			if (cf > 0)
				count = *f + cf;
#if	USEFREQUENCIES
			if (!RecordLevelIndex && (
			    ((pf > MIN_OCCURRENCES) && (count > MAX_UNION * pf)) ||
			    (count > MAX_ABSOLUTE))) {
				NOBYTELEVEL = 1;
				return 0;
			}
#endif
			*f = count;
			return 0;
		}
		else if (p2->offset == (*p1)->offset) {
			/* already present in list1: drop the duplicate */
			my_free(p2, sizeof(struct offsets));
		}
		else {
			p2->next = *p1;
			*p1 = p2;
			count ++;
#if	USEFREQUENCIES
			if (!RecordLevelIndex && (
			    ((pf > MIN_OCCURRENCES) && (count > MAX_UNION * pf)) ||
			    (count > MAX_ABSOLUTE)
			   )) {
				NOBYTELEVEL = 1;
				return 0;
			}
#endif
			/* update list1: list2 is sorted too, so the next search can start after the node just inserted */
			list1 = &(*p1)->next;
		}
	}
	*f = count;
	return 0;
}

/*
 * Intersects offset list list2 with list1, both sorted in increasing order
 * of offset.  A list1 node survives if some list2 node has the same offset
 * or, when !RecordLevelIndex, an offset within RegionLimit of it.
 * Every list2 node is freed, so both lists are changed.
 *
 *	filenum	not used by this function.
 *	f	in/out count of elements in list1; decremented once for every
 *		list1 node that is removed.
 *
 * If !RecordLevelIndex && NOBYTELEVEL on entry, both lists are simply freed
 * and *f is left alone.
 * Always returns 0; the value carries no information.
 */
int
sorted_intersection(int filenum, struct offsets **list1, struct offsets **list2, int *f)
{
	struct offsets **p1, *p2, *tp1;
	int diff;
#if	0
	struct offsets *tp;	/* cursor for the disabled debug dumps */
#endif

	if (!RecordLevelIndex && NOBYTELEVEL) {	/* cannot come here! */
		free_list(list1);
		free_list(list2);
		return 0;
	}

	/*
	 * No need to clear the done flags of list1 first: per the original
	 * note they are initialized to 0 on creation, and the final sweep
	 * below sets them back to 0.
	 */

#if	0
printf("sorted_intersection BEGIN: list1=\n\t");
tp = *list1;
while (tp != NULL) {
	printf("%d ", tp->offset);
	tp = tp->next;
}
printf("\n");
printf("list2=\n\t");
tp = *list2;
while (tp != NULL) {
	printf("%d ", tp->offset);
	tp = tp->next;
}
printf("\n");
#endif

	/* find position to intersect list2, and do so: REMEMBER: list1 is in increasing order, and so is list2 !!! */
	p1 = list1;
	while ( ((*p1) != NULL) && (*list2 != NULL) ) {
		diff = (*list2)->offset - (*p1)->offset;
		if ( (diff == 0) || (!RecordLevelIndex && (diff >= -RegionLimit) && (diff <= RegionLimit)) ) {
			(*p1)->done = 1;	/* p1 is in */
			p1 = &(*p1)->next;
			/* Can't increment p2 here since it might keep others after p1 also in */
		}
		else if (diff < 0) {
			/* head of list2 lies too far before p1: discard it */
			p2 = *list2;
			*list2 = (*list2)->next;
			my_free(p2, sizeof(struct offsets));
			/* p1 can intersect with list2's next */
		}
		else {
			/*
			 * p1 lies too far before the head of list2: remove it.
			 * (A former "keep it if already done" test here has been
			 * hard-wired to false since 25/08/1996, bgopal@cs.arizona.edu,
			 * so it is no longer spelled out.)
			 */
			tp1 = *p1;
			*p1 = (*p1)->next;
			my_free(tp1, sizeof(struct offsets));
			(*f) --;
			/* list2 can intersect with p1's next */
		}
	}

	/* whatever remains of list2 is no longer needed */
	while (*list2 != NULL) {
		p2 = *list2;
		*list2 = (*list2)->next;
		my_free(p2, sizeof(struct offsets));
	}

	/* final sweep: drop the unmarked nodes of list1, unmark the survivors */
	p1 = list1;
	while (*p1 != NULL) {
		if ((*p1)->done == 0) {
			tp1 = *p1;
			*p1 = (*p1)->next;
			my_free(tp1, sizeof(struct offsets));
			(*f) --;
		}
		else {
			(*p1)->done = 0;	/* for the next round! */
			p1 = &(*p1)->next;
		}
	}

#if	0
printf("sorted_intersection END: list1=\n\t");
tp = *list1;
while (tp != NULL) {
	printf("%d ", tp->offset);
	tp = tp->next;
}
printf("\n");
printf("list2=\n\t");
tp = *list2;
while (tp != NULL) {
	printf("%d ", tp->offset);
	tp = tp->next;
}
printf("\n");
#endif
	return 0;
}

/*
 * Removes (and frees) every node of the list headed by *p1 whose sign
 * field is 0; the remaining nodes keep their order.
 * Always returns 0; the value carries no information.
 */
int
purge_offsets(struct offsets **p1)
{
	struct offsets *tp1;

	while (*p1 != NULL) {
		if ((*p1)->sign == 0) {
			tp1 = *p1;
			(*p1) = (*p1)->next;
			my_free(tp1, sizeof(struct offsets));
		}
		else p1 = &(*p1)->next;
	}
	return 0;
}

/*
 * Returns 1 if it is a Universal set, 0 otherwise.
 * Constraint: WORD_END_MARK/ALL_INDEX_MARK must occur at or after buffer[0]
 *
 *	buffer	one index line; buffer[0] is '\n' and the word starts at
 *		buffer[1].  The line is modified in place (temporary
 *		terminators, and get_block_numbers() is handed the same
 *		pointer twice).
 *	set	bit set that is filled with ones for the first OneFilePerBlock
 *		bits when the word carries ALL_INDEX_MARK.
 *	patlen, pattern, outfile
 *		handed to memagrep_search() when the index is not split
 *		(!OneFilePerBlock).
 *	patattr	when > 0 and StructuredIndex is on, the line is ignored
 *		(return 0) unless its attribute equals patattr.
 *	partfp	handed to get_block_numbers().
 *	offset_table, frequency, prevfreq
 *		NOTE(review): presumably the per-file offset lists and their
 *		counts -- confirm against the complete source.
 */
get_set(buffer, set, offset_table, patlen, pattern, patattr, outfile, partfp, frequency, prevfreq)
	unsigned char	*buffer;
	unsigned int	*set;
	struct offsets	**offset_table;
	int	patlen;
	char	*pattern;
	int	patattr;
	FILE	*outfile;
	FILE	*partfp;
	int	*frequency, prevfreq;
{
	int	bdx2, j;
	int	ret;
	int	x=0, y=0, diff, even_words=1, prevy;
	int	indexattr = 0;
	struct offsets *o, *tailo, *heado;
	int	delim = encode8b(0);
	int	curfreq = 0;
	unsigned char c;

	/* buffer[0] is '\n', search must start from buffer[1] */
	bdx2 = 1;
	if (OneFilePerBlock)
		while((bdx2<REAL_INDEX_BUF+1) && (buffer[bdx2] != WORD_END_MARK) && (buffer[bdx2] != ALL_INDEX_MARK)) bdx2++;
	else
		while((bdx2<REAL_INDEX_BUF+1) && (buffer[bdx2] != WORD_END_MARK)) bdx2++;
	if (bdx2 >= REAL_INDEX_BUF+1) return 0;	/* no end-of-word mark found */

	if (StructuredIndex) {
		/* the attribute follows the mark: one byte or two, depending on StructuredIndex */
		if (StructuredIndex < MaxNum8bPartition - 1) {
			indexattr = decode8b(buffer[bdx2+1]);
		}
		else {
			indexattr = decode16b((buffer[bdx2+1] << 8) | (buffer[bdx2 + 2]));
		}
		/* printf("i=%d p=%d\n", indexattr, patattr); */
		if ((patattr > 0) && (indexattr != patattr)) {
#if	BG_DEBUG
			fprintf(debug, "indexattr=%d DOES NOT MATCH patattr=%d\n", indexattr, patattr);
#endif	/*BG_DEBUG*/
			return 0;
		}
	}

	if (PRINTINDEXLINE) {
		/* terminate the word temporarily so that it can be printed */
		c = buffer[bdx2];
		buffer[bdx2] = '\0';
		printf("%s %d", &buffer[1], indexattr);
		buffer[bdx2] = c;
		if (c == ALL_INDEX_MARK) printf(" ! ");
		else printf(" : ");
	}

	if (OneFilePerBlock && (buffer[bdx2] == ALL_INDEX_MARK)) {
		/* A intersection Univ-set = A: so src_index_set won't change; A union Univ-set = Univ-set: so src_index_set = all 1s */
#if	BG_DEBUG
		buffer[bdx2] = '\0';
		fprintf(debug, "All indices search for %s\n", buffer + 1);
		buffer[bdx2] = ALL_INDEX_MARK;
#endif	/*BG_DEBUG*/
		set[REAL_PARTITION - 1] = 1;
		for(bdx2=0; bdx2<round(OneFilePerBlock, 8*sizeof(int)) - 1; bdx2++) {
			set[bdx2] = 0xffffffff;
		}
		/* last word: set only the bits below OneFilePerBlock */
		set[bdx2] = 0;
		for (j=0; j<8*sizeof(int); j++) {
			if (bdx2*8*sizeof(int) + j >= OneFilePerBlock) break;
			set[bdx2] |= mask_int[j];
		}
		set[REAL_PARTITION - 1] = 1;
		if (ByteLevelIndex && !RecordLevelIndex) NOBYTELEVEL = 1;	/* With RecordLevelIndex, I want NOBYTELEVEL to be unused (i.e., !NOBYTELEVEL is always true) */
		return 1;
	}
	else if (!OneFilePerBlock) {	/* check only if index+partitions are NOT split */
#if	BG_DEBUG
		buffer[bdx2] = '\0';
		fprintf(debug, "memagrep-line: %s\t\tpattern: %s\n", buffer, pattern);
#endif	/*BG_DEBUG*/
		/* ignore if pattern with all its options matches block number sequence: bg+udi: Feb/16/93 */
		buffer[bdx2] = '\n';	/* memagrep needs buffer to end with '\n' */
		if ((ret = memagrep_search(patlen, pattern, bdx2+1, buffer, 0, outfile)) <= 0) return 0;
		else buffer[bdx2] = WORD_END_MARK;
	}

	/* skip the attribute bytes, if any, and then the mark itself */
	if ((StructuredIndex > 0) && (StructuredIndex < MaxNum8bPartition - 1)) bdx2 ++;
	else if (StructuredIndex > 0) bdx2 += 2;
	bdx2++;	/* bdx2 now points to the first byte of the offset */
	even_words = 1;

	/* Code identical to that in merge_in() in glimpseindex */
	if (OneFilePerBlock) {
		get_block_numbers(&buffer[bdx2], &buffer[bdx2], partfp);
		while((bdx2<REAL_INDEX_BUF) && (buffer[bdx2] != '\n') && (buffer[bdx2] != '\0')) {
			/* First get the file name */
			x = 0;
			if (ByteLevelIndex) {
				if (OneFilePerBlock <= MaxNum8bPartition) {
					x = decode8b(buffer[bdx2]);
					bdx2 ++;
				}
				else if (OneFilePerBlock <= MaxNum16bPartition) {
					x = (buffer[bdx2] << 8) | buffer[bdx2+1];
					x = decode16b(x);
					bdx2 += 2;
				}
				else {
					x = (buffer[bdx2] << 16) | (buffer[bdx2+1] << 8) | buffer[bdx2+2];
					x = decode24b(x);
					bdx2 += 3;
				}
			}
			else if (OneFilePerBlock <= MaxNum8bPartition) {
				x = decode8b(buffer[bdx2]);
				bdx2 ++;
			}
			else if (OneFilePerBlock <= MaxNum12bPartition) {
				/* two 12-bit values share three bytes: the middle byte holds one nibble of each */
				if (even_words) {
					x = ((buffer[bdx2+1] & 0x0000000f) << 8) | buffer[bdx2];
					x = decode12b(x);
					bdx2 += 2;
					even_words = 0;
				}
				else {	/* odd number of words so far */
					x = ((buffer[bdx2-1] & 0x000000f0) << 4) | buffer[bdx2];
					x = decode12b(x);
					bdx2 ++;
					even_words = 1;
				}
			}
			/* NOTE(review): this copy of the file ends here; the rest of get_set() is missing and must be restored from the original source */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -