📄 io.c
字号:
patlen += 2; } else if (filterbuf[commandpos] != '\'') { commandpos ++; patlen ++; } else break; } if ((commandpos >= len) || (patlen <= 0)) continue; commandpos ++; } else { patpos = commandpos; patlen = 0; while ((commandpos < len) && (filterbuf[commandpos] != ' ') && (filterbuf[commandpos] != '\t')) { commandpos ++; patlen ++; } while ((commandpos < len) && ((filterbuf[commandpos] == ' ') || (filterbuf[commandpos] == '\t'))) commandpos ++; if (commandpos >= len) continue; } memcpy(tempbuf, &filterbuf[patpos], patlen); tempbuf[patlen] = '\0'; if ((filter_len[num_filter] = convert2agrepregexp(tempbuf, patlen)) == 0) continue; /* inplace conversion */ filter[num_filter] = (unsigned char *) strdup(tempbuf); filter_command[num_filter] = (unsigned char *)strdup(&filterbuf[commandpos]); num_filter ++; } fclose(filterfile); } load_dyn_filters(); /* load filters in shared libraries -- CV 9/14/99 */}/* 1 if filter application was successful and the output (>1B) is in outname, 2 if some pattern matched but there is no output, 0 otherwise: sep 15-18 '94 *//* memagrep is initialized in partition.c for calls from dir.c, and it is already done by the time we call this function from main.c */apply_filter(inname, outname) char *inname, *outname; /* outname is in-out, inname is in */{ int i; char name[MAX_LINE_LEN], es1[MAX_LINE_LEN], es2[MAX_LINE_LEN]; int name_len = strlen(inname); char s[MAX_LINE_LEN]; FILE *dummyout; FILE *dummyin; char dummybuf[4]; char prevoutname[MAX_LINE_LEN]; char newoutname[MAX_LINE_LEN]; char tempoutname[MAX_LINE_LEN]; char tempinname[MAX_LINE_LEN]; int ret = 0; int unlink_prevoutname = 0; if (num_filter <= 0) return 0; if ((dummyout = fopen("/dev/null", "w")) == NULL) return 0; /* ready for memgrep */ name[0] = '\n'; special_get_name(inname, name_len, tempinname); name_len = strlen(tempinname); strcpy(name+1, tempinname); strcpy(prevoutname, tempinname); strcpy(newoutname, outname); /* Current properly filtered output is always in prevoutname */ for(i=0; i<num_filter; i++) { if (filter_len[i] > 0) { char *suffix; name[name_len + 1] = '\0'; if ((suffix = strstr(name+1, filter[i])) != NULL) { /* Chris Dalton */ if (ret == 0) ret = 2; /* yes, it matched: now apply the command and get the output */ /* printf("filtering %s\n", name); */ /* new filter function -- CV 9/14/99 */ apply_one_filter(i, prevoutname, newoutname); if (((dummyin = my_fopen(newoutname, "r")) == NULL) || (fread(dummybuf, 1, 1, dummyin) <= 0)) { if (dummyin != NULL) fclose(dummyin); unlink(newoutname); continue; } /* Filter was successful: output exists and has atleast 1 byte in it */ fclose(dummyin); if (unlink_prevoutname) { unlink(prevoutname); strcpy(tempoutname, prevoutname); strcpy(prevoutname, newoutname); strcpy(newoutname, tempoutname); } else { strcpy(prevoutname, newoutname); sprintf(newoutname, "%s.o", prevoutname); } ret = 1; unlink_prevoutname = 1;#if 1 /* if the matched text was a proper suffix of the name, */ /* remove the suffix just processed before examining the */ /* name again. Chris Dalton */ /* And I don't know what the equivalent thing is with */ /* memagrep_search: since it doesn't return a pointer to */ /* the place where the match occured. Burra Gopal */ if (strcmp(filter[i], suffix) == 0) { name_len -= strlen(suffix); *suffix= '\0'; }#endif /*1*/ if (strlen(newoutname) >= MAX_LINE_LEN - 1) break; } } else { /* must call memagrep */ name[name_len + 1] = '\n'; /* memagrep wants names to end with '\n': '\0' is not necessary */ /* printf("i=%d filterlen=%d filter=%s inlen=%d input=%s\n", i, -filter_len[i], filter[i], len_current_dir_buf, current_dir_buf); */ if (((filter_len[i] == -2) && (filter[i][0] == '.') && (filter[i][1] == '*')) || (memagrep_search(-filter_len[i], filter[i], name_len + 2, name, 0, dummyout) > 0)) { if (ret == 0) ret = 2; /* yes, it matched: now apply the command and get the output */ /* printf("filtering %s\n", name); */ /* new filter function -- CV 9/14/99 */ apply_one_filter(i, prevoutname, newoutname); if (((dummyin = my_fopen(newoutname, "r")) == NULL) || (fread(dummybuf, 1, 1, dummyin) <= 0)) { if (dummyin != NULL) fclose(dummyin); unlink(newoutname); continue; } /* Filter was successful: output exists and has atleast 1 byte in it */ fclose(dummyin); if (unlink_prevoutname) { unlink(prevoutname); strcpy(tempoutname, prevoutname); strcpy(prevoutname, newoutname); strcpy(newoutname, tempoutname); } else { strcpy(prevoutname, newoutname); sprintf(newoutname, "%s.o", prevoutname); } ret = 1; unlink_prevoutname = 1; if (strlen(newoutname) >= MAX_LINE_LEN - 1) break; } } } if (ret == 1) strcpy(outname, prevoutname); else { /* dummy filter that copies input to output: caller can use tempinname but this has easy interface */ /* replaced system() call with a simple copy function. -- CV 9/14/99 */ copy_file(tempinname, outname); } fclose(dummyout); return ret;}/* Use a modified wais stoplist to do this with simple strcmp's in a for loop */static_stop_list(word) char *word;{ return 0;}/* This is the stuff that used to be present in the old build_in.c *//* Some variables used throughout */FILE *TIMEFILE; /* file descriptor for sorting .glimpse_filenames by time */#if BG_DEBUGFILE *LOGFILE; /* file descriptor for LOG output */#endif /*BG_DEBUG*/FILE *STATFILE; /* file descriptor for statistical data about indexed files */FILE *MESSAGEFILE; /* file descriptor for important messages meant for the user */char INDEX_DIR[MAX_LINE_LEN];char sync_path[MAX_LINE_LEN];struct stat istbuf;struct stat excstbuf;struct stat incstbuf;int ICurrentFileOffset;int NextICurrentFileOffset;/* Some options used throughout */int GenerateHash = OFF;int KeepFilenames = OFF;int OneFilePerBlock = OFF;int total_size = 0;int total_deleted = 0;int MAXWORDSPERFILE = 0;int NUMERICWORDPERCENT = DEF_NUMERIC_WORD_PERCENT;int AddToIndex = OFF;int DeleteFromIndex = OFF;int PurgeIndex = ON;int FastIndex = OFF;int BuildDictionary = OFF;int BuildDictionaryExisting = OFF;int CompressAfterBuild = OFF;int IncludeHigherPriority = OFF;int FilenamesOnStdin = OFF;int ExtractInfo = OFF;int InfoAfterFilename = OFF;int FirstWordOfInfoIsKey = OFF;int UseFilters = OFF;int ByteLevelIndex = OFF;int RecordLevelIndex = OFF; /* When we want a -o like index but want to do booleans on a per-record basis directly from index: robint@zedcor.com */ /* This type of index doesn't make sense with attributes since they span > 1 record; hence StructuredIndex == -2 => this = ON */int StoreByteOffset = OFF; /* In RecordLevelIndex, store record # for each word or byte offset of the record: record # is the default (12/12/96) */char rdelim[MAX_LINE_LEN];char old_rdelim[MAX_LINE_LEN];int rdelim_len = 0;/* int IndexUnderscore = OFF; */int IndexableFile = OFF;int MAX_INDEX_PERCENT = DEF_MAX_INDEX_PERCENT;int MAX_PER_MB = DEF_MAX_PER_MB;int I_THRESHOLD = DEF_I_THRESHOLD;int BigHashTable = OFF;int IndexEverything = OFF;int HashTableSize = MAX_64K_HASH;int BuildTurbo = OFF;int SortByTime = OFF;int AddedMaxWordsMessage = OFF;int AddedMixedWordsMessage = OFF;int icount=0; /* count the number of my_malloc for indices structure */int hash_icount=0; /* to see how much was added to the current hash table */int save_icount=0; /* to see how much was added to the index by the current file */int numeric_icount=0; /* to see how many numeric words were there in the current file */int mask_int[32] = MASK_INT;int p_table[MAX_PARTITION];int memory_usage = 0;char *my_malloc(len) int len;{ char *s; static int i=100; if ((s = malloc(len)) != NULL) memory_usage += len; else fprintf(stderr, "malloc failed after memory_usage = %x Bytes\n", memory_usage); /* Don't exit since might do traverse here: exit in glimpse though */#if BG_DEBUG printf("m:%x ", memory_usage); i--; if (i==0) { printf("\n"); i = 100; }#endif /*BG_DEBUG*/ return s;}my_free(ptr, size) void *ptr; int size;{ if (ptr) free(ptr); memory_usage -= size;#if BG_DEBUG printf("f:%x ", memory_usage);#endif /*BG_DEBUG*/}int file_num = 0;int old_file_num = 0; /* upto what file number should disable list be accessed: < file_num if incremental indexing */int new_file_num = -1; /* after purging index, how many files are left: for save_data_structures() */int bp=0; /* buffer pointer */unsigned char word[MAX_WORD_BUF];int FirstTraverse1 = ON;struct indices *ip;/* Globals used in merge, and also in glimpse's get_index.c */unsigned int *src_index_set = NULL;unsigned int *dest_index_set = NULL;unsigned char *src_index_buf = NULL;unsigned char *dest_index_buf = NULL;unsigned char *merge_index_buf = NULL;/* * Routines for zonal memory allocation for glimpseindex and very fast search in glimpse. */int next_free_token = 0;struct token *free_token = NULL; /*[I_THRESHOLD/AVG_OCCURRENCES]; */int next_free_indices = 0;struct indices *free_indices = NULL; /*[I_THRESHOLD]; */int next_free_word = 0;char *free_word = NULL; /*[I_THRESHOLD/AVG_OCCURRENCES * AVG_WORD_LEN]; */extern int usemalloc;/* * The beauty of this allocation scheme is that "free" does not need to be implemented! */tokenallfree(){ next_free_token = 0;}tokenfree(e, len)struct token *e;int len;{ if (usemalloc) my_free(e, sizeof(struct token));}struct token *tokenalloc(len)int len;{ struct token *e; if (usemalloc) (e) = (struct token *)my_malloc(sizeof(struct token)); else { if (free_token == NULL) free_token = (struct token *)my_malloc(sizeof(struct token) * I_THRESHOLD / INDICES_PER_TOKEN); if (free_token == NULL) {fprintf(stderr, "malloc failure in tokenalloc()\n"); exit(2);} else (e) = ((next_free_token >= I_THRESHOLD / INDICES_PER_TOKEN) ? (NULL) : (&(free_token[next_free_token ++]))); } return e;}indicesallfree(){ next_free_indices = 0;}indicesfree(e, len)struct indices *e;int len;{ if (usemalloc) my_free(e, sizeof(struct indices));}struct indices *indicesalloc(len)int len;{ struct indices *e; if (usemalloc) (e) = (struct indices *)my_malloc(sizeof(struct indices)); else { if (free_indices == NULL) free_indices = (struct indices *)my_malloc(sizeof(struct indices) * I_THRESHOLD); if (free_indices == NULL) {fprintf(stderr, "malloc failure in indicesalloc()\n"); exit(2);} else (e) = ((next_free_indices >= I_THRESHOLD) ? (NULL) : (&(free_indices[next_free_indices ++]))); } return e;}/* For words in a token structure */wordallfree(){ next_free_word = 0;}wordfree(s, len)char *s;int len;{ if (usemalloc) my_free(s, len);}char *wordalloc(len)int len;{ char *s; if (usemalloc) (s) = (char *)my_malloc(len); else { if (free_word == NULL) free_word = (char *)my_malloc(AVG_WORD_LEN * I_THRESHOLD/INDICES_PER_TOKEN); if (free_word == NULL) {fprintf(stderr, "malloc failure in wordalloc()\n"); exit(2); } else (s) = ((next_free_word + len + 2 >= AVG_WORD_LEN * I_THRESHOLD/INDICES_PER_TOKEN) ? (NULL) : (&(free_word[next_free_word]))); if (s != NULL) next_free_word += (len); /* 2 for 1 char word with '\0' */ } return s;}struct mini *mini_array = NULL;int mini_array_len = 0;#if WORD_SORTED/* * Routines that operate on the index using the mini-index. * * The index is a list of words+delim+attr+offset+\n sorted * by the word (using strcmp). * * The mini-index keeps track of the offsets in the index * where every WORDS_PER_REGION-th word in the index occurs. * There is no direct way for glimpse to seek into the mini * file for the exact offset of this word since unlike hash * values words are of variable length. * * This is small enough to be kept in memory and searched * directly with full word case insensitive string compares * with binary search. For 256000 words in index there will be * 256000/128 = 2000 words in mini-index that will occupy * 2000*32 (avgword + off + delim/attr + sizeof(struct mini)), * which is less than 16 pages (can always be resident in mem). * * We just need to string search log_2(2000) + 128 words of * length 12B each in the worst case ===> VERY FAST. This is * not the best possible but space is the limit. If we hash the * whole index/regions in the index, we need TOO MUCH memory. *//*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -