📄 glimpse.c
字号:
if ((disable_list != NULL) && (disable_list[block2index(i)] & mask_int[i%(8*sizeof(int))])) continue; /* nop since disable_list IS NULL */ strcpy(name, LIST_GET(name_list, i)); tcompress_file(name, outname, TC_REMOVE | TC_EASYSEARCH | TC_OVERWRITE | TC_NOPROMPT); } }docleanup: /* Restore old search-dictionaries */ sprintf(s, "%s/.glimpse_tempdir.%d/.glimpse_index", INDEX_DIR, pid); if (!access(s, R_OK)) {#if SFS_COMPAT sprintf(s1, "%s/%s", INDEX_DIR, INDEX_FILE); sprintf(s, "%s/.glimpse_tempdir.%d/%s", INDEX_DIR, pid, INDEX_FILE); rename(s, s1); sprintf(s1, "%s/%s", INDEX_DIR, P_TABLE); sprintf(s, "%s/.glimpse_tempdir.%d/%s", INDEX_DIR, pid, P_TABLE); rename(s, s1); sprintf(s1, "%s/%s", INDEX_DIR, NAME_LIST); sprintf(s, "%s/.glimpse_tempdir.%d/%s", INDEX_DIR, pid, NAME_LIST); rename(s, s1); sprintf(s1, "%s/%s", INDEX_DIR, NAME_LIST_INDEX); sprintf(s, "%s/.glimpse_tempdir.%d/%s", INDEX_DIR, pid, NAME_LIST_INDEX); rename(s, s1); sprintf(s1, "%s/%s", INDEX_DIR, NAME_HASH); sprintf(s, "%s/.glimpse_tempdir.%d/%s", INDEX_DIR, pid, NAME_HASH); rename(s, s1); sprintf(s1, "%s/%s", INDEX_DIR, NAME_HASH_INDEX); sprintf(s, "%s/.glimpse_tempdir.%d/%s", INDEX_DIR, pid, NAME_HASH_INDEX); rename(s, s1); sprintf(s1, "%s/%s", INDEX_DIR, MINI_FILE); sprintf(s, "%s/.glimpse_tempdir.%d/%s", INDEX_DIR, pid, MINI_FILE); rename(s, s1); sprintf(s1, "%s/%s", INDEX_DIR, DEF_STAT_FILE); sprintf(s, "%s/.glimpse_tempdir.%d/%s", INDEX_DIR, pid, DEF_STAT_FILE); rename(s, s1); sprintf(s1, "%s/%s", INDEX_DIR, ATTRIBUTE_FILE); sprintf(s, "%s/.glimpse_tempdir.%d/%s", INDEX_DIR, pid, ATTRIBUTE_FILE); rename(s, s1);#else /* sprintf(s, "exec %s -f %s/.glimpse_tempdir.%d/.glimpse_* %s\n", SYSTEM_MV, INDEX_DIR, pid, INDEX_DIR); */ sprintf(s, "exec %s -f '%s/.glimpse_tempdir.%d/%s' '%s'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), pid, escapesinglequote(INDEX_FILE, es2), INDEX_DIR); system(s); sprintf(s, "exec %s -f '%s/.glimpse_tempdir.%d/%s' '%s'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), pid, P_TABLE, escapesinglequote(INDEX_DIR, es2)); system(s); sprintf(s, "exec %s -f '%s/.glimpse_tempdir.%d/%s' '%s'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), pid, NAME_LIST, escapesinglequote(INDEX_DIR, es2)); system(s); sprintf(s, "exec %s -f '%s/.glimpse_tempdir.%d/%s' '%s'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), pid, NAME_LIST_INDEX, escapesinglequote(INDEX_DIR, es2)); system(s); sprintf(s, "exec %s -f '%s/.glimpse_tempdir.%d/%s' '%s'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), pid, NAME_HASH, escapesinglequote(INDEX_DIR, es2)); system(s); sprintf(s, "exec %s -f '%s/.glimpse_tempdir.%d/%s' '%s'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), pid, NAME_HASH_INDEX, escapesinglequote(INDEX_DIR, es2)); system(s); sprintf(s, "exec %s -f '%s/.glimpse_tempdir.%d/%s' '%s'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), pid, MINI_FILE, escapesinglequote(INDEX_DIR, es2)); system(s); sprintf(s, "exec %s -f '%s/.glimpse_tempdir.%d/%s' '%s'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), pid, DEF_STAT_FILE, escapesinglequote(INDEX_DIR, es2)); system(s); sprintf(s, "exec %s -f '%s/.glimpse_tempdir.%d/%s' '%s'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), pid, ATTRIBUTE_FILE, escapesinglequote(INDEX_DIR, es2)); system(s);#endif sprintf(s, "%s/.glimpse_tempdir.%d", INDEX_DIR, pid); rmdir(s); } printf("\nBuilt new cast-dictionary in %s\n", INDEX_DIR);#else /*BUILDCAST*/ if (AddToIndex || DeleteFromIndex || FastIndex) { /* Not handling byte level indices here for now */ int indextype = 0, indexnumber = OFF, structuredindex = OFF, recordlevelindex = OFF, temp_attr_num = 0, bytelevelindex = OFF; char temp_rdelim[MAX_LINE_LEN]; sprintf(s, "%s/%s", INDEX_DIR, INDEX_FILE); if (-1 == stat(s, &istbuf)) { if (AddToIndex || DeleteFromIndex) { fprintf(stderr, "Cannot find previous index %s! Fresh indexing recommended\n", s); return usage(0); } file_num = 0; file_id = 0; part_num = 1; goto fresh_indexing; } /* Find out existing index of words and partitions/filenumbers */ if ((indextype = get_index_type(s, &indexnumber, &indextype, &structuredindex, temp_rdelim)) < 0) {#if 0 fprintf(stderr, "Fresh indexing recommended: -a and -f are not supported with -b as yet\n"); exit(1); /* we support it now */#endif } if (structuredindex == -2) { recordlevelindex = 1; bytelevelindex = 1; } if (structuredindex <= 0) structuredindex = 0; else { temp_attr_num = structuredindex; structuredindex = 1; } file_num = part_num = 0; sprintf(s, "%s/%s", INDEX_DIR, NAME_LIST); file_num = get_array_of_lines(s, name_list, MaxNum24bPartition, 1); initialize_disable_list(file_num); initialize_data_structures(file_num); if (!indextype) { sprintf(s, "%s/%s", INDEX_DIR, P_TABLE); part_num = get_table(s, p_table, MAX_PARTITION, 1) - 1; /* part_num INCLUDES last partition */ } else merge_splits(); /* Check for errors, Set OneFilePerBlock */ if ( (file_num <= 0) || (!indextype && (part_num <= 0)) ) { if (AddToIndex || DeleteFromIndex) { fprintf(stderr, "Cannot find previous glimpseindex files! Fresh indexing recommended\n"); return usage(0); } file_num = 0; file_id = 0; part_num = 1; my_free(disable_list); disable_list = NULL; goto fresh_indexing; } if (OneFilePerBlock && !indextype) { fprintf(stderr, "Warning: ignoring option -o: using format of existing index\n"); OneFilePerBlock = 0; ByteLevelIndex = 0; } else { OneFilePerBlock = abs(indextype); if (indextype < 0) ByteLevelIndex = ON; } if (StructuredIndex && !structuredindex) { fprintf(stderr, "Warning: ignoring option -s: using format of existing index\n"); StructuredIndex = 0; attr_num = 0; } else { StructuredIndex = structuredindex; attr_num = temp_attr_num; } if (RecordLevelIndex && !recordlevelindex) { fprintf(stderr, "Warning: ignoring option -r: using format of existing index\n"); RecordLevelIndex = 0; ByteLevelIndex = 0; rdelim[0] = '\0'; old_rdelim[0] = '\0'; rdelim_len = 0; } else { RecordLevelIndex = recordlevelindex; strcpy(old_rdelim, temp_rdelim); strcpy(rdelim, old_rdelim); rdelim_len = strlen(rdelim); preprocess_delimiter(rdelim, rdelim_len, rdelim, &rdelim_len); } /* Used in FastIndex for all existing files, used in AddToIndex/DeleteFromIndex if we are trying to add/remove an existing file */ build_filename_hashtable(name_list, file_num);#if 0 /* Test if these are inverses of each other */ save_data_structures(); merge_splits();#endif /*0*/ /* * FastIndex: set disable-flag for unchanged files: remove AND * disable non-existent files. Let hole remain in file-names/partitions. */ if (FastIndex) { for (i=0; i<file_num; i++) if (-1 == my_stat(LIST_GET(name_list, i), &stbuf)) { remove_filename(i, -1); } else if (((stbuf.st_mode & S_IFMT) == S_IFREG) && (stbuf.st_ctime <= istbuf.st_ctime)) { /* This is just used as a cache since exclude/include processing is not done here: see dir.c */ disable_list[block2index(i)] |= mask_int[i % (8*sizeof(int))]; } else { /* Can't do it for directories since files in it can be modified w/o date reflected in the directory. Same for symlinks. */ LIST_ADD(size_list, i, stbuf.st_size, int); disable_list[block2index(i)] &= ~(mask_int[i % (8*sizeof(int))]); } } /* * AddToIndex without FastIndex: disable all existing files, remove those that don't exist now. * Out of old ones, only ADDED FILES are re-enabled: dir.c */ else if (AddToIndex) { for (i=0; i<file_num; i++) { if (-1 == my_stat(LIST_GET(name_list, i), &stbuf)) { remove_filename(i, -1); } else { LIST_ADD(size_list, i, stbuf.st_size, int); /* ONLY for proper statistics in save_data_structures() */ disable_list[block2index(i)] |= mask_int[i % (8*sizeof(int))]; } } } /* else: DeleteFromIndex without FastIndex: don't touch other files */ old_file_num = file_num; destroy_data_structures(); /* Put old/new files into partitions/filenumbers */ if (-1 == oldpartition(argc, argv)) { for(i=0;i<file_num;i++) {#if BG_DEBUG memory_usage -= (strlen(LIST_GET(name_list, i)) + 2);#endif /*BG_DEBUG*/ if (LIST_GET(name_list, i) != NULL) { my_free(LIST_GET(name_list, i), 0); LIST_SUREGET(name_list, i) = NULL; } } file_num = 0; file_id = 0; for (i=0;i<part_num; i++) { p_table[i] = 0; } part_num = 1; my_free(disable_list); disable_list = NULL; goto fresh_indexing; } /* Reindex all the files but use the file-names obtained with oldpartition() */ if (cross_boundary(OneFilePerBlock, file_num)) { my_free(disable_list); disable_list = NULL; } initialize_data_structures(file_num); if (!DeleteFromIndex || FastIndex) build_index(); if ((deletedlist = get_removed_indices()) == NULL) new_file_num = file_num; else if (PurgeIndex) new_file_num = purge_index();#if BG_DEBUG fprintf(LOGFILE, "Built indices in %s/%s\n", INDEX_DIR, INDEX_FILE);#endif /*BG_DEBUG*/ goto docleanup; }fresh_indexing: /* remove it to create space since it can be large: don't need for fresh indexing */ sprintf(s, "%s/%s", INDEX_DIR, P_TABLE); unlink(s); /* These should be zeroed since they can confuse fsize and fsize_directory() */ AddToIndex = 0; FastIndex = 0;#if BG_DEBUG fprintf(LOGFILE, "Commencing fresh indexing\n");#endif /*BG_DEBUG*/ partition(argc, argv); destroy_filename_hashtable(); initialize_data_structures(file_num); old_file_num = file_num; build_index();#if BG_DEBUG fprintf(LOGFILE, "\nBuilt indices in %s/%s\n", INDEX_DIR, INDEX_FILE);#endif /*BG_DEBUG*/docleanup: cleanup(); save_data_structures(); destroy_filename_hashtable();#if BG_DEBUG fflush(LOGFILE); fclose(LOGFILE);#endif /*BG_DEBUG*/ fflush(MESSAGEFILE); fclose(MESSAGEFILE); fflush(STATFILE); fclose(STATFILE); if (AddedMaxWordsMessage) printf("\nSome files contributed > %d words to the index: check %s\n", MAXWORDSPERFILE, DEF_MESSAGE_FILE); if (AddedMixedWordsMessage) printf("Some files had numerals in > %d%% of the indexed words: check %s\n", NUMERICWORDPERCENT, DEF_MESSAGE_FILE); printf("\nIndex-directory: \"%s\"\nGlimpse-files created here:\n", INDEX_DIR); chdir(INDEX_DIR); sprintf(s, "exec %s -l .glimpse_* > %s/%d\n", SYSTEM_LS, TEMP_DIR,pid); system(s); sprintf(s, "%s/%d", TEMP_DIR,pid); if ((tmpfp = fopen(s, "r")) != NULL) { memset(tmpbuf, '\0', 1024); while(fgets(tmpbuf, 1024, tmpfp) != NULL) fputs(tmpbuf, stdout); fflush(tmpfp); fclose(tmpfp); unlink(s); } else fprintf(stderr, "cannot open %s to `cat': check %s for .glimpse - files\n", s, INDEX_DIR);#endif /*BUILDCAST*/ if (!ATLEASTONEFILE) exit(1); return 0;}cleanup(){ char s[MAX_LINE_LEN]; sprintf(s, "%s/%s", INDEX_DIR, I1); unlink(s); sprintf(s, "%s/%s", INDEX_DIR, I2); unlink(s); sprintf(s, "%s/%s", INDEX_DIR, I3); unlink(s); sprintf(s, "%s/%s", INDEX_DIR, O1); unlink(s); sprintf(s, "%s/%s", INDEX_DIR, O2); unlink(s); sprintf(s, "%s/%s", INDEX_DIR, O3); unlink(s); sprintf(s, "%s/.glimpse_apply.%d", INDEX_DIR, getpid()); unlink(s);}#if !BUILDCASTusage(flag)int flag;{ if (flag) fprintf(stderr, "\nThis is glimpseindex version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE); fprintf(stderr, "usage: %s [-help] [-a] [-d] [-f] [-i] [-n [X]] [-o] [-r delim] [-s] [-t] [-w X] [-B] [-F] [-H DIR] [-I] [-M X] [-R] [-S X] [-T] [-V] NAMES\n", IProgname); fprintf(stderr, "List of options (see %s for more details):\n", GLIMPSE_URL); fprintf(stderr, "-help: outputs this menu\n"); fprintf(stderr, "-a: add given files/directories to an existing index\n"); fprintf(stderr, "-b: build a (large) byte-level index \n"); fprintf(stderr, "-B: use a hash table that is 4 times bigger (256k entries instead of 64K) \n"); fprintf(stderr, "-d NAMES: delete (file or directory) NAMES from an existing index\n"); fprintf(stderr, "-D NAMES: delete NAMES from the list of files (but not from the index!)\n"); fprintf(stderr, "-E: do not run a check on file types\n"); fprintf(stderr, "-f: incremental indexing (add all newly modified files)\n"); fprintf(stderr, "-F: the list of files to index is obtained from standard input\n"); fprintf(stderr, "-h: generates some hash-tables for WebGlimpse\n"); fprintf(stderr, "-H DIR: the index is put in directory DIR\n"); fprintf(stderr, "-i: make .glimpse_include take precedence over .glimpse_exclude\n"); fprintf(stderr, "-I: output the list of files that would be indexed (but don't index)\n"); fprintf(stderr, "-M X: use X MBytes of memory for temporary tables\n"); fprintf(stderr, "-n [X]: index numbers as well as words; warn (into .glimpse_messages)\n\tif file adds > X%% numeric words: default is %d\n", DEF_NUMERIC_WORD_PERCENT); fprintf(stderr, "-o: build a small (rather than tiny) size index (the recommended option!)\n"); /*fprintf(stderr, "-O: when using -r option, store byte offset of each record,\n\tinstead of the record number, for faster access\n");*/ fprintf(stderr, "-r delim: build an index at the granularity of delimiter `delim'\n\tto do booleans by reading ONLY the index\n"); fprintf(stderr, "-R: recompute .glimpse_filenames_index from .glimpse_filenames if it changes\n"); fprintf(stderr, "-s: build index to support structured (Harvest SOIF type) queries\n"); fprintf(stderr, "-S X: adjust the size of the stop list\n"); fprintf(stderr, "-t: sort the indexed files by date and time (most recent first)\n"); fprintf(stderr, "-T: build .glimpse_turbo for very fast search with -i -w in glimpse\n"); fprintf(stderr, "-U: there is extra information after filenames: works only with -F\n"); fprintf(stderr, "-w X: warn (into .glimpse_messages) if a file adds >= X words to the index\n"); fprintf(stderr, "-X: extract titles of all documents with .html, .htm, .shtm, .shtml suffix\n"); fprintf(stderr, "-z: customizable filtering using .glimpse_filters \n"); fprintf(stderr, "\n"); fprintf(stderr, "For questions about glimpse, please contact: `%s'\n", GLIMPSE_EMAIL); exit(1);}#else /*!BUILDCAST*/usage(flag)int flag;{ if (flag) fprintf(stderr, "\nThis is buildcast version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE); fprintf(stderr, "usage: %s [-help] [-t] [-i] [-l] [-n [X]] [-w X] [-C] [-E] [-F] [-H DIR] [-V] NAMES\n", IProgname); fprintf(stderr, "summary of frequently used options\n(for a more detailed listing see 'man cast'):\n"); fprintf(stderr, "-help: output this menu\n"); fprintf(stderr, "-n [X]: index numbers as well as words; warn (into .glimpse_messages)\n\tif file adds > X%% numeric words: default is %d\n", DEF_NUMERIC_WORD_PERCENT); fprintf(stderr, "-w X: warn if a file adds > X words to the index\n"); fprintf(stderr, "-C: compress files with the new dictionary after building it\n"); fprintf(stderr, "-E: build cast dictionary using existing compressed files only\n"); fprintf(stderr, "-F: expect filenames on stdin (useful for pipelining)\n"); fprintf(stderr, "-H DIR: .glimpse-files should be in directory DIR: default is '~'\n"); fprintf(stderr, "\n"); fprintf(stderr, "For questions about glimpse, please contact: `%s'\n", GLIMPSE_EMAIL); exit(1);}#endif /*!BUILDCAST*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -