📄 glimpse.c
字号:
BigHashTable = 1; argc --; argv ++; } else if (strcmp(argv[1], "-E") == 0) { IndexEverything = 1; /* without doing stat tests, etc. */ argc --; argv ++; } else if(strcmp(argv[1], "-F") == 0) { FilenamesOnStdin = ON; argc--; argv++; } else if(strcmp(argv[1], "-X") == 0) { /* extract some info to append after a ' ' to filename in filename-buffer */ ExtractInfo = ON; argc--; argv++; } else if(strcmp(argv[1], "-U") == 0) { /* some information is there after blank after filename on same line as filename-buffer: makes sense only with -F */ InfoAfterFilename = ON; argc--; argv++; } else if(strcmp(argv[1], "-K") == 0) { /* first word of above info/or the extracted info is the key */ FirstWordOfInfoIsKey = ON; argc--; argv++; } /* else if(strcmp(argv[1], "-u") == 0) { IndexUnderscore = ON; argc--; argv++; } */ else if (strcmp(argv[1], "-H") == 0) { if (argc == 2) { fprintf(stderr, "-H should be followed by a directory name\n"); return usage(1); } strncpy(INDEX_DIR, argv[2], MAX_LINE_LEN); argc -= 2; argv += 2; } else break; /* rest are directory names */ } if (RecordLevelIndex && StructuredIndex) { fprintf(stderr, "-r and -s are not compatible!\n"); return usage(1); } if (StoreByteOffset && !RecordLevelIndex) { fprintf(stderr, "ignoring -O since -r was not specified\n"); StoreByteOffset = OFF; } if (InfoAfterFilename && !FilenamesOnStdin) { fprintf(stderr, "-U works only when -F is specified!\n"); return usage(1); } if (FirstWordOfInfoIsKey && !(InfoAfterFilename || ExtractInfo)) { fprintf(stderr, "-K works only when one of -X or -U are specified!\n"); return usage(1); } if (RecordLevelIndex) { /* printf("old_rdelim = %s rdelim = %s rdelim_len = %d\n", old_rdelim, rdelim, rdelim_len); */ preprocess_delimiter(rdelim, rdelim_len, rdelim, &rdelim_len); /* printf("processed rdelim = %s rdelim_len = %d\n", rdelim, rdelim_len); */ } if (ModifyFilenamesIndex) { int offset = 0; char buffer[1024]; FILE *filefp, *indexfp; sprintf(buffer, "%s/%s", INDEX_DIR, NAME_LIST); if ((filefp = fopen(buffer, "r")) == NULL) { fprintf(stderr, "Cannot open %s for reading\n", buffer); exit(2); } sprintf(buffer, "%s/%s.tmp", INDEX_DIR, NAME_LIST_INDEX); if ((indexfp = fopen(buffer, "w")) == NULL) { fprintf(stderr, "Cannot open %s for writing\n", buffer); exit(2); } fgets(buffer, 1024, filefp); /* skip over num. of file names */ offset += strlen(buffer); while (fgets(buffer, 1024, filefp) != NULL) { putc((offset & 0xff000000) >> 24, indexfp); putc((offset & 0xff0000) >> 16, indexfp); putc((offset & 0xff00) >> 8, indexfp); putc((offset & 0xff), indexfp); offset += strlen(buffer); } fflush(filefp); fclose(filefp); fflush(indexfp); fclose(indexfp);#if SFS_COMPAT sprintf(s, "%s/%s.tmp", INDEX_DIR, NAME_LIST_INDEX); sprintf(s1, "%s/%s", INDEX_DIR, NAME_LIST_INDEX); return rename(s, s1);#else sprintf(buffer, "mv %s/%s.tmp %s/%s", INDEX_DIR, NAME_LIST_INDEX, INDEX_DIR, NAME_LIST_INDEX); return system(buffer);#endif } BuildTurbo = ON; /* always ON: user can remove .glimpse_turbo if not needed */ /* * Look for invalid option combos. */ if ((argc<=1) && (!FilenamesOnStdin) && !FastIndex) { return usage(1); } if (DeleteFromIndex && (AddToIndex || CountWords || IndexableFile)) { /* With -f, it is automatic for files not found in OS but present in index; without it, an explicit set of files is required as argument on cmdline */ fprintf(stderr, "-d cannot be used with -I, -a or -c (see man pages)\n"); exit(1); } if (ByteLevelIndex) { if (MAX_PER_MB <= 0) { fprintf(stderr, "Stop list limit (#of occurrences per MB) '%d' must be > 0\n", MAX_PER_MB); exit(1); } } else if (OneFilePerBlock) { if ((MAX_INDEX_PERCENT <= 0) || (MAX_INDEX_PERCENT > 100)) { fprintf(stderr, "Stop list limit (%% of occurrences in files) '%d' must be in (0, 100]\n", MAX_INDEX_PERCENT); exit(1); } } /* * Find the index directory since it is used in all options. */ if (INDEX_DIR[0] == '\0') { if ((indexdir = getenv("HOME")) == NULL) { getcwd(INDEX_DIR, MAX_LINE_LEN-1); fprintf(stderr, "Using working-directory '%s' to store index\n\n", INDEX_DIR); } else strncpy(INDEX_DIR, indexdir, MAX_LINE_LEN); } getcwd(working_dir, MAX_LINE_LEN - 1); if (-1 == chdir(INDEX_DIR)) { fprintf(stderr, "Cannot change directory to %s\n", INDEX_DIR); return usage(0); } getcwd(INDEX_DIR, MAX_LINE_LEN - 1); /* must be absolute path name */ chdir(working_dir); /* get back to where you were */ if (IndexableFile) { /* traverse the given directories and output names of files that are indexable on stdout */ SortByTime = OFF; partition(argc, argv); return 0; } else {#if BUILDCAST printf("\nThis is buildcast version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);#else /*BUILDCAST*/ printf("\nThis is glimpseindex version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);#endif /*BUILDCAST*/ } if (ByteLevelIndex) {#if 0 /* We'll worry about these things later */ if (AddToIndex || DeleteFromIndex || FastIndex) { fprintf(stderr, "Fresh indexing recommended: -a, -d and -f are not supported with -b as yet\n"); exit(1); } AddToIndex = FastIndex = OFF;#endif CountWords = OFF; OneFilePerBlock = ON; } if (SortByTime) { if (DeleteFromIndex || AddToIndex) { fprintf(stderr, "Fresh indexing recommended: -a and -d are not supported with -t as yet\n"); exit(1); } FastIndex = OFF; /* automatically shuts it off as of now: we shall optimize -t with -f later */ } /* * CONVENTION: all the relevant output is on stdout; warnings/errors are on stderr. * Initialize / open important files. */ read_filters(INDEX_DIR, UseFilters); freq_file[0] = hash_file[0] = string_file[0] = '\0'; strcpy(freq_file, INDEX_DIR); strcat(freq_file, "/"); strcat(freq_file, DEF_FREQ_FILE); strcpy(hash_file, INDEX_DIR); strcat(hash_file, "/"); strcat(hash_file, DEF_HASH_FILE); strcpy(string_file, INDEX_DIR); strcat(string_file, "/"); strcat(string_file, DEF_STRING_FILE); initialize_tuncompress(string_file, freq_file, 0); sprintf(s, "%s/%s", INDEX_DIR, DEF_TIME_FILE); if((TIMEFILE = fopen(s, "w")) == 0) { fprintf(stderr, "can't open %s for writing\n", s); exit(2); }#if BG_DEBUG sprintf(s, "%s/%s", INDEX_DIR, DEF_LOG_FILE); if((LOGFILE = fopen(s, "w")) == 0) { fprintf(stderr, "can't open %s for writing\n", s); LOGFILE = stderr; }#endif /*BG_DEBUG*/ sprintf(s, "%s/%s", INDEX_DIR, DEF_MESSAGE_FILE); if((MESSAGEFILE = fopen(s, "w")) == 0) { fprintf(stderr, "can't open %s for writing\n", s); MESSAGEFILE = stderr; } sprintf(s, "%s/%s", INDEX_DIR, DEF_STAT_FILE); if((STATFILE = fopen(s, "a")) == 0) { fprintf(stderr, "can't open %s for appending\n", s); STATFILE = stderr; } gettimeofday(&tv, NULL);#if BUILDCAST fprintf(STATFILE, "\nThis is buildcast version %s, %s. %s", GLIMPSE_VERSION, GLIMPSE_DATE, ctime(&tv.tv_sec));#else fprintf(STATFILE, "\nThis is glimpseindex version %s, %s. %s", GLIMPSE_VERSION, GLIMPSE_DATE, ctime(&tv.tv_sec));#endif#if BG_DEBUG fprintf(LOGFILE, "Index Directory = %s\n\n", INDEX_DIR);#endif /*BG_DEBUG*/ if (MAXWORDSPERFILE != 0) fprintf(MESSAGEFILE, "Index: maximum number of indexed words per file = %d\n", MAXWORDSPERFILE); else fprintf(MESSAGEFILE, "Index: maximum number of indexed words per file = infinity\n"); fprintf(MESSAGEFILE, "Index: maximum percentage of numeric words per file = %d\n", NUMERICWORDPERCENT); set_indexable_char(indexable_char);#if BUILDCAST CountWords = ON; AddToIndex = OFF; FastIndex = OFF; /* Save old search-dictionaries */ sprintf(s, "%s/.glimpse_index", INDEX_DIR); if (!access(s, R_OK)) { sprintf(s, "%s/.glimpse_tempdir.%d", INDEX_DIR, pid); if (-1 == mkdir(s, 0700)) { fprintf(stderr, "cannot create temporary directory %s\n", s); return -1; }#if SFS_COMPAT sprintf(s, "%s/%s", INDEX_DIR, INDEX_FILE); sprintf(s1, "%s/.glimpse_tempdir.%d", INDEX_DIR, pid); rename(s, s1);#else sprintf(s, "exec %s -f '%s/%s' '%s/.glimpse_tempdir.%d'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), INDEX_FILE, escapesinglequote(INDEX_DIR, es2), pid); system(s);#endif#if SFS_COMPAT sprintf(s, "%s/%s", INDEX_DIR, P_TABLE); sprintf(s1, "%s/.glimpse_tempdir.%d", INDEX_DIR, pid); rename(s, s1);#else sprintf(s, "exec %s -f '%s/%s' '%s/.glimpse_tempdir.%d'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), P_TABLE, escapesinglequote(INDEX_DIR, es2), pid); system(s);#endif#if SFS_COMPAT sprintf(s, "%s/%s", INDEX_DIR, NAME_LIST); sprintf(s1, "%s/.glimpse_tempdir.%d", INDEX_DIR, pid); rename(s, s1);#else sprintf(s, "exec %s -f '%s/%s' '%s/.glimpse_tempdir.%d'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), NAME_LIST, escapesinglequote(INDEX_DIR, es2), pid); system(s);#endif#if SFS_COMPAT sprintf(s, "%s/%s", INDEX_DIR, NAME_LIST_INDEX); sprintf(s1, "%s/.glimpse_tempdir.%d", INDEX_DIR, pid); rename(s, s1);#else sprintf(s, "exec %s -f '%s/%s' '%s/.glimpse_tempdir.%d'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), NAME_LIST_INDEX, escapesinglequote(INDEX_DIR, es1), pid); system(s);#endif#if SFS_COMPAT sprintf(s, "%s/%s", INDEX_DIR, NAME_HASH); sprintf(s1, "%s/.glimpse_tempdir.%d", INDEX_DIR, pid); rename(s, s1);#else sprintf(s, "exec %s -f '%s/%s' '%s/.glimpse_tempdir.%d'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), NAME_HASH, escapesinglequote(INDEX_DIR, es2), pid); system(s);#endif#if SFS_COMPAT sprintf(s, "%s/%s", INDEX_DIR, NAME_HASH_INDEX); sprintf(s1, "%s/.glimpse_tempdir.%d", INDEX_DIR, pid); rename(s, s1);#else sprintf(s, "exec %s -f '%s/%s' '%s/.glimpse_tempdir.%d'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), NAME_HASH_INDEX, escapesinglequote(INDEX_DIR, es2), pid); system(s);#endif#if SFS_COMPAT sprintf(s, "%s/%s", INDEX_DIR, MINI_FILE); sprintf(s1, "%s/.glimpse_tempdir.%d", INDEX_DIR, pid); rename(s, s1);#else sprintf(s, "exec %s -f '%s/%s' '%s/.glimpse_tempdir.%d'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), MINI_FILE, escapesinglequote(INDEX_DIR, es2), pid); system(s);#endif#if SFS_COMPAT sprintf(s, "%s/%s", INDEX_DIR, DEF_STAT_FILE); sprintf(s1, "%s/.glimpse_tempdir.%d", INDEX_DIR, pid); rename(s, s1);#else sprintf(s, "exec %s -f '%s/%s' '%s/.glimpse_tempdir.%d'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), DEF_STAT_FILE, escapesinglequote(INDEX_DIR, es2), pid); system(s);#endif /* Don't save messages, log, debug, etc. */ sprintf(s, "%s/.glimpse_attributes", INDEX_DIR); if (!access(s, R_OK)) {#if SFS_COMPAT sprintf(s, "%s/%s", INDEX_DIR, ATTRIBUTE_FILE); sprintf(s1, "%s/.glimpse_tempdir.%d", INDEX_DIR, pid); rename(s, s1);#else sprintf(s, "exec %s -f '%s/%s' '%s/.glimpse_tempdir.%d'\n", SYSTEM_MV, escapesinglequote(INDEX_DIR, es1), ATTRIBUTE_FILE, escapesinglequote(INDEX_DIR, es2), pid); system(s);#endif } } /* Backup old cast-dictionaries: don't use move since indexing might want to use them */ sprintf(s, "%s/.glimpse_quick", INDEX_DIR); if (!access(s, R_OK)) { /* there are previous cast dictionaries */ backup = rand(); sprintf(s, "%s/.glimpse_backup.%x", INDEX_DIR, backup); if (-1 == mkdir(s, 0700)) { fprintf(stderr, "cannot create backup directory %s\n", s); return -1; } sprintf(s, "exec %s -f '%s/.glimpse_quick' '%s/.glimpse_backup.%x'\n", SYSTEM_CP, escapesinglequote(INDEX_DIR, es1), escapesinglequote(INDEX_DIR, es2), backup); system(s); sprintf(s, "exec %s -f '%s/.glimpse_compress' '%s/.glimpse_backup.%x'\n", SYSTEM_CP, escapesinglequote(INDEX_DIR, es1), escapesinglequote(INDEX_DIR, es2), backup); system(s); sprintf(s, "exec %s -f '%s/.glimpse_compress.index' '%s/.glimpse_backup.%x'\n", SYSTEM_CP, escapesinglequote(INDEX_DIR, es1), escapesinglequote(INDEX_DIR, es2), backup); system(s); sprintf(s, "exec %s -f '%s/.glimpse_uncompress' '%s/.glimpse_backup.%x'\n", SYSTEM_CP, escapesinglequote(INDEX_DIR, es1), escapesinglequote(INDEX_DIR, es2), backup); system(s); sprintf(s, "exec %s -f '%s/.glimpse_uncompress.index' '%s/.glimpse_backup.%x'\n", SYSTEM_CP, escapesinglequote(INDEX_DIR, es1), escapesinglequote(INDEX_DIR, es2), backup); system(s); printf("Saved previous cast-dictionary in %s/.glimpse_backup.%x\n", INDEX_DIR, backup); } /* Now index these files, and build new dictionaries */ partition(argc, argv); initialize_data_structures(file_num); old_file_num = file_num; build_index(); cleanup(); save_data_structures(); destroy_filename_hashtable(); uninitialize_common(); uninitialize_tcompress(); uninitialize_tuncompress(); compute_dictionary(threshold, DISKBLOCKSIZE, specialwords, INDEX_DIR); if (CompressAfterBuild) { /* For the new compression */ if (!initialize_tcompress(hash_file, freq_file, TC_ERRORMSGS)) goto docleanup; printf("Compressing files with new dictionary...\n"); /* Use the set of file-names collected during partition() / modified during build_hash */ for(i=0; i<file_num; i++) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -