📄 partition.c
字号:
/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */
/* ./glimpse/index/partition.c */

/*
 * File-scope declarations for the partitioning step of the indexer.
 * Every preprocessor directive and every declaration is on its own line:
 * a directive that shares its line with other tokens is not processed
 * correctly by the preprocessor.
 */
#include "glimpse.h"
#include <errno.h>	/* provides errno; it must not be declared by hand */
#include <sys/stat.h>
#include <sys/time.h>

/* ---- Option flags and paths: declared extern here, defined elsewhere ---- */
extern int BigFilenameHashTable;
extern int DeleteFromIndex;
extern int FastIndex;
extern int FilenamesOnStdin;
extern char INDEX_DIR[MAX_LINE_LEN];
extern char sync_path[MAX_LINE_LEN];

/* ---- File and partition bookkeeping: declared extern here ---- */
extern int file_num;		/* the number of files */
extern int new_file_num;	/* the new number of files after purging some from index */
extern char **name_list[MAXNUM_INDIRECT];	/* to store the file names */
extern int *size_list[MAXNUM_INDIRECT];		/* store size of each file */
extern int p_table[MAX_PARTITION];	/* partition table, the i-th partition begins at p_table[i] and ends at p_table[i+1] */
extern int p_size_list[MAX_PARTITION];	/* sum of the sizes of the files in each partition */
extern int part_num;		/* number of partitions, 1 initially since partition # 0 is not accessed */
extern int built_filename_hashtable;
extern name_hashelement *name_hashtable[MAX_64K_HASH];
extern int total_size;		/* total size of the directory */
extern int total_deleted;	/* number of files being deleted */

/* ---- Objects defined in this file ---- */
int part_size = DEFAULT_PART_SIZE;	/* partition size */
int new_partition;
int files_per_partition;
int files_in_partition;
int ATLEASTONEFILE = 0;
char patbuf[MAX_PAT];	/* line buffer used when reading the exclude/include pattern files */

/* ---- Index buffers, limits and output streams: declared extern here ---- */
extern unsigned char *src_index_buf;
extern unsigned char *dest_index_buf;
extern int REAL_PARTITION, REAL_INDEX_BUF, MAX_ALL_INDEX, FILEMASK_SIZE;
extern int memory_usage;
extern struct indices *deletedlist;
extern FILE *TIMEFILE;
extern FILE *STATFILE;
extern FILE *MESSAGEFILE;
extern struct stat excstbuf;
extern struct stat incstbuf;

/* ---- Further indexing options: declared extern here ---- */
extern int GenerateHash;
extern int KeepFilenames;
extern int OneFilePerBlock;
extern int ByteLevelIndex;
extern int RecordLevelIndex;
extern int rdelim_len;
extern char rdelim[MAX_LINE_LEN];
extern char old_rdelim[MAX_LINE_LEN];
extern int StructuredIndex;
extern int attr_num;
/* (a second, identical "extern char INDEX_DIR[MAX_LINE_LEN];" used to be repeated here) */
extern int AddToIndex;
/* The declarator of the next declaration (IndexableFile) starts the following line of the file. */
extern int
IndexableFile;extern int BuildTurbo;extern int SortByTime;char *exin_argv[8];int exin_argc;char current_dir_buf[2*MAX_LINE_LEN + 4]; /* must have space to store pattern after directory name */unsigned char dummypat[MAX_PAT];int dummylen;FILE *dummyout;partition(dir_num, dir_name)char **dir_name;int dir_num;{ int num_pat=0; int num_inc=0; int len; long thetime; long prevtime; int theindex; int firsttime = 1; int xx; struct timeval tv; FILE *tmp_TIMEFILE; FILE *index_TIMEFILE; int ret; char **temp_name_list; int *temp_size_list; int temp_file_num; char S[MAX_LINE_LEN], S1[MAX_LINE_LEN], es1[MAX_LINE_LEN], es2[MAX_LINE_LEN], es3[MAX_LINE_LEN]; int pat_len[MAX_EXCLUSIVE]; int inc_len[MAX_EXCLUSIVE]; CHAR *inc[MAX_INCLUSIVE]; /* store the patterns used to mask in files */ CHAR *pat[MAX_EXCLUSIVE]; /* store the patterns that are used to mask out those files that are not to be indexed */ int MinPartNum; /* minimum number of partitions */ int i=0, j; int subtotal=0; int pdx = 0; /* index pointer for p_table */ FILE *patfile; /* file descriptor for prohibit pattern file */ FILE *incfile; /* file descriptor for include pattern file */ char *current_dir; /* must have '\n' before directory name */ char s[MAX_LINE_LEN]; char working_dir[MAX_LINE_LEN]; struct stat sbuf; current_dir_buf[0] = '\n'; current_dir_buf[1] = '\0'; current_dir = ¤t_dir_buf[1]; /* if (IndexableFile) goto directlytofsize; */ if ((dummyout = fopen("/dev/null", "w")) == NULL) return -1; exin_argv[0] = "glimpseindex"; exin_argv[1] = "dummypat"; exin_argc = 2; if ((dummylen = memagrep_init(exin_argc, exin_argv, MAX_PAT, dummypat)) <= 0) return -1; /* exclude/include pattern search */ sprintf(s, "%s/%s", INDEX_DIR, PROHIBIT_LIST); patfile = fopen(s, "r"); if(patfile == NULL) { /* fprintf(stderr, "can't open exclude-pattern file\n"); -- no need! 
*/ num_pat = 0; } else { while((num_pat < MAX_EXCLUSIVE) && fgets(patbuf, MAX_PAT, patfile)) { if ((len = strlen(patbuf)) < 1) continue; patbuf[len-1] = '\0'; if ((pat_len[num_pat] = convert2agrepregexp(patbuf, len-1)) == 0) continue; pat[num_pat++] = (unsigned char *) strdup(patbuf); } fclose(patfile); }#if 0 printf("num_pat %d\n", num_pat); for(i=0; i<num_pat; i++) printf("len=%d pat=%s\n", pat_len[i], pat[i]); printf("memagrep=%d\n", memagrep_search(-pat_len[0], pat[0], 17, "\n.glimpse_index\nasdfk", 0, stdout));#endif sprintf(s, "%s/%s", INDEX_DIR, INCLUDE_LIST); incfile = fopen(s, "r"); if(incfile == NULL) { /* fprintf(stderr, "can't open include-pattern file\n"); -- no need! */ num_inc = 0; } else { while((num_inc < MAX_INCLUSIVE) && fgets(patbuf, MAX_PAT, incfile)) { if ((len = strlen(patbuf)) < 1) continue; patbuf[len-1] = '\0'; if ((inc_len[num_inc] = convert2agrepregexp(patbuf, len-1)) == 0) continue; inc[num_inc++] = (unsigned char *) strdup(patbuf); } fclose(incfile); }#if 0 printf("num_inc %d\n", num_inc); for(i=0; i<num_inc; i++) printf("len=%d inc=%s\n", inc_len[i], inc[i]);#endif#ifdef SW_DEBUG printf("dir_num = %d", dir_num-1);#endifdirectlytofsize: if ((dir_num <= 1) && (FilenamesOnStdin)) while (fgets(current_dir, MAX_LINE_LEN, stdin) == current_dir) { current_dir[strlen(current_dir)-1] = '\0'; /* overwrite \n with \0 */ /* Get absolute path name of the directory or file being indexed */ if (-1 == my_stat(current_dir, &sbuf)) { fprintf(stderr, "permission denied or non-existent: %s\n", current_dir); continue; } if ((S_ISDIR(sbuf.st_mode)) && (current_dir[0] != '/')) { getcwd(working_dir, MAX_LINE_LEN - 1); if (-1 == chdir(current_dir)) { fprintf(stderr, "Cannot chdir to %s\n", current_dir); continue; } getcwd(current_dir, MAX_LINE_LEN - 1); chdir(working_dir); } if (!IndexableFile) printf("Indexing \"%s\" ...\n", current_dir); fsize(current_dir, pat, pat_len, num_pat, inc, inc_len, num_inc, 0); /* the file names will be in name_list[]: NOT TOP 
LEVEL!!! Mar/11/96 */ } else for(i=1; i<dir_num; i++) { strcpy(current_dir, dir_name[i]); /* Get absolute path name of the directory or file being indexed */ if (-1 == my_stat(current_dir, &sbuf)) { fprintf(stderr, "permission denied or non-existent: %s\n", current_dir); continue; } if ((S_ISDIR(sbuf.st_mode)) && (current_dir[0] != '/')) { getcwd(working_dir, MAX_LINE_LEN - 1); if (-1 == chdir(current_dir)) { fprintf(stderr, "Cannot chdir to %s\n", current_dir); continue; } getcwd(current_dir, MAX_LINE_LEN - 1); chdir(working_dir); } if (!IndexableFile) { if (!DeleteFromIndex) printf("Indexing \"%s\" ...\n", current_dir); } fsize(current_dir, pat, pat_len, num_pat, inc, inc_len, num_inc, 1); /* the file names will be in name_list[] */ } if (IndexableFile) return 0; /* * If -t option is set, we must sort .glimpse_filenames (i.e., name_list and size_list) according to the most recent modification date. * The file-number VS its modification date are already available in .glimpse_filetimes which is created in main() and filled in fsize(). 
*/ if (SortByTime && (file_num > 0)) { fflush(TIMEFILE); fclose(TIMEFILE);#if USESORT_Z_OPTION#if DONTUSESORT_T_OPTION || SFS_COMPAT sprintf(S, "exec %s -n -r -z %d '%s/%s' > '%s/%s.tmp'\n", SYSTEM_SORT, maxsortlinelen, escapesinglequote(INDEX_DIR, es1), DEF_TIME_FILE, escapesinglequote(INDEX_DIR, es2), DEF_TIME_FILE);#else sprintf(S, "exec %s -n -r -T '%s' -z %d '%s/%s' > '%s/%s.tmp'\n", SYSTEM_SORT, escapesinglequote(INDEX_DIR, es1), maxsortlinelen, escapesinglequote(INDEX_DIR, es2), DEF_TIME_FILE, escapesinglequote(INDEX_DIR, es3), DEF_TIME_FILE);#endif#else#if DONTUSESORT_T_OPTION || SFS_COMPAT sprintf(S, "exec %s -n -r '%s/%s' > '%s/%s.tmp'\n", SYSTEM_SORT, escapesinglequote(INDEX_DIR, es1), DEF_TIME_FILE, escapesinglequote(INDEX_DIR, es2), DEF_TIME_FILE);#else sprintf(S, "exec %s -n -r -T '%s' '%s/%s' > '%s/%s.tmp'\n", SYSTEM_SORT, escapesinglequote(INDEX_DIR, es1), escapesinglequote(INDEX_DIR, es2), DEF_TIME_FILE, escapesinglequote(INDEX_DIR, es3), DEF_TIME_FILE);#endif#endif#ifdef BG_DEBUG printf("%s", S);#endif if((ret=system(S)) != 0) { sprintf(S1, "system('%s') failed at:\n\t File=%s, Line=%d, Errno=%d", S, __FILE__, __LINE__, errno); perror(S1); fprintf(stderr, "Please try to run the program again\n(If there's no memory, increase the swap area / don't use -M and -B options)\n"); sprintf(S, "%s/%s", INDEX_DIR, DEF_TIME_FILE); unlink(S); exit(2); } sprintf(S, "%s/%s.tmp", INDEX_DIR, DEF_TIME_FILE); if ((tmp_TIMEFILE = fopen(S, "r")) == NULL) { fprintf(stderr, "can't open %s for reading\n", S); unlink(S); exit(2); } sprintf(S, "%s/%s", INDEX_DIR, DEF_TIME_FILE); if ((TIMEFILE = fopen(S, "w")) == NULL) { fprintf(stderr, "can't open %s for writing\n", S); unlink(S); exit(2); } sprintf(S, "%s/%s.index", INDEX_DIR, DEF_TIME_FILE); if ((index_TIMEFILE = fopen(S, "w")) == NULL) { fprintf(stderr, "can't open %s for writing\n", S); unlink(S); exit(2); } /* Get the sorted times from .glimpse_times.tmp; dump the exact times for each file in .glimpse_times; dump 
per-day file# in .glimpse_times.index */ gettimeofday(&tv, NULL); temp_file_num = 0; temp_name_list = (char **)my_malloc(sizeof(char *) * file_num); memset(temp_name_list, '\0', sizeof(char *) * file_num); temp_size_list = (int *)my_malloc(sizeof(int) * file_num); memset(temp_size_list, '\0', sizeof(int) * file_num); prevtime = tv.tv_sec; while (fscanf(tmp_TIMEFILE, "%ld %d", &thetime, &theindex) == 2) { temp_name_list[temp_file_num] = LIST_GET(name_list, theindex); temp_size_list[temp_file_num] = LIST_GET(size_list, theindex); for (xx=0; xx<sizeof(long); xx++) fputc((thetime & ((0xff) << (8*(sizeof(long) - xx - 1))))>>(8*(sizeof(long) - xx - 1)), TIMEFILE); /* fprintf(TIMEFILE, "%d %d\n", thetime, (prevtime - thetime)/86400); */ if (firsttime) { for (i=0; i<(prevtime - thetime + 86399)/86400; i++) { for (xx=0; xx<sizeof(int); xx++) fputc((temp_file_num & ((0xff) << (8*(sizeof(int) - xx - 1))))>>(8*(sizeof(int) - xx - 1)), index_TIMEFILE); /* fprintf(index_TIMEFILE, "%d\n", temp_file_num); */ } } else { for (i=0; i<(prevtime - thetime)/86400; i++) { for (xx=0; xx<sizeof(int); xx++) fputc((temp_file_num & ((0xff) << (8*(sizeof(int) - xx - 1))))>>(8*(sizeof(int) - xx - 1)), index_TIMEFILE); /* fprintf(index_TIMEFILE, "%d\n", temp_file_num); */ } } temp_file_num ++; if (!firsttime) prevtime -= i*86400; else if (i>0) prevtime -= (i-1)*86400; firsttime = 0; } if (temp_file_num != file_num) { fprintf(stderr, "error in sort: File=%s, Line=%d\n", __FILE__, __LINE__); exit(2); } /* Change the lists to be sorted now; free temporary lists */ for (i=0; i<temp_file_num; i++) { LIST_SUREGET(name_list, i) = temp_name_list[i]; LIST_SUREGET(size_list, i) = temp_size_list[i]; } my_free(temp_name_list, sizeof(char *) * file_num); my_free(temp_size_list, sizeof(int) * file_num); fclose(tmp_TIMEFILE); fflush(TIMEFILE); fclose(TIMEFILE); fflush(index_TIMEFILE); fclose(index_TIMEFILE); sprintf(S, "%s/%s.tmp", INDEX_DIR, DEF_TIME_FILE); unlink(S); } for(i=0; i<file_num; i++) total_size += 
LIST_GET(size_list, i); for(i=0; i<file_num; i++) if (LIST_GET(name_list, i) == NULL) total_deleted ++; if (DeleteFromIndex) { if (total_size <= 0) { fprintf(STATFILE, "#of files being deleted = %d, Total #of files = %d\n", total_deleted, file_num - total_deleted); printf("\n#of files being deleted = %d, Total #of files = %d\n", total_deleted, file_num - total_deleted); /* the only output the user sees */ } else { fprintf(STATFILE, "Size of files being indexed = %d B, #of files being deleted = %d, Total #of files = %d\n", total_size, total_deleted, file_num - total_deleted); printf("\nSize of files being indexed = %d B, #of files being deleted = %d, Total #of files = %d\n", total_size, total_deleted, file_num - total_deleted); /* the only output the user sees */ } } else { fprintf(STATFILE, "Size of files being indexed = %d B, Total #of files = %d\n", total_size, file_num); printf("\nSize of files being indexed = %d B, Total #of files = %d\n", total_size, file_num); /* the only output the user sees */ }#ifdef SW_DEBUG for (i=0; i<file_num; i++) printf("name_list[%d] = %s, size=%d\n", i, LIST_GET(name_list, i), LIST_GET(size_list, i));#endif /*SW_DEBUG*/ for (i=0; i<num_inc; i++) {#if BG_DEBUG memory_usage -= strlen(inc) + 2;#endif /*BG_DEBUG*/ my_free(inc[i], 0); } for (i=0; i<num_pat; i++) {#if BG_DEBUG memory_usage -= strlen(pat) + 2;#endif /*BG_DEBUG*/ my_free(pat[i], 0); } /* Life (algorithm) is much simpler, but encode/decode (I/O) is more complex: the p_table is irrelevant */ if (OneFilePerBlock) return 0; /* Now put the files into partitions */ i=0; part_size = total_size / MaxNumPartition; if (part_size <= 0) part_size = total_size; LIST_ADD(size_list, file_num, part_size, int); if (file_num / 2 <= 1) { p_table[0] = 0; p_table[1] = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -