📄 get_filename.c
字号:
/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */#include <sys/types.h>#include <sys/stat.h>#include <errno.h>#include "glimpse.h"#include <fcntl.h>#define CHAR unsigned char/* ----------------------------------------------------------------------get_filenames()input: an index table, (an index vector, i-th entry is ON ifi-th partition is to be searched.), the partition table in src_index_set[]and the list of all files in "NAME_LIST".output: the list of filenames to be searched.------------------------------------------------------------------------- */#if BG_DEBUGextern FILE *debug;#endif /*BG_DEBUG*/extern int p_table[MAX_PARTITION];extern CHAR **GTextfiles;extern CHAR **GTextfilenames;extern int *GFileIndex;extern int GNumfiles;extern CHAR GProgname[];extern CHAR FileNamePat[];extern int MATCHFILE;extern int agrep_outpointer;extern int mask_int[32];extern int OneFilePerBlock;extern char INDEX_DIR[MAX_LINE_LEN];extern unsigned int *multi_dest_index_set[MAXNUM_PAT];extern int file_num; /* in index/io.c */int bigbuffer_size;int first_line_len = 0;char *bigbuffer = NULL; /* constant buffer to read all filenames in NAME_LIST */char *outputbuffer = NULL; /* keeps changing: used for -F search via memagrep */int outputbuffer_len = 0;extern int REAL_PARTITION, REAL_INDEX_BUF, MAX_ALL_INDEX, FILEMASK_SIZE;read_filenames(){ struct stat st; unsigned char buffer[MAX_NAME_SIZE]; char *currptr; int i; /* one time processing: assumes during one run of glimpse, the index remains constant! */ if (bigbuffer == NULL) { FILE *fp = fopen(NAME_LIST, "r"); if (fp == NULL) { fprintf(stderr, "Can't open for reading: %s/%s\n", INDEX_DIR, NAME_LIST); exit(2); } if (-1 == stat(NAME_LIST, &st)) { fclose(fp); fprintf(stderr, "Can't stat: %s/%s\n", INDEX_DIR, NAME_LIST); exit(2); } fgets(buffer, MAX_NAME_SIZE, fp); first_line_len = strlen(buffer); bigbuffer_size = st.st_size - first_line_len; sscanf(buffer, "%d", &file_num); if ((file_num < 0) || (file_num > MaxNum24bPartition)) { fclose(fp); fprintf(stderr, "Error in reading: %s/%s\n", INDEX_DIR, NAME_LIST); exit(2); } if (file_num == 0) { fclose(fp); fprintf(stderr, "Warning: No files were indexed! Exiting...\n"); exit(2); } initialize_data_structures(file_num); for (i=0; i<MAXNUM_PAT; i++) { multi_dest_index_set[i] = (unsigned int *)my_malloc(sizeof(int)*REAL_PARTITION); memset(multi_dest_index_set[i], '\0', sizeof(int) * REAL_PARTITION); } bigbuffer = (char *)my_malloc(bigbuffer_size + 2*MAX_PAT + 2); /* The whole file + place to store -F's pattern on BOTH sides */ outputbuffer_len = (FILES_PER_PARTITION(file_num)*MAX_NAME_SIZE); if (bigbuffer != NULL) outputbuffer = (char *)my_malloc(outputbuffer_len); /* Space for max# files per partition */ if (outputbuffer != NULL) GTextfiles = (CHAR **) my_malloc(sizeof(CHAR *) * file_num); if (GTextfiles != NULL) GTextfilenames = (CHAR **) my_malloc(sizeof(CHAR *) * file_num); if (GTextfilenames != NULL) GFileIndex = (int *)my_malloc(sizeof(int) * file_num); if (bigbuffer == NULL || outputbuffer == NULL || GTextfiles == NULL || GTextfilenames == NULL || GFileIndex == NULL) { fclose(fp); fprintf(stderr, "%s: my_malloc failure in %s:%d!\n", GProgname, __FILE__, __LINE__); exit(2); /* No point freeing memory */ } if (bigbuffer_size != fread(bigbuffer+MAX_PAT, 1, bigbuffer_size, fp)) {/* read in whole file in CONTIGUOUS memory */ fclose(fp); fprintf(stderr, "Error in reading: %s/%s\n", INDEX_DIR, NAME_LIST); exit(2); /* No point freeing memory */ } memset(bigbuffer, '\n', MAX_PAT); memset(bigbuffer+bigbuffer_size+MAX_PAT, '\n', MAX_PAT + 2); for (i=0, currptr = bigbuffer+MAX_PAT; i<file_num && currptr < bigbuffer + MAX_PAT + bigbuffer_size; i++, currptr ++) { GTextfilenames[i] = (unsigned char *)currptr; while (*currptr != '\n') currptr ++; } } return 0;}/* If too many files obtained as result of search, go to original slow algorithm *//* Else, -f files as pattern to search the actual list of filenames that matched (compute new list from those that matched) */intmask_filenames(index_vect, infile, num_files, num_blocks)int *index_vect;char *infile;int num_files; /* total number of files */int num_blocks; /* number of files matching the search expression */{ char *argv[8]; int ret, i /* filenames_index */, j /* outputbuffer */, k, l, count, maxcount, offset, prevreadoffset, readoffset, num_read, name_list_size, found; char *temp_bigbuffer; int *temp_bigbuffer_offset; int *temp_bigbuffer_index; int temp_bigbuffer_len = 0; int curr_temp_bigbuffer; char *name_list_buffer; if ((num_blocks*100/num_files > DEF_MAX_INDEX_PERCENT/2) && (num_blocks > MaxNum8bPartition)) return slow_mask_filenames(index_vect, infile); for (i=0; i<num_files; i++) { if (index_vect[block2index(i)] & mask_int[i % 32]) { if (i < num_files - 1) temp_bigbuffer_len += GTextfilenames[i+1] - GTextfilenames[i] /* including '\n' */; else temp_bigbuffer_len += &bigbuffer[bigbuffer_size + MAX_PAT] - (char *)GTextfilenames[i]; } } if ((temp_bigbuffer = (char *)malloc(temp_bigbuffer_len + 2)) == NULL) { fprintf(stderr, "%s: my_malloc failure in %s:%d!\n", GProgname, __FILE__, __LINE__); exit(2); } if ((temp_bigbuffer_offset = (int *)malloc((num_blocks+1)*sizeof(int))) == NULL) { fprintf(stderr, "%s: my_malloc failure in %s:%d!\n", GProgname, __FILE__, __LINE__); exit(2); } if ((temp_bigbuffer_index = (int *)malloc(num_blocks*sizeof(int))) == NULL) { fprintf(stderr, "%s: my_malloc failure in %s:%d!\n", GProgname, __FILE__, __LINE__); exit(2); } temp_bigbuffer[0] = '\n'; for (i=0, curr_temp_bigbuffer=1, j=0; (i<num_files) && (j<num_blocks); i++) { if (index_vect[block2index(i)] & mask_int[i % 32]) { /* will be satisfied num_blocks times */ temp_bigbuffer_offset[j] = curr_temp_bigbuffer; temp_bigbuffer_index[j] = i; j++; if (i < num_files - 1) { memcpy(&temp_bigbuffer[curr_temp_bigbuffer], GTextfilenames[i], GTextfilenames[i+1] - GTextfilenames[i] /* including '\n' */); curr_temp_bigbuffer += GTextfilenames[i+1] - GTextfilenames[i] /* including '\n' */; } else { memcpy(&temp_bigbuffer[curr_temp_bigbuffer], GTextfilenames[i], &bigbuffer[bigbuffer_size + MAX_PAT] - (char *)GTextfilenames[i]); curr_temp_bigbuffer += &bigbuffer[bigbuffer_size + MAX_PAT] - (char *)GTextfilenames[i] /* including '\n' */; } } } temp_bigbuffer_offset[num_blocks] = temp_bigbuffer_len + 1; /* last one */ /* Now, call agrep with print offset of match */ name_list_buffer = temp_bigbuffer; name_list_size = temp_bigbuffer_len + 1; /* including initial '\n' */ argv[0] = "glimpse"; argv[1] = "-b"; argv[2] = "-u"; argv[3] = "-f"; argv[4] = infile; argv[5] = ""; errno = 0; if ((((ret = memagrep(5, argv, name_list_size, name_list_buffer, outputbuffer_len, outputbuffer)) <= 0) || (agrep_outpointer <= 0)) && (errno != ERANGE)) { for (i=0; i<round(file_num, 8*sizeof(int)); i++) index_vect[i] = 0; free(temp_bigbuffer); free(temp_bigbuffer_offset); free(temp_bigbuffer_index); return -1; } /* outputbuffer[agrep_outpointer] = '\0'; printf("%s", outputbuffer); getchar(); */ /* Check these offsets (outputbuffer[0..agrep_outpointer]) against those stored in temp_bigbuffer_offset and figure out the mask */ /* Use the fact that temp_bigbuffer_offset has the beginning offset of each file name in bigbuffer */ for (i=0; i<round(file_num, 8*sizeof(int)); i++) multi_dest_index_set[0][i] = 0; ret = sizeof(int); num_read = ret; prevreadoffset = temp_bigbuffer_offset[0]; /* printf("prevreadoffset=%d name_list_size=%d\n", prevreadoffset, name_list_size); */ count = 0; maxcount = 0; i = 0; j = 0; l = 0; while (j < agrep_outpointer) { /* offsets will be printed by agrep always in the increasing order, so this is a one pass algorithm */ k = j; while ((k<agrep_outpointer) && (isalnum(((unsigned char*)outputbuffer)[k]))) k++; outputbuffer[k] = '\0'; offset = 0; errno = 0; offset = atoi(&outputbuffer[j]); /* printf("offset=%d\n", offset); */ k++; while ((k<agrep_outpointer) && (!isalnum(((unsigned char *)outputbuffer)[k]))) k++; j = k; if ((offset <= 0) || (errno == ERANGE)) continue; found = 0; while (!found) { if (count >= maxcount) { /* first time (not compressing into smaller code since I want it to be similar to slow_mask... below) */ ret = (num_blocks - 1)*sizeof(int); num_read += ret; maxcount = num_blocks; for (i=0; i<=ret /* to process last one also */; i+=sizeof(int), count++) { readoffset = temp_bigbuffer_offset[1 + i/sizeof(int)]; /* printf("readoffset=%d\n", readoffset); */ if ((offset >= prevreadoffset) && (offset < readoffset)) { /* printf("count=%d\n", count); */ if (OneFilePerBlock) multi_dest_index_set[0][block2index(temp_bigbuffer_index[count])] |= mask_int[temp_bigbuffer_index[count] % 32]; else { for (; l<MAX_PARTITION; l++) { if ((temp_bigbuffer_index[count] >= p_table[l]) && (temp_bigbuffer_index[count] < p_table[l+1])) { multi_dest_index_set[0][l] = 1; break; /* out of for */ } } /* can't come here without break: if it does (serious!) will break out w/o setting anything */ } prevreadoffset = readoffset; i += sizeof(int); count ++; found = 1; break; /* out of for */ } prevreadoffset = readoffset; } } else { for (; i<=ret /* to process last one also */; i+=sizeof(int), count++) { readoffset = temp_bigbuffer_offset[1 + i/sizeof(int)]; /* printf("readoffset=%d\n", readoffset); */ if ((offset >= prevreadoffset) && (offset < readoffset)) { /* printf("count=%d\n", count); */ if (OneFilePerBlock) multi_dest_index_set[0][block2index(temp_bigbuffer_index[count])] |= mask_int[temp_bigbuffer_index[count] % 32]; else { for (; l<MAX_PARTITION; l++) { if ((temp_bigbuffer_index[count] >= p_table[l]) && (temp_bigbuffer_index[count] < p_table[l+1])) { multi_dest_index_set[0][l] = 1; break; /* out of for */ } } /* can't come here without break: if it does (serious!) will break out without setting anything */ } prevreadoffset = readoffset; i += sizeof(int); count ++; found = 1; break; /* out of for */ } prevreadoffset = readoffset; } } } } /* Now AND the incoming mask with the one constructed above */ if (OneFilePerBlock) { for (i=0; i<round(file_num, 8*sizeof(int)); i++) index_vect[i] &= multi_dest_index_set[0][i]; } else { for (i=0; i<MAX_PARTITION; i++) index_vect[i] &= multi_dest_index_set[0][i]; } free(temp_bigbuffer); free(temp_bigbuffer_offset); free(temp_bigbuffer_index); return 0;}/* Searches the set of file names in bigbuffer for the files mentioned in infile and forces index_vect to contain the mask for these files only *//* Works only when .glimpse_filenames_hash is created by glimpseindex, i.e., glimpse version 3.0 or more */intslow_mask_filenames(index_vect, infile)int *index_vect;char *infile;{ char *argv[8], tempfile[MAX_NAME_LEN], *name_list_buffer; CHAR tempbuf[MAX_LINE_LEN]; FILE *fp; int ret, i /* filenames_index */, j /* outputbuffer */, k, l, count, maxcount, offset, prevreadoffset, readoffset, num_read, name_list_size, found; struct stat st_buf; /* Call agrep with "print byte-offset = -b", "don't print pattern = -u" and "multi-pattern search = -f"; put the output in outputbuffer */#if 0 strcpy(tempfile, INDEX_DIR); strcat(tempfile, "/"); strcat(tempfile, NAME_LIST); stat(tempfile, &st_buf); name_list_size = st_buf.st_size;#else name_list_buffer = bigbuffer + MAX_PAT - 1; name_list_size = bigbuffer_size + 1;#endif argv[0] = "glimpse"; argv[1] = "-b"; argv[2] = "-u"; argv[3] = "-f"; argv[4] = infile;#if 0 argv[5] = tempfile; argv[6] = ""; errno = 0; if ((((ret = fileagrep(6, argv, outputbuffer_len, outputbuffer)) <= 0) || (agrep_outpointer <= 0)) && (errno != ERANGE)) { for (i=0; i<round(file_num, 8*sizeof(int)); i++) index_vect[i] = 0; return -1; }#else argv[5] = ""; errno = 0; if ((((ret = memagrep(5, argv, name_list_size, name_list_buffer, outputbuffer_len, outputbuffer)) <= 0) || (agrep_outpointer <= 0)) && (errno != ERANGE)) { for (i=0; i<round(file_num, 8*sizeof(int)); i++) index_vect[i] = 0; return -1; }#endif /* outputbuffer[agrep_outpointer] = '\0'; printf("%s", outputbuffer); getchar(); */ /* Check these offsets (outputbuffer[0..agrep_outpointer]) against those stored in NAME_LIST_INDEX and figure out the mask */ /* Use the fact that NAME_LIST_INDEX has the beginning offset of each file name in NAME_LIST (ending offset of last filename = size of NAME_LIST) */ strcpy(tempfile, INDEX_DIR); strcat(tempfile, "/"); strcat(tempfile, NAME_LIST_INDEX); if ((fp = fopen(tempfile, "r")) == NULL) { for (i=0; i<round(file_num, 8*sizeof(int)); i++) index_vect[i] = 0; return -1; } fstat(fileno(fp), &st_buf); st_buf.st_size = (st_buf.st_size/sizeof(int)) * sizeof(int); /* chop it off */ for (i=0; i<round(file_num, 8*sizeof(int)); i++) multi_dest_index_set[0][i] = 0; if ((ret = fread(tempbuf, 1, sizeof(int), fp)) != sizeof(int)) { fclose(fp); for (i=0; i<round(file_num, 8*sizeof(int)); i++) index_vect[i] = 0; return -1; } num_read = ret; prevreadoffset = (tempbuf[0] << 24) | (tempbuf[1] << 16) | (tempbuf[2] << 8) | tempbuf[3]; /* printf("prevreadoffset=%d name_list_size=%d\n", prevreadoffset, name_list_size); */ count = 0; maxcount = 0; i = 0; j = 0; l = 0; while (j < agrep_outpointer) { /* offsets will be printed by agrep always in the increasing order, so this is a one pass algorithm */ k = j; while ((k<agrep_outpointer) && (isalnum(((unsigned char *)outputbuffer)[k]))) k++; outputbuffer[k] = '\0'; offset = 0; errno = 0; offset = atoi(&outputbuffer[j]) + first_line_len - 1; /* I have \n part of it included in name_list_buffer */ /* printf("offset=%d\n", offset); */ k++; while ((k<agrep_outpointer) && (!isalnum(((unsigned char *)outputbuffer)[k]))) k++; j = k; if ((offset <= first_line_len - 1) || (errno == ERANGE)) continue; found = 0; while (!found) { if (count >= maxcount) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -