📄 convert.c
字号:
unsigned char *filehash_buffer, *filehash_index_buffer; int files_used;{ int lasti, ret, i, k, foundblank=0, offset, lastoffset = -1, hash, size; unsigned char *buffer; if ((len <= 0) || (len >= MAX_LINE_LEN)) { errno = EINVAL; return -1; } hash = hashNk(file, len); i = hash*4; offset = (filehash_index_buffer[i] << 24) | (filehash_index_buffer[i+1] << 16) | (filehash_index_buffer[i+2] << 8) | filehash_index_buffer[i+3]; if (BigFilenameHashTable) lasti = MAX_64K_HASH - 1; else lasti = MAX_4K_HASH - 1; if (i == lasti) lastoffset = filehash_len; else lastoffset = (filehash_index_buffer[i+4] << 24) | (filehash_index_buffer[i+5] << 16) | (filehash_index_buffer[i+6] << 8) | filehash_index_buffer[i+7]; if ((offset < 0) || (offset > filehash_len) || (lastoffset < 0) || (lastoffset > filehash_len) || (offset >= lastoffset)) { errno = ENOENT; return -1; } size = lastoffset - offset; if (size <= 1) { errno = ENOENT; return -1; }/* fprintf(stderr, "hash=%d offset=%d lastoffset=%d size=%d\n", hash, offset, lastoffset, size); */ buffer = &filehash_buffer[offset]; for (i=0; i<size; i+=4+strlen((char *)&buffer[i+4])+1) { if (InfoAfterFilename) { k = i+4; while (buffer[k] != '\0') { if (buffer[k] == '\\') { k ++; if (buffer[k] == '\0') break; k++; continue; } else { if (buffer[k] == FILE_END_MARK) { buffer[k] = '\0'; foundblank = 1; break; } k++; continue; } } } if (!strcmp((char *)&buffer[i+4], file)) { *pelement = (buffer[i] << 24) | (buffer[i+1] << 16) | (buffer[i+2] << 8) | buffer[i+3]; if (InfoAfterFilename && foundblank) { buffer[k] = FILE_END_MARK; } return 0; } if (InfoAfterFilename && foundblank) { buffer[k] = FILE_END_MARK; } hash_misses ++; } errno = ENOENT; return -1;}/******************************************************************************************** * Converts format of one file "inputfile" to another "outputfile" * * Returns: always 0 for now indicating there was no error: might want to modify this later * * Uses global file descriptors (fdname....) 
and memory buffers (filenames_buffer/len...)                                             *
 ********************************************************************************************/
/*
 * Parameters:
 *   inputfile / outputfile   streams to read the set from / write the converted set to
 *   indextype                0 selects the "one entry per partition" interpretation
 *                            (index_set[k] is a 0/1 flag, k < MAX_PARTITION); any other
 *                            value selects the packed bitmap interpretation
 *   InputType / OutputType   IS_NAMES, IS_INDICES or IS_BITS
 *   InputEndian              byte order of the words read when InputType == IS_BITS
 *   OutputEndian             byte order of the words written when OutputType == IS_BITS
 *   index_set                caller-owned scratch bitmap of index_set_size words;
 *                            cleared on entry and filled while reading the input
 *   ReadIntoMemory           non-zero: use the mem_* lookups on the in-memory buffers,
 *                            zero: use the file-descriptor based lookups
 *
 * Indices are always read and written as 4-byte big-endian integers.
 * Names or indices that cannot be resolved (lookup returns -1) are silently skipped.
 */
int
do_conversion(inputfile, outputfile, indextype, InputType, OutputType, InputEndian, OutputEndian, index_set, index_set_size, ReadIntoMemory)
	FILE	*inputfile;
	FILE	*outputfile;
	int	indextype;
	int	InputType;
	int	OutputType;
	int	InputEndian;
	int	OutputEndian;
	unsigned int	*index_set;
	unsigned int	index_set_size;
	int	ReadIntoMemory;
{
	int	i, m = 0, name_len, ret;
	int	nextchar;
	unsigned int	word, w, bit;
	char	name[MAX_LINE_LEN];
	char	outname[MAX_LINE_LEN];

	/* zero out bits set in a previous call to this function */
	memset(index_set, '\0', index_set_size * sizeof(unsigned int));

	if (InputType == IS_NAMES) {
		while (fgets(name, MAX_LINE_LEN, inputfile) != NULL) {
			name_len = strlen(name);
			/*
			 * Strip the newline only if it is really there: the last line of the
			 * input may lack one, and an empty string must not index name[-1].
			 */
			if ((name_len > 0) && (name[name_len - 1] == '\n')) name[name_len - 1] = '\0';
			if (InfoAfterFilename) discardinfo(name);
			name_len = strlen(name);

			if (ReadIntoMemory) ret = mem_name2element(&i, name, name_len, filehash_buffer, filehash_index_buffer, file_num);
			else ret = name2element(&i, name, name_len, fdhash, fdhash_index, file_num);
			if (ret == -1) continue;

			index_set[block2index(i)] |= mask_int[i%(8*sizeof(int))];
			if (OutputType == IS_INDICES) {	/* indices is always bigendian */
				putc(((i & 0xff000000) >> 24)&0xff, outputfile);
				putc(((i & 0x00ff0000) >> 16)&0xff, outputfile);
				putc(((i & 0x0000ff00) >> 8)&0xff, outputfile);
				putc((i & 0x000000ff), outputfile);
			}
		}
	}
	else if (InputType == IS_INDICES) {	/* indices is always bigendian */
		while ((nextchar = getc(inputfile)) != EOF) {
			/* Assemble in an unsigned word: shifting a byte >= 0x80 by 24 overflows a signed int */
			word = (unsigned int)(nextchar & 0xff) << 24;
			if ((nextchar = getc(inputfile)) == EOF) break;
			word |= (unsigned int)(nextchar & 0xff) << 16;
			if ((nextchar = getc(inputfile)) == EOF) break;
			word |= (unsigned int)(nextchar & 0xff) << 8;
			if ((nextchar = getc(inputfile)) == EOF) break;
			word |= (unsigned int)(nextchar & 0xff);
			i = (int)word;

			/* A negative index (top bit set in the input) must not reach index_set[] */
			if (indextype != 0) {
				if ((i >= 0) && (i < file_num)) index_set[block2index(i)] |= mask_int[i%(8*sizeof(int))];
			}
			else {
				if ((i >= 0) && (i < MAX_PARTITION)) index_set[i] = 1;
			}

			if (OutputType == IS_NAMES) {
				if (ReadIntoMemory) ret = mem_element2name(i, outname, filenames_buffer, filenames_index_buffer, file_num);
				else ret = element2name(i, outname, fdname, fdname_index, file_num);
				if (ret != -1) fprintf(outputfile, "%s\n", outname);
			}
		}
	}
	else if (InputType == IS_BITS) {
		i = 0;
		while ((i < sizeof(int) * index_set_size) && (nextchar = getc(inputfile)) != EOF) {
			nextchar = nextchar & 0x000000ff;
			if (indextype != 0) {
				if (InputEndian == IS_LITTLE_ENDIAN) {
					/* little-endian: little end of integer was dumped first in bitfield_file */
					index_set[i/4] |= ((unsigned int)nextchar << (8*(i%4)));
				}
				else if (InputEndian == IS_BIG_ENDIAN) {
					/* big-endian: big end of integer was dumped first in bitfield_file */
					index_set[i/4] |= ((unsigned int)nextchar << (8*(4-1-(i%4))));
				}
			}
			else {
				/* interpretation of "bit" changes without OneFilePerBlock: one input byte per entry */
				if (i < MAX_PARTITION) index_set[i] = (nextchar != 0) ? 1 : 0;
				else break;	/* BITFIELDLENGTH, by above definition, is always > MAX_PARTITION: see io.c */
			}
			i++;
		}

		/* Walk every set bit and emit the corresponding name or index */
		for (w=0; w<index_set_size; w++) {
			for (bit=0; bit<sizeof(int)*8; bit++) {
				if (!(index_set[w] & mask_int[bit])) continue;
				m = (int)(w*sizeof(int)*8 + bit);
				if (OutputType == IS_NAMES) {
					if (ReadIntoMemory) ret = mem_element2name(m, outname, filenames_buffer, filenames_index_buffer, file_num);
					else ret = element2name(m, outname, fdname, fdname_index, file_num);
					if (ret != -1) fprintf(outputfile, "%s\n", outname);
				}
				else if (OutputType == IS_INDICES) {	/* indices is always bigendian */
					putc((m&0xff000000)>>24, outputfile);
					putc((m&0x00ff0000)>>16, outputfile);
					putc((m&0x0000ff00)>>8, outputfile);
					putc((m&0x000000ff), outputfile);
				}
			}
		}
	}

	/*
	 * Names and indices inputs share the same bitmap dump: each word of index_set
	 * is written as 4 bytes in the requested byte order. Nothing else is written
	 * to outputfile on these paths when OutputType == IS_BITS, so dumping here,
	 * after the input has been consumed, produces the same output.
	 */
	if (((InputType == IS_NAMES) || (InputType == IS_INDICES)) && (OutputType == IS_BITS)) {
		for (w=0; w<index_set_size; w++) {
			if (OutputEndian == IS_BIG_ENDIAN) {
				putc(((index_set[w] & 0xff000000) >> 24)&0xff, outputfile);
				putc(((index_set[w] & 0x00ff0000) >> 16)&0xff, outputfile);
				putc(((index_set[w] & 0x0000ff00) >> 8)&0xff, outputfile);
				putc((index_set[w] & 0x000000ff), outputfile);
			}
			else if (OutputEndian == IS_LITTLE_ENDIAN) {
				putc((index_set[w] & 0x000000ff), outputfile);
				putc(((index_set[w] & 0x0000ff00) >> 8)&0xff, outputfile);
				putc(((index_set[w] & 0x00ff0000) >> 16)&0xff, outputfile);
				putc(((index_set[w] & 0xff000000) >> 24)&0xff, outputfile);
			}
		}
	}

	return 0;
}

/**********************************************************************
 * Calls do_conversion() to convert storage format of a set of files; *
 * Optimizes some cases by reading important files into memory.
* * Returns: 0 on success, -1 on failure * **********************************************************************/intchange_format(InputFilenames, ReadIntoMemory, InputType, OutputType, InputEndian, OutputEndian, glimpseindex_dir, filename_prefix) int InputFilenames; int ReadIntoMemory; int InputType; int OutputType; int InputEndian; int OutputEndian; char *glimpseindex_dir; char *filename_prefix;{ char outname[MAX_LINE_LEN]; /* place where converted output is stored */ char s[MAX_LINE_LEN]; /* temp buffer */ char realname[MAX_LINE_LEN]; /* name after prefix of neighbourhood file is added to it */ char name[MAX_LINE_LEN]; /* name of file gotten from stdin: only if (InputFilenames) */ int lastslash, name_len, indextype, indexnumber, structuredindex, recordlevelindex, temp_attr_num, bytelevelindex; /*indextype*/ int i, ret; /* for-loop/return-value */ int num_input_filenames; /* for statistics */ char temp_rdelim[MAX_LINE_LEN]; /*indextype*/ struct stat istbuf; /*indexstat*/ struct stat fstbuf; /*filestat*/ unsigned int *index_set, index_set_size; /*neighbourhood's bitmap representation*/ FILE *inputfile, *outputfile; /*file to be converted/file to store converted output: only if (InputFilenames) */ /* Options set: read index */ sprintf(s, "%s/%s", glimpseindex_dir, INDEX_FILE); if (-1 == stat(s, &istbuf)) { fprintf(stderr, "Cannot find index in directory `%s'\n\tuse `-H dir' to specify a glimpse index directory\n", glimpseindex_dir); return usage(); } /* Find out existing index of words and partitions/filenumbers */ indextype = get_index_type(s, &indexnumber, &indextype, &structuredindex, temp_rdelim); if (structuredindex == -2) { recordlevelindex = 1; bytelevelindex = 1; } if (structuredindex <= 0) structuredindex = 0; else { temp_attr_num = structuredindex; structuredindex = 1; } if (indextype == 0) { file_num = MAX_PARTITION; /*tiny*/ index_set_size = MAX_PARTITION; } else { if (indextype > 0) file_num = indextype; /*small*/ else file_num = -indextype; /*medium*/ 
index_set_size = ((file_num + 8*sizeof(int) - 1)/(8*sizeof(int))); } index_set = (unsigned int *)my_malloc(index_set_size * sizeof(unsigned int)); memset(index_set, '\0', index_set_size * sizeof(unsigned int)); sprintf(name, "%s/%s", glimpseindex_dir, NAME_LIST); if ((fdname = open(name, O_RDONLY, 0)) == -1) { fprintf(stderr, "Cannot open for reading: %s\n", name); return -1; } fstbuf.st_size = 0; fstat(fdname, &fstbuf); if (ReadIntoMemory) { filenames_len = fstbuf.st_size; filenames_buffer = NULL; if (allocate_and_fill(&filenames_buffer, filenames_len, name, fdname) == -1) { close(fdname); if (filenames_buffer != NULL) my_free(filenames_buffer, filenames_len); return -1; } close(fdname); } sprintf(name, "%s/%s", glimpseindex_dir, NAME_LIST_INDEX); if ((fdname_index = open(name, O_RDONLY, 0)) == -1) { fprintf(stderr, "Cannot open for reading: %s\n", name); if (!ReadIntoMemory) { close(fdname); } else { if (filenames_buffer != NULL) my_free(filenames_buffer, filenames_len); } return -1; } fstbuf.st_size = 0; fstat(fdname_index, &fstbuf); if (ReadIntoMemory) { filenames_index_len = fstbuf.st_size; filenames_index_buffer = NULL; if (allocate_and_fill(&filenames_index_buffer, filenames_index_len, name, fdname_index) == -1) { close(fdname_index); if (filenames_buffer != NULL) my_free(filenames_buffer, filenames_len); if (filenames_index_buffer != NULL) my_free(filenames_index_buffer, filenames_index_len);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -