📄 convert.c
字号:
/**********************************************************************************************/
/* convert.c: Program to inter-convert different representations of neighbourhood sets       */
/*                                                                                            */
/* Uses: to compress neighbourhood sets for faster search/uncompress for viewing/editing them */
/* Author: Burra Gopal, bgopal@cs.arizona.edu, Sep 7-8 1996: WebGlimpse support               */
/**********************************************************************************************/

#include "glimpse.h"
#include <stdlib.h>
#include <sys/time.h>
#if	ISO_CHAR_SET
#include <locale.h>	/* support for 8bit character set:ew@senate.be */
#endif
#include <errno.h>

#define IS_LITTLE_ENDIAN	1
#define IS_BIG_ENDIAN		0

#define IS_INDICES	1
#define IS_BITS		2
#define IS_NAMES	3

#define USUALBUFFER_SIZE	(MAX_LINE_LEN*64)

/* Exported routines */
int	element2name(/*int, out char*, int, int, int*/);
int	mem_element2name(/*int, char*, unsigned char*, unsigned char*, int*/);
int	name2element(/*out int*, char*, int, int, int, int*/);
int	mem_name2element(/*out int*, char*, int, unsigned char*, unsigned char*, int*/);
int	do_conversion(/*FILE*, FILE*, int, int, int, int, int, unsigned int *, int, int*/);
int	change_format(/*int, int, int, int, int, int, char *, char **/);

/* Imported routines */
int	hashNk(/*char *, int*/);	/* from io.c */

/* Internal routines */
int	discardinfo(/*char **/);
int	allocate_and_fill(/* out unsigned char **, int, char *, int*/);

/* Imported variables */
/* errno comes from <errno.h> above; it is deliberately not re-declared here because it may be a macro */
extern int	get_index_type();	/* from io.c */
extern int	file_num;		/* from io.c */
extern int	mask_int[32];		/* from io.c */
extern int	BigFilenameHashTable;	/* from io.c */
extern int	InfoAfterFilename;	/* from io.c */

/* Internal variables */

/* Variables related to options (i/p-->o/p types)*/
int	InputType, OutputType, InputEndian, OutputEndian, InputFilenames, ReadIntoMemory;
char	glimpseindex_dir[MAX_LINE_LEN];
char	filename_prefix[MAX_LINE_LEN];

/* Variables related to ReadIntoMemory option (I/O efficiency) */
unsigned char	*filenames_buffer, *filenames_index_buffer, *filehash_buffer, *filehash_index_buffer;
int	filenames_len, filenames_index_len, filehash_len, filehash_index_len;
int	fdname, fdname_index, fdhash, fdhash_index;
unsigned char	usualbuffer[USUALBUFFER_SIZE];

/* Variables for statistics */
int	hash_misses = 0;

/*
 * Decodes the 4 bytes at "bytes" as a big-endian (most significant byte first) integer.
 * The shifts are done on unsigned values so that a first byte >= 0x80 does not shift
 * into the sign bit of a signed int; the result is converted to int only at the end.
 */
static int
convert_be32(bytes)
	unsigned char	*bytes;
{
	unsigned int	value;

	value = ((unsigned int)bytes[0] << 24) |
		((unsigned int)bytes[1] << 16) |
		((unsigned int)bytes[2] << 8) |
		(unsigned int)bytes[3];
	return (int)value;
}

/*
 * Discards the information that follows FILE_END_MARK in a filename.
 *
 * file: NUL-terminated string, modified in place. A backslash escapes the
 *       character after it, so an escaped FILE_END_MARK is not treated as the mark.
 *
 * Returns: 0 if a mark was found (the string is cut there), -1 otherwise,
 *          including whenever InfoAfterFilename is not set (pab23feb98).
 * CHANGED from ' ' to FILE_END_MARK 6/7/99 --GB
 */
int
discardinfo(file)
	char	file[];
{
	int	k;

	if (!InfoAfterFilename)
		return -1;

	for (k = 0; file[k] != '\0'; k++) {
		if (file[k] == '\\') {
			/* step onto the escaped character; a trailing lone backslash ends the scan */
			if (file[++k] == '\0')
				break;
		} else if (file[k] == FILE_END_MARK) {
			file[k] = '\0';
			return 0;
		}
	}
	return -1;
}

/*
 * Allocates "*buffer" with my_malloc(len) and fills it with "len" bytes read from "fd".
 *
 * buffer:   out, receives the allocated block
 * len:      number of bytes to allocate and read; must be > 0
 * filename: used only in the diagnostics printed on stderr
 * fd:       descriptor to read from, starting at its current position
 *
 * Returns: 0 on success, -1 if len <= 0, if the allocation fails, or if fewer than
 *          "len" bytes could be read. read() is called repeatedly because a single
 *          call may legitimately return fewer bytes than requested.
 * Note:    on a read failure the block is not released here; *buffer still points at it.
 */
int
allocate_and_fill(buffer, len, filename, fd)
	unsigned char	**buffer;
	int	len;
	char	*filename;
	int	fd;
{
	int	total = 0;
	int	n;

	if ((len <= 0) || ((*buffer = (unsigned char *)my_malloc(len)) == NULL)) {
		fprintf(stderr, "Disable -M option: cannot allocate memory for %s\n", filename);
		return -1;
	}

	while (total < len) {
		n = read(fd, *buffer + total, len - total);
		if ((n < 0) && (errno == EINTR))
			continue;	/* interrupted before any data arrived: retry */
		if (n <= 0)
			break;		/* real error or premature end of file */
		total += n;
	}
	if (total != len) {
		fprintf(stderr, "Disable -M option: cannot read %s\n", filename);
		return -1;
	}
	return 0;
}

/*
 * Finds the filename for a given element (index#: every element points to an indexed object).
 * See glimpse/index/io.c/save_datastructures() for the format of the names-file.
 *
 * element:    index number to look up; must satisfy 0 <= element < files_used
 * file:       out, receives the NUL-terminated name; up to MAX_LINE_LEN bytes are
 *             read into it, so it must have room for that many
 * fd:         descriptor of the filenames file
 * fdi:        descriptor of the filenames-index file, read here as one 4-byte
 *             big-endian offset per element
 * files_used: number of valid elements
 *
 * Returns: 0 on success, -1 on error with errno set to EINVAL (bad element or an
 *          entry too long for "file") or ENOENT (index/name could not be read).
 */
int
element2name(element, file, fd, fdi, files_used)
	int	element;
	char	file[];		/* out */
	int	fd, fdi;	/* fd=filenames fd, fdi=filenames_index fd */
	int	files_used;
{
	int	offset, lastoffset = -1, want, len;
	unsigned char	array[4];

	if ((element < 0) || (element >= files_used)) {
		errno = EINVAL;
		return -1;
	}

	if (lseek(fdi, (long)element*4, SEEK_SET) == -1) {
		errno = ENOENT;
		return -1;
	}
	if (read(fdi, array, 4) != 4) {
		errno = ENOENT;
		return -1;
	}
	offset = convert_be32(array);

	/* The next index entry, when there is one, marks where this name ends */
	if (read(fdi, array, 4) == 4)
		lastoffset = convert_be32(array);

	if (lseek(fd, (long)offset, SEEK_SET) == -1) {
		fprintf(stderr, ".glimpse_filenames: can't seek to %d\n", offset);
		return -1;
	}

	if (lastoffset != -1) {
		/* Same sanity checks as mem_element2name(): never read a negative or oversized span */
		want = lastoffset - offset;
		if (want <= 0) {
			errno = ENOENT;
			return -1;
		}
		if (want >= MAX_LINE_LEN) {
			errno = EINVAL;
			return -1;
		}
	}
	else want = MAX_LINE_LEN;	/* no next entry: take whatever remains, up to the limit */

	len = read(fd, file, want);
	if (len <= 0) {	/* error, or nothing at that offset: file[len - 1] would be out of bounds */
		errno = ENOENT;
		return -1;
	}
	file[len - 1] = '\0';	/* separated by '\n', so zero that out: if empty file, will get its strlen() to be 0, as expected */
	if (InfoAfterFilename) discardinfo(file);
	return 0;
}

/*
 * Finds the filename for a given element (index#: every element points to an indexed object).
 * See glimpse/index/io.c/save_datastructures() for the format of the names-file.
 * Works by reading the in-memory copy of the files.
 *
 * element:                index number to look up; must satisfy 0 <= element < files_used
 * file:                   out, receives the NUL-terminated name (shorter than MAX_LINE_LEN)
 * filenames_buffer:       in-memory copy of the filenames file
 * filenames_index_buffer: in-memory copy of the filenames-index file, read here as
 *                         one 4-byte big-endian offset per element
 * files_used:             number of valid elements
 *
 * The buffer lengths are NOT parameters: they are taken from the file-scope variables
 * filenames_len and filenames_index_len (the two buffer parameters shadow the
 * file-scope variables of the same name).
 *
 * Returns: 0 on success, -1 on error with errno set to EINVAL (bad element, or entry
 *          too long) or ENOENT (offsets inconsistent with the names buffer).
 */
int
mem_element2name(element, file, filenames_buffer, filenames_index_buffer, files_used)
	int	element;
	char	file[];		/* out */
	unsigned char	*filenames_buffer, *filenames_index_buffer;
	int	files_used;
{
	int	entries, offset, lastoffset, size;

	/* filenames_index_len is a byte count; each index entry takes 4 bytes */
	entries = filenames_index_len / 4;
	if ((element < 0) || (element >= files_used) || (element >= entries)) {
		errno = EINVAL;
		return -1;
	}

	offset = convert_be32(&filenames_index_buffer[element*4]);
	if ((element == files_used - 1) || (element + 1 >= entries))
		lastoffset = filenames_len;	/* no following entry: name runs to the end of the names buffer */
	else
		lastoffset = convert_be32(&filenames_index_buffer[(element + 1)*4]);

	if ((offset < 0) || (offset > filenames_len) ||
	    (lastoffset < 0) || (lastoffset > filenames_len) ||
	    (offset >= lastoffset)) {
		errno = ENOENT;
		return -1;
	}

	size = lastoffset - offset;
	if (size >= MAX_LINE_LEN) {
		errno = EINVAL;
		return -1;
	}

	memcpy(file, &filenames_buffer[offset], size);
	file[size - 1] = '\0';	/* separated by '\n', so zero that out: if empty file, will get its strlen() to be 0, as expected */
	if (InfoAfterFilename) discardinfo(file);
	return 0;
}

/*
 * Finds the element (index#) for a given filename (every element points to an indexed object).
 * See glimpse/index/io.c/save_datastructures() for the format of the hash-file.
 *
 * pelement:   out, receives the element number on success
 * file:       NUL-terminated name to look up
 * len:        length handed to hashNk(); must satisfy 0 < len < MAX_LINE_LEN
 * fd:         descriptor of the filehash file
 * fdi:        descriptor of the filehash-index file, read here as one 4-byte
 *             big-endian offset per hash value
 * files_used: not used by this routine
 *
 * The bucket for hashNk(file, len) is read as a sequence of entries, each a 4-byte
 * big-endian element number followed by a NUL-terminated name. When InfoAfterFilename
 * is set, each stored name is compared only up to its first unescaped FILE_END_MARK.
 * Every entry that does not match increments hash_misses.
 *
 * Returns: 0 on success, -1 on error (assuming that element#s are >= 0, of course...)
 *          with errno set to EINVAL (bad len), ENOMEM (no memory for a large bucket)
 *          or ENOENT (not found / unreadable).
 */
int
name2element(pelement, file, len, fd, fdi, files_used)
	int	*pelement;	/* out */
	char	file[];
	int	len;
	int	fd, fdi;	/* fd=filehash fd, fdi=filehash_index fd */
	int	files_used;
{
	int	malloced = 0, ret, i, k = 0, end, foundblank, matched;
	int	offset, lastoffset = -1, hash, size;
	unsigned char	*buffer, array[4];

	if ((len <= 0) || (len >= MAX_LINE_LEN)) {
		errno = EINVAL;
		return -1;
	}
	hash = hashNk(file, len);

	if (lseek(fdi, (long)hash*4, SEEK_SET) == -1) {
		fprintf(stderr, ".glimpse_filehash_index: can't seek to %d\n", hash*4);
		return -1;
	}
	if ((ret = read(fdi, array, 4)) != 4) {
		fprintf(stderr, "read only %d bytes from %d\n", ret, hash*4);
		errno = ENOENT;
		return -1;
	}
	offset = convert_be32(array);

	/* The bucket ends where the next one starts, or at the end of the hash file for the last bucket */
	if (read(fdi, array, 4) == 4)
		lastoffset = convert_be32(array);
	else
		lastoffset = lseek(fd, (long)0, SEEK_END);

	size = lastoffset - offset;
	if (size <= 1) {
		errno = ENOENT;
		return -1;
	}

	if (size < USUALBUFFER_SIZE) buffer = usualbuffer;
	else {
		buffer = (unsigned char *)my_malloc(size);
		if (buffer == NULL) {
			errno = ENOMEM;
			return -1;
		}
		malloced = 1;
	}

	if ((lseek(fd, (long)offset, SEEK_SET) == -1) || (size != read(fd, buffer, size))) {
		if (malloced) my_free((char *)buffer, size);
		errno = ENOENT;
		return -1;
	}

	i = 0;
	while (i + 4 < size) {
		/* Locate this entry's terminator without ever looking past the bytes actually read */
		for (end = i + 4; (end < size) && (buffer[end] != '\0'); end++)
			;
		if (end >= size)
			break;	/* unterminated trailing data: treat as not found */

		/*
		 * Must be reset for every entry: a stale flag from an earlier entry would
		 * make the restore below overwrite this entry's terminating '\0'.
		 */
		foundblank = 0;
		if (InfoAfterFilename) {
			k = i + 4;
			while (buffer[k] != '\0') {
				if (buffer[k] == '\\') {
					k++;
					if (buffer[k] == '\0') break;
					k++;
				}
				else if (buffer[k] == FILE_END_MARK) {
					buffer[k] = '\0';	/* hide the trailing info for the comparison */
					foundblank = 1;
					break;
				}
				else k++;
			}
		}

		matched = !strcmp((char *)&buffer[i + 4], file);
		if (matched)
			*pelement = convert_be32(&buffer[i]);
		if (foundblank)
			buffer[k] = FILE_END_MARK;	/* put the mark back */
		if (matched) {
			if (malloced) my_free((char *)buffer, size);
			return 0;
		}

		hash_misses ++;
		i = end + 1;	/* next entry starts right after this one's terminator */
	}

	if (malloced) my_free((char *)buffer, size);
	errno = ENOENT;
	return -1;
}

/*****************************************************************************************
 * Returns: element (index#) for given filename (every element points to indexed object) *
 * Returns: -1 if error (assuming that element#s are >= 0, ofcourse...)			 *
 * See glimpse/index/io.c/save_datastructures() for the format of the hash-file		 *
 * Works by reading in-memory copy of the files						 *
 *****************************************************************************************/
mem_name2element(pelement, file, len, filehash_buffer, filehash_index_buffer, files_used)
	int	*pelement;	/* out */
	char	*file;
	int	len;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -