⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 convert.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
	unsigned char	*filehash_buffer, *filehash_index_buffer;	int		files_used;{	int		lasti, ret, i, k, foundblank=0, offset, lastoffset = -1, hash, size;	unsigned char	*buffer;	if ((len <= 0) || (len >= MAX_LINE_LEN)) {		errno = EINVAL;		return -1;	}	hash = hashNk(file, len);	i = hash*4;	offset = (filehash_index_buffer[i] << 24) | (filehash_index_buffer[i+1] << 16) |			(filehash_index_buffer[i+2] << 8) | filehash_index_buffer[i+3];	if (BigFilenameHashTable) lasti = MAX_64K_HASH - 1;	else lasti = MAX_4K_HASH - 1;	if (i == lasti) lastoffset = filehash_len;	else lastoffset = (filehash_index_buffer[i+4] << 24) | (filehash_index_buffer[i+5] << 16) |				(filehash_index_buffer[i+6] << 8) | filehash_index_buffer[i+7];	if ((offset < 0) || (offset > filehash_len) || (lastoffset < 0) || (lastoffset > filehash_len) || (offset >= lastoffset)) {		errno = ENOENT;		return -1;	}	size = lastoffset - offset;	if (size <= 1) {		errno = ENOENT;		return -1;	}/* fprintf(stderr, "hash=%d offset=%d lastoffset=%d size=%d\n", hash, offset, lastoffset, size); */	buffer = &filehash_buffer[offset];	for (i=0; i<size; i+=4+strlen((char *)&buffer[i+4])+1) {		if (InfoAfterFilename) {			k = i+4;			while (buffer[k] != '\0') {				if (buffer[k] == '\\') {					k ++;					if (buffer[k] == '\0') break;					k++;					continue;				}				else {					if (buffer[k] == FILE_END_MARK) {						buffer[k] = '\0';						foundblank = 1;						break;					}					k++;					continue;				}			}		}		if (!strcmp((char *)&buffer[i+4], file)) {			*pelement = (buffer[i] << 24) | (buffer[i+1] << 16) | (buffer[i+2] << 8) | buffer[i+3];			if (InfoAfterFilename && foundblank) {				buffer[k] = FILE_END_MARK;			}			return 0;		}		if (InfoAfterFilename && foundblank) {			buffer[k] = FILE_END_MARK;		}		hash_misses ++;	}	errno = ENOENT;	return -1;}/******************************************************************************************** * Converts format of one file "inputfile" to another "outputfile"                          * * Returns: always 0 
for now indicating there was no error: might want to modify this later * * Uses global file descriptors (fdname....) and memory buffers (filenames_buffer/len...)   * ********************************************************************************************/intdo_conversion(inputfile, outputfile, indextype, InputType, OutputType, InputEndian, OutputEndian, index_set, index_set_size, ReadIntoMemory)	FILE		*inputfile;	FILE		*outputfile;	int		indextype;	int		InputType;	int		OutputType;	int		InputEndian;	int		OutputEndian;	unsigned int	*index_set;	unsigned int	index_set_size;	int		ReadIntoMemory;{	int		i, j, m = 0, name_len, ret;	int		nextchar;	char		s[MAX_LINE_LEN];	char		name[MAX_LINE_LEN];	char		outname[MAX_LINE_LEN];	struct stat	istbuf;	memset(index_set, '\0', index_set_size * sizeof(unsigned int));	/* zero out bits set in a previous call to this function ... */	/* Do actual conversion */	if (InputType == IS_NAMES) {		while (fgets(name, MAX_LINE_LEN, inputfile) != NULL) {			name_len = strlen(name);			name[name_len - 1] = '\0';	/* discard '\n' */			if (InfoAfterFilename) discardinfo(name);			name_len = strlen(name);			if (ReadIntoMemory) ret = mem_name2element(&i, name, name_len, filehash_buffer, filehash_index_buffer, file_num);			else ret = name2element(&i, name, name_len, fdhash, fdhash_index, file_num);			if (ret != -1) {/* fprintf(stderr, "%s-->%d %x\n", name, i, mask_int[i%(8*sizeof(int))]); */				index_set[block2index(i)] |= mask_int[i%(8*sizeof(int))];				if (OutputType == IS_INDICES) {	/* indices is always bigendian */					putc(((i & 0xff000000) >> 24)&0xff, outputfile);					putc(((i & 0x00ff0000) >> 16)&0xff, outputfile);					putc(((i & 0x0000ff00) >> 8)&0xff, outputfile);					putc((i & 0x000000ff), outputfile);				}			}		}		if (OutputType == IS_BITS) {			for (i=0; i<index_set_size; i++) {				if (OutputEndian == IS_BIG_ENDIAN) {					putc(((index_set[i] & 0xff000000) >> 24)&0xff, outputfile);					putc(((index_set[i] & 0x00ff0000) >> 16)&0xff, outputfile);		
			putc(((index_set[i] & 0x0000ff00) >> 8)&0xff, outputfile);					putc((index_set[i] & 0x000000ff), outputfile);				}				else if (OutputEndian == IS_LITTLE_ENDIAN) {	/* little */					putc((index_set[i] & 0x000000ff), outputfile);					putc(((index_set[i] & 0x0000ff00) >> 8)&0xff, outputfile);					putc(((index_set[i] & 0x00ff0000) >> 16)&0xff, outputfile);					putc(((index_set[i] & 0xff000000) >> 24)&0xff, outputfile);				}			}		}	}	else if (InputType == IS_INDICES) {	/* indices is always bigendian */		while ((nextchar = getc(inputfile)) != EOF) {			nextchar = nextchar & 0xff;			i = nextchar << 24;			if ((nextchar = getc(inputfile)) == EOF) break;			nextchar = nextchar & 0xff;			i |= nextchar << 16;			if ((nextchar = getc(inputfile)) == EOF) break;			nextchar = nextchar & 0xff;			i |= nextchar << 8;			if ((nextchar = getc(inputfile)) == EOF) break;			nextchar = nextchar & 0xff;			i |= nextchar;			if (indextype != 0) {				if (i < file_num) index_set[block2index(i)] |= mask_int[i%(8*sizeof(int))];			}			else {				if (i < MAX_PARTITION) index_set[i] = 1;			}			if (OutputType == IS_NAMES) {				if (ReadIntoMemory) ret = mem_element2name(i, outname, filenames_buffer, filenames_index_buffer, file_num);				else ret = element2name(i, outname, fdname, fdname_index, file_num);				if (ret != -1) fprintf(outputfile, "%s\n", outname);			}		}		if (OutputType == IS_BITS) {			for (i=0; i<index_set_size; i++) {				if (OutputEndian == IS_BIG_ENDIAN) {					putc(((index_set[i] & 0xff000000) >> 24)&0xff, outputfile);					putc(((index_set[i] & 0x00ff0000) >> 16)&0xff, outputfile);					putc(((index_set[i] & 0x0000ff00) >> 8)&0xff, outputfile);					putc((index_set[i] & 0x000000ff), outputfile);				}				else if (OutputEndian == IS_LITTLE_ENDIAN) {	/* little */					putc((index_set[i] & 0x000000ff), outputfile);					putc(((index_set[i] & 0x0000ff00) >> 8)&0xff, outputfile);					putc(((index_set[i] & 0x00ff0000) >> 16)&0xff, outputfile);					putc(((index_set[i] & 0xff000000) >> 24)&0xff, 
outputfile);				}			}		}	}	else if (InputType == IS_BITS) {		i = 0;		while ((i < sizeof(int) * index_set_size) && (nextchar = getc(inputfile)) != EOF) {			nextchar = nextchar & 0x000000ff;/* fprintf(stderr, "nextchar=%x\n", nextchar); */			if (indextype != 0) {				if (InputEndian == IS_LITTLE_ENDIAN) {	/* little-endian: little end of integer was dumped first in bitfield_file */					index_set[i/4] |= (nextchar << (8*(i%4)));				}				else if (InputEndian == IS_BIG_ENDIAN) {	/* big-endian: big end of integer is first was dumped first in bitfield_file */					index_set[i/4] |= (nextchar << (8*(4-1-(i%4))));				}			}			else {				if (i < MAX_PARTITION) {	/* interpretation of "bit" changes without OneFilePerBlock */					index_set[i] = (nextchar != 0) ? 1 : 0;				}				else break;	/* BITFIELDLENGTH, by above definition, is always > MAX_PARTITION: see io.c */			}			i++;		}		for (i=0; i<index_set_size; i++) {/* fprintf(stderr, "\nindex_set[%d]=%x\n", i, index_set[i]); */			for (j=0; j<sizeof(int)*8; j++) {				if (index_set[i] & mask_int[j]) {/* fprintf(stderr, " %d", j); */					m = i*sizeof(int)*8 + j;					if (OutputType == IS_NAMES) {						if (ReadIntoMemory) ret = mem_element2name(m, outname, filenames_buffer, filenames_index_buffer, file_num);						else ret = element2name(m, outname, fdname, fdname_index, file_num);						if (ret != -1) fprintf(outputfile, "%s\n", outname);					}					else if (OutputType == IS_INDICES) {	/* indices is always bigendian */						putc((m&0xff000000)>>24, outputfile);						putc((m&0x00ff0000)>>16, outputfile);						putc((m&0x0000ff00)>>8, outputfile);						putc((m&0x000000ff), outputfile);					}				}			}		}	}	return 0;}/********************************************************************** * Calls do_conversion() to convert storage format of a set of files; * * Optimizes some cases by reading important files into memory.       
* * Returns: 0 on success, -1 on failure                               * **********************************************************************/intchange_format(InputFilenames, ReadIntoMemory, InputType, OutputType, InputEndian, OutputEndian, glimpseindex_dir, filename_prefix)	int		InputFilenames;	int		ReadIntoMemory;	int		InputType;	int		OutputType;	int		InputEndian;	int		OutputEndian;	char		*glimpseindex_dir;	char		*filename_prefix;{	char		outname[MAX_LINE_LEN];		/* place where converted output is stored */	char		s[MAX_LINE_LEN];		/* temp buffer */	char		realname[MAX_LINE_LEN];		/* name after prefix of neighbourhood file is added to it */	char		name[MAX_LINE_LEN];		/* name of file gotten from stdin: only if (InputFilenames) */	int		lastslash, name_len, indextype, indexnumber, structuredindex, recordlevelindex, temp_attr_num, bytelevelindex;	/*indextype*/	int		i, ret;				/* for-loop/return-value */	int		num_input_filenames;		/* for statistics */	char		temp_rdelim[MAX_LINE_LEN];	/*indextype*/	struct stat	istbuf;				/*indexstat*/	struct stat	fstbuf;				/*filestat*/	unsigned int	*index_set, index_set_size;	/*neighbourhood's bitmap representation*/	FILE		*inputfile, *outputfile;	/*file to be converted/file to store converted output: only if (InputFilenames) */	/* Options set: read index */	sprintf(s, "%s/%s", glimpseindex_dir, INDEX_FILE);	if (-1 == stat(s, &istbuf)) {		fprintf(stderr, "Cannot find index in directory `%s'\n\tuse `-H dir' to specify a glimpse index directory\n", glimpseindex_dir);		return usage();	}	/* Find out existing index of words and partitions/filenumbers */	indextype = get_index_type(s, &indexnumber, &indextype, &structuredindex, temp_rdelim);	if (structuredindex == -2) {	    recordlevelindex = 1;	    bytelevelindex = 1;	}	if (structuredindex <= 0) structuredindex = 0;	else {	    temp_attr_num = structuredindex;	    structuredindex = 1;	}	if (indextype == 0) {		file_num = MAX_PARTITION;	/*tiny*/		index_set_size = MAX_PARTITION;	}	else {		if 
(indextype > 0) file_num = indextype;	/*small*/		else file_num = -indextype;	/*medium*/		index_set_size = ((file_num + 8*sizeof(int) - 1)/(8*sizeof(int)));	}	index_set = (unsigned int *)my_malloc(index_set_size * sizeof(unsigned int));	memset(index_set, '\0', index_set_size * sizeof(unsigned int));	sprintf(name, "%s/%s", glimpseindex_dir, NAME_LIST);	if ((fdname = open(name, O_RDONLY, 0)) == -1) {		fprintf(stderr, "Cannot open for reading: %s\n", name);		return -1;	}	fstbuf.st_size = 0;	fstat(fdname, &fstbuf);	if (ReadIntoMemory) {		filenames_len = fstbuf.st_size;		filenames_buffer = NULL;		if (allocate_and_fill(&filenames_buffer, filenames_len, name, fdname) == -1) {			close(fdname);			if (filenames_buffer != NULL) my_free(filenames_buffer, filenames_len);			return -1;		}		close(fdname);	}	sprintf(name, "%s/%s", glimpseindex_dir, NAME_LIST_INDEX);	if ((fdname_index = open(name, O_RDONLY, 0)) == -1) {		fprintf(stderr, "Cannot open for reading: %s\n", name);		if (!ReadIntoMemory) {			close(fdname);		}		else {			if (filenames_buffer != NULL) my_free(filenames_buffer, filenames_len);		}		return -1;	}	fstbuf.st_size = 0;	fstat(fdname_index, &fstbuf);	if (ReadIntoMemory) {		filenames_index_len = fstbuf.st_size;		filenames_index_buffer = NULL;		if (allocate_and_fill(&filenames_index_buffer, filenames_index_len, name, fdname_index) == -1) {			close(fdname_index);			if (filenames_buffer != NULL) my_free(filenames_buffer, filenames_len);			if (filenames_index_buffer != NULL) my_free(filenames_index_buffer, filenames_index_len);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -