📄 convert.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
			return -1;		}		close(fdname_index);	}	sprintf(name, "%s/%s", glimpseindex_dir, NAME_HASH);	if ((fdhash = open(name, O_RDONLY, 0)) == -1) {		fprintf(stderr, "Cannot open for reading: %s\n", name);		fprintf(stderr, "To change formats, the index must be built using `glimpseindex -h ...'\n");		if (!ReadIntoMemory) {			close(fdname);			close(fdname_index);		}		else {			if (filenames_buffer != NULL) my_free(filenames_buffer, filenames_len);			if (filenames_index_buffer != NULL) my_free(filenames_index_buffer, filenames_index_len);		}		return -1;	}	fstbuf.st_size = 0;	fstat(fdhash, &fstbuf);	if (ReadIntoMemory) {		filehash_len = fstbuf.st_size;		filehash_buffer = NULL;		if (allocate_and_fill(&filehash_buffer, filehash_len, name, fdhash) == -1) {			close(fdhash);			if (filenames_buffer != NULL) my_free(filenames_buffer, filenames_len);			if (filenames_index_buffer != NULL) my_free(filenames_index_buffer, filenames_index_len);			if (filehash_buffer != NULL) my_free(filehash_buffer, filehash_len);			return -1;		}		close(fdhash);	}	sprintf(name, "%s/%s", glimpseindex_dir, NAME_HASH_INDEX);	if ((fdhash_index = open(name, O_RDONLY, 0)) == -1) {		fprintf(stderr, "Cannot open for reading: %s\n", name);		fprintf(stderr, "To change formats, the index must be built using `glimpseindex -h ...'\n");		if (!ReadIntoMemory) {			close(fdname);			close(fdname_index);			close(fdhash);		}		else {			if (filenames_buffer != NULL) my_free(filenames_buffer, filenames_len);			if (filenames_index_buffer != NULL) my_free(filenames_index_buffer, filenames_index_len);			if (filehash_buffer != NULL) my_free(filehash_buffer, filehash_len);		}		return -1;	}	fstbuf.st_size = 0;	fstat(fdhash_index, &fstbuf);	if (fstbuf.st_size == MAX_64K_HASH * 4) BigFilenameHashTable = 1;	else if (fstbuf.st_size == MAX_4K_HASH * 4) BigFilenameHashTable = 0;	else {		fprintf(stderr, "Corrupted file: %s\n", name);		if (!ReadIntoMemory) {			close(fdname);			close(fdname_index);			close(fdhash);			close(fdhash_index);		}		
else {			if (filenames_buffer != NULL) my_free(filenames_buffer, filenames_len);			if (filenames_index_buffer != NULL) my_free(filenames_index_buffer, filenames_index_len);			if (filehash_buffer != NULL) my_free(filehash_buffer, filehash_len);		}		return -1;	}	if (ReadIntoMemory) {		if (BigFilenameHashTable) filehash_index_len = MAX_64K_HASH * 4;		else filehash_index_len = MAX_4K_HASH * 4;		/* filehash_index_len = fstbuf.st_size; */		filehash_index_buffer = NULL;		if (allocate_and_fill(&filehash_index_buffer, filehash_index_len, name, fdhash_index) == -1) {			close(fdhash_index);			if (filenames_buffer != NULL) my_free(filenames_buffer, filenames_len);			if (filenames_index_buffer != NULL) my_free(filenames_index_buffer, filenames_index_len);			if (filehash_buffer != NULL) my_free(filehash_buffer, filehash_len);			if (filehash_index_buffer != NULL) my_free(filehash_index_buffer, filehash_index_len);			return -1;		}		close(fdhash_index);	}/* fprintf(stderr, "file_num=%d, indextype=%d, structuredindex=%d, index_set_size=%d\n", file_num, indextype, structuredindex, index_set_size); */	/* Initialize statistics information */	hash_misses = 0;	/* Do actual conversion */	if (!InputFilenames) ret = do_conversion(stdin, stdout, indextype, 						InputType, OutputType, InputEndian, OutputEndian, index_set, index_set_size, ReadIntoMemory);	else {		sprintf(outname, "./.wgconvert.%d", getpid());	/* place where converted neighbourhoods are gonna be (./ => same file system as input :-) */		/* convert file by file: if there is an error in converting one file, go to the next one!!! 
*/		num_input_filenames = 0;		while (fgets(name, MAX_LINE_LEN, stdin) != NULL) {			num_input_filenames ++;			name_len = strlen(name);			name[name_len - 1] = '\0';			/* Figure out filename and put the -P prefix before it */			lastslash = -1;			for (i=0; i<name_len - 1; i++) {				if (name[i] == '/') {					lastslash = i;				}				else if (name[i] == '\\') {					i++;				}			}			if (lastslash >= 0) {				memcpy(realname, name, lastslash+1);				realname[lastslash+1] = '\0';			}			else realname[0] = '\0';			strcat(realname, filename_prefix);			strcat(realname, &name[lastslash+1]);			/* Call do_conversion() and check if it worked OK */			if ((inputfile = fopen(realname, "r")) == NULL) {				fprintf(stderr, "Can't open for reading: %s\n", realname);				continue;			}			if ((fstat(fileno(inputfile), &fstbuf) == -1) || (fstbuf.st_size <= 0)) {				fprintf(stderr, "Zero sized file: %s\n", realname);				fclose(inputfile);				continue;			}							if ((outputfile = fopen(outname, "w")) == NULL) {				fprintf(stderr, "Can't open for writing: %s\n", realname);				fclose(inputfile);				continue;			}			do_conversion(inputfile, outputfile, indextype, 					InputType, OutputType, InputEndian, OutputEndian, index_set, index_set_size, ReadIntoMemory);			fclose(inputfile);			fflush(outputfile);			if ((fstat(fileno(outputfile), &fstbuf) == -1) || (fstbuf.st_size <= 0)) {				fprintf(stderr, "Zero sized output for: %s\n", realname);				fclose(outputfile);				continue;			}			fclose(outputfile);			/* move the converted neighbourhood file into the old neighbourhood file */#if	0			sprintf(s, "mv -f %s %s", outname, realname);			if (system(s) == -1) fprintf(stderr, "Errno=%d -- could not execute: %s\n", errno, s);#else			if (rename(outname, realname) == -1) fprintf(stderr, "Errno=%d -- could not rename %s as %s\n", errno, outname, realname);#endif		}		unlink(outname);		ret = 0;	}	/* Cleanup and return */	if (!ReadIntoMemory) {		close(fdname);		close(fdname_index);		close(fdhash);		close(fdhash_index);	}	
else {		if (filenames_buffer != NULL) my_free(filenames_buffer, filenames_len);		if (filenames_index_buffer != NULL) my_free(filenames_index_buffer, filenames_index_len);		if (filehash_buffer != NULL) my_free(filehash_buffer, filehash_len);		if (filehash_index_buffer != NULL) my_free(filehash_index_buffer, filehash_index_len);	}	my_free(index_set, index_set_size*sizeof(int));#if	1	if (InputFilenames && (InputType == IS_NAMES)) printf("hash_misses=%d num_input_filenames=%d\n", hash_misses, num_input_filenames);#endif	return ret;}/*************************************** * Processes options                   * * Returns 0 on success, -1 on failure * ***************************************/intmain(argc, argv)	int	argc;	char	*argv[];{	/* Initialize */	InfoAfterFilename = 0;	InputFilenames = 0;	ReadIntoMemory = 0;	filenames_buffer = filenames_index_buffer = filehash_buffer = filehash_index_buffer = NULL;	filename_prefix[0] = '\0';	InputType = 0;	OutputType = 0;	InputEndian = IS_BIG_ENDIAN;	OutputEndian = IS_BIG_ENDIAN;	glimpseindex_dir[0] = '\0';	/* Read options (to know what they mean, check usage() below */	while (argc > 1) {		if (strcmp(argv[1], "-ni") == 0) {			InputType = IS_NAMES;			OutputType = IS_INDICES;			argc --; argv ++;		}		else if (strcmp(argv[1], "-in") == 0) {			InputType = IS_INDICES;			OutputType = IS_NAMES;			argc --; argv ++;		}		else if (strcmp(argv[1], "-nb") == 0) {			InputType = IS_NAMES;			OutputType = IS_BITS;			argc --; argv ++;		}		else if (strcmp(argv[1], "-bn") == 0) {			InputType = IS_BITS;			OutputType = IS_NAMES;			argc --; argv ++;		}		else if (strcmp(argv[1], "-ib") == 0) {			InputType = IS_INDICES;			OutputType = IS_BITS;			argc --; argv ++;		}		else if (strcmp(argv[1], "-bi") == 0) {			InputType = IS_BITS;			OutputType = IS_INDICES;			argc --; argv ++;		}		else if (strcmp(argv[1], "-lo") == 0) {			OutputEndian = IS_LITTLE_ENDIAN;			argc --; argv ++;		}		else if (strcmp(argv[1], "-li") == 0) {			InputEndian = IS_LITTLE_ENDIAN;			argc 
--; argv ++;		}		else if (strcmp(argv[1], "-H") == 0) {		    if (argc == 2) {			fprintf(stderr, "-H should be followed by a directory name\n");			return usage();		    }		    strncpy(glimpseindex_dir, argv[2], MAX_LINE_LEN);		    argc -= 2; argv += 2;		}		else if (strcmp(argv[1], "-P") == 0) {		    if (argc == 2) {			fprintf(stderr, "-P should be followed by a prefix for filenames\n");			return usage();		    }		    strncpy(filename_prefix, argv[2], MAX_LINE_LEN);		    argc -= 2; argv += 2;		}		else if (strcmp(argv[1], "-F") == 0) {			InputFilenames = 1;			argc --; argv ++;		}		else if (strcmp(argv[1], "-M") == 0) {			ReadIntoMemory = 1;			argc --; argv ++;		}		else if (strcmp(argv[1], "-U") == 0) {			InfoAfterFilename = 1;			argc --; argv ++;		}		else {			fprintf(stderr, "Invalid option %s\n", argv[1]);			return usage();		}	}	/* Check for errors */	if ((InputType == 0) || (OutputType == 0)) {		fprintf(stderr, "Must specify one of: -ib -bi -ni -in -nb -bn\n");		return usage();	}	return change_format(InputFilenames, ReadIntoMemory, InputType, OutputType, InputEndian, OutputEndian, glimpseindex_dir, filename_prefix);}/***************************************** * Prints out a help/usage message       * * Returns: nothing (exits from program) * *****************************************/intusage(){	fprintf(stderr, "\nusage: wgconvert {-ni,-in,-bn,-nb,-ib,-bi} [-li,-lo] [-F] [-H dir] [-M] [-P prefix] <infile >outfile\n");	fprintf(stderr, "`wgconvert' is used to change the format of neighbourhood files in `webglimpse'\n");	fprintf(stderr, "To change formats, the index must be built using `glimpseindex -h ...'\n\n");	fprintf(stderr, "There are 3 formats available:\n");	fprintf(stderr, "\t1. Complete path names of files (n)\n");	fprintf(stderr, "\t2. Indices of the files in .glimpse_filenames (i)\n");	fprintf(stderr, "\t3. A bit-mask of total#files bits for files in the neighborhood (b)\n");	fprintf(stderr, "We recommend options 1 or 2 since they are easy to use. 
To use #3, you must\n");	fprintf(stderr, "specify the proper `endian', since glimpse's -p option reads bits in 4B units.\n\n");	fprintf(stderr, "-ni: input is names file, output is indices file\n");	fprintf(stderr, "-in: input is indices file, output is names file\n");	fprintf(stderr, "-bn: input is bit-field file, output is names file\n");	fprintf(stderr, "-nb: input is names file, output is bit-field file\n");	fprintf(stderr, "-ib: input is indices file, output is bit-field file\n");	fprintf(stderr, "-bi: input is bit-field file, output is indices file\n");	fprintf(stderr, "-li: input bit-field file is little-endian (default big endian)\n");	fprintf(stderr, "-lo: output bit-field file is little-endian (default big endian)\n");	fprintf(stderr, "-F: expect filenames on stdin, not data of an input file\n\tIn this case, this program will convert each filename one by one\n");	fprintf(stderr, "-H dir: glimpse's index is in directory `dir'\n");	fprintf(stderr, "-M: cache some .glimpse* files in memory for speed\n\tUseful with -F when a lot of files are being wgconvert-ed at the same time\n");	fprintf(stderr, "-P prefix: prefix for filenames when -F option is used\n\tIf file=`/a/b.html', with `-P .nh.', wgconvert will access `/a/.nh.b.html'\n");	fprintf(stderr, "\nFor questions about wgconvert, please contact: `%s'\n", GLIMPSE_EMAIL);	exit(2);	return -1;	/* so that the compiler doesn't cry */}
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -