📄 uncast.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
	 * Actually, loop-unrolling was done here: you can combine them together but...	 */	if (easysearch)	{	/* compress was done in a context-free way to speed up searches */		while(1) {			if((c = mygetc(infp, inbuf, maxinlen, &inlen)) == MYEOF) return outlen;		bypass_getc1:			if (c == ONE_VERBATIM) {				if ((maxoutlen >= 0) && (outlen + 1 >= maxoutlen)) return outlen;				if ((c = mygetc(infp, inbuf, maxinlen, &inlen)) == MYEOF) return outlen;				if (outfp != NULL) putc(c, outfp);	/* no processing whatsoever */				if (outbuf != NULL) outbuf[outlen] = c;				outlen ++;			}			else if (verbatim_state) {				if (c == END_VERBATIM) verbatim_state = 0;				else {					if ((maxoutlen >= 0) && (outlen + 1 >= maxoutlen)) return outlen;					if (outfp != NULL) putc(c, outfp);					if (outbuf != NULL) outbuf[outlen] = c;					outlen ++;				}			}			else if (c < END_SPECIAL_CHARS) {				process_special_char(c)				if ( ((c == NEWLINE) || (c == TWONEWLINES)) && untilnewline)					return outlen;			}			else if (c == BEGIN_VERBATIM) {				if((c = mygetc(infp, inbuf, maxinlen, &inlen)) == MYEOF) return outlen;				if ((maxoutlen >= 0) && (outlen + 1>= maxoutlen)) return outlen;				if (outfp != NULL) putc(c, outfp);				if (outbuf != NULL) outbuf[outlen] = c;				outlen ++;				verbatim_state = 1;			}			else if (c == END_VERBATIM) {	/* not in verbatim state, still end_verbatim! */				verbatim_state = 0;				fprintf(stderr, "error in decompression after %d chars [verbatim processing]. 
skipping...\n", inlen);				UNCAST_ERRORS = 1;			}			else			{			above1:				if (c < RESERVED_CHARS) {	/* this is a special-word but not a special char */					process_special_char(c)				}				else {	/* it is an index of a word in the dictionary since 1st byte >= RESERVED_CHARS */					index = c;					index <<= 8;					if ((c = mygetc(infp, inbuf, maxinlen, &inlen)) == MYEOF) return outlen;					index |= c;					dindex = decode_index(index);					if(dindex < MAX_WORDS) {						if ((maxoutlen >= 0) && (outlen + AVG_WORD_LEN >= maxoutlen)) return outlen;						if (outfp != NULL) outlen += myfpcopy(outfp, compress_string_table[dindex]);						if (outbuf != NULL) {							outlen += mystrcpy(outbuf+outlen, compress_string_table[dindex]);						}						if ((maxoutlen >= 0) && (outlen >= maxoutlen)) return outlen;					}					else {						fprintf(stderr, "error in decomperssion after %d chars [bad index %x]. skipping...\n", inlen, index);						UNCAST_ERRORS = 1;					}				}			/* process_char_after_word1: */				/* now to see what follows the word: a blank or a special delimiter or not-blank */				if((c = mygetc(infp, inbuf, maxinlen, &inlen)) == MYEOF) {					if (!TC_FOUND_NOTBLANK) {						if (outfp != NULL) putc(' ', outfp);						if (outbuf != NULL) outbuf[outlen] = ' ';						outlen ++;					}					TC_FOUND_NOTBLANK = 0;	/* default: use result of previous forward_tcompressed_word only */					return outlen;				}				else if (c < MAX_SPECIAL_CHARS) {					if ((c < END_SPECIAL_DELIMITERS) && (c >= BEGIN_SPECIAL_DELIMITERS)) {						if ((maxoutlen >= 0) && (outlen + 1 >= maxoutlen)) return outlen;						if (outfp != NULL) putc(special_delimiters[c - BEGIN_SPECIAL_DELIMITERS], outfp);						if (outbuf != NULL) outbuf[outlen] = special_delimiters[c - BEGIN_SPECIAL_DELIMITERS];						outlen ++;					}					else if (c != NOTBLANK) {						if ((maxoutlen >= 0) && (outlen + 1 >= maxoutlen)) return outlen;						if (outfp != NULL) putc(' ', outfp);						if (outbuf != NULL) outbuf[outlen] = ' ';						outlen ++;						
goto bypass_getc1;					}					/* else go normal getc */				}				else {	/* can be one of the special_words or a dictionary index */					if ((maxoutlen >= 0) && (outlen + 1 >= maxoutlen)) return outlen;					if (outfp != NULL) putc(' ', outfp);					if (outbuf != NULL) outbuf[outlen] = ' ';					outlen ++;					goto above1;				}			}		}	}	else	{	/* compression was done in a context sensitive fashion w/o regards to search */		while(1) {			if((c = mygetc(infp, inbuf, maxinlen, &inlen)) == MYEOF) return outlen;		bypass_getc2:			if (verbatim_state) {				if (c == END_VERBATIM) verbatim_state = 0;				else if (c == BEGIN_VERBATIM) goto verbatim_processing;				else {					if ((maxoutlen >= 0) && (outlen + 1 >= maxoutlen)) return outlen;					if (outfp != NULL) putc(c, outfp);					if (outbuf != NULL) outbuf[outlen] = c;					outlen ++;				}			}			else if (c < END_SPECIAL_CHARS) {				process_special_char(c)				if ( ((c == NEWLINE) || (c == TWONEWLINES)) && untilnewline)				return outlen;			}			else if (c == BEGIN_VERBATIM) {			verbatim_processing:				if ((maxoutlen >= 0) && (outlen + 1 >= maxoutlen)) return outlen;				if((c = mygetc(infp, inbuf, maxinlen, &inlen)) == MYEOF) return outlen;				if (outfp != NULL) putc(c, outfp);				if (outbuf != NULL) outbuf[outlen] = c;				outlen ++;				if ((c!=BEGIN_VERBATIM) && (c!=END_VERBATIM)) verbatim_state = 1;	/* only _these_ are escape characters */			}			else if (c == END_VERBATIM) {	/* not in verbatim state, still end_verbatim! */				verbatim_state = 0;				fprintf(stderr, "error in decompression after %d chars [verbatim processing]. 
skipping...\n", inlen);				UNCAST_ERRORS = 1;			}			else			{			above2:				if (c < RESERVED_CHARS) {	/* this is a special-word but not a special char */					process_special_char(c)				}				else {	/* it is an index of a word in the dictionary since 1st byte >= RESERVED_CHARS */					index = c;					index <<= 8;					if ((c = mygetc(infp, inbuf, maxinlen, &inlen)) == MYEOF) return outlen;					index |= c;					dindex = decode_index(index);					if(dindex < MAX_WORDS) {						if ((maxoutlen >= 0) && (outlen + AVG_WORD_LEN >= maxoutlen)) return outlen;						if (outfp != NULL) outlen += myfpcopy(outfp, compress_string_table[dindex]);						if (outbuf != NULL) {							outlen += mystrcpy(outbuf+outlen, compress_string_table[dindex]);						}						if ((maxoutlen >= 0) && (outlen >= maxoutlen)) return outlen;					}					else {						fprintf(stderr, "error in decomperssion after %d chars [bad index %x]. skipping...\n", inlen, index);						UNCAST_ERRORS = 1;					}				}			/* process_char_after_word2: */				/* now to see what follows the word: a blank or a special delimiter or not-blank */				if((c = mygetc(infp, inbuf, maxinlen, &inlen)) == MYEOF) {					if (!TC_FOUND_NOTBLANK) {						if (outfp != NULL) putc(' ', outfp);						if (outbuf != NULL) outbuf[outlen] = ' ';						outlen ++;					}					TC_FOUND_NOTBLANK = 0;	/* default: use result of previous forward_tcompressed_word only */					return outlen;				}				else if (c < MAX_SPECIAL_CHARS) {					if ((c < END_SPECIAL_DELIMITERS) && (c >= BEGIN_SPECIAL_DELIMITERS)) {						if ((maxoutlen >= 0) && (outlen + 1 >= maxoutlen)) return outlen;						if (outfp != NULL) putc(special_delimiters[c - BEGIN_SPECIAL_DELIMITERS], outfp);						if (outbuf != NULL) outbuf[outlen] = special_delimiters[c - BEGIN_SPECIAL_DELIMITERS];						outlen ++;					}					else if (c != NOTBLANK) {						if ((maxoutlen >= 0) && (outlen + 1 >= maxoutlen)) return outlen;						if (outfp != NULL) putc(' ', outfp);						if (outbuf != NULL) outbuf[outlen] = ' ';						outlen ++;						
goto bypass_getc2;					}					/* else go normal getc */				}				else {	/* can be one of the special_words or a dictionary index */					if ((maxoutlen >= 0) && (outlen + 1 >= maxoutlen)) return outlen;					if (outfp != NULL) putc(' ', outfp);					if (outbuf != NULL) outbuf[outlen] = ' ';					outlen ++;					goto above2;				}			}		}	}}#define FUNCTION	tuncompress_file#define DIRECTORY	tuncompress_directory#include "trecursive.c"/* returns #bytes (>=0) in the uncompressed file, -1 if major error (not able to uncompress) */inttuncompress_file(name, outname, flags)	char	*name;	char	*outname;	int	flags;{	FILE	*fp;	FILE	*outfp;	int	inlen, ret;	struct stat statbuf;	/* struct timeval tvp[2]; */	struct utimbuf tvp;	char	tempname[MAX_LINE_LEN];	if (name == NULL) return -1;	special_get_name(name, -1, tempname);	if (-1 == stat(tempname, &statbuf)) {		if (flags & TC_ERRORMSGS)			fprintf(stderr, "permission denied or non-existent: %s\n", tempname);		return -1;	}	if (S_ISDIR(statbuf.st_mode)) {		if (flags & TC_RECURSIVE) return tuncompress_directory(tempname, outname, flags);		if (flags & TC_ERRORMSGS)			fprintf(stderr, "skipping directory: %s\n", tempname);		return -1;	}        if (!S_ISREG(statbuf.st_mode)) {                if (flags & TC_ERRORMSGS)			fprintf(stderr, "not a regular file, skipping: %s\n", tempname);                return -1;	}	inlen = strlen(tempname);	if (!tuncompressible_filename(tempname, inlen)) {		if (!(flags & TC_RECURSIVE) && (flags & TC_ERRORMSGS))			fprintf(stderr, "no %s extension, skipping: %s\n", COMP_SUFFIX, tempname);		return -1;	}	if ((fp = fopen(tempname, "r")) == NULL) {		if (flags & TC_ERRORMSGS)			fprintf(stderr, "permission denied or non-existent: %s\n", tempname);		return -1;	}	if (!tuncompressible_fp(fp)) {		if (flags & TC_ERRORMSGS)			fprintf(stderr, "signature does not match, skipping: %s\n", tempname);		fclose(fp);		return -1;	}	if (flags & TC_SILENT) {		printf("%s\n", tempname);		fclose(fp);		return 0;	}	/* Create and open output file */	
strncpy(outname, tempname, MAX_LINE_LEN);	outname[inlen - strlen(COMP_SUFFIX)] = '\0';	if (!access(outname, R_OK)) {		if (!(flags & TC_OVERWRITE)) {			fclose(fp);			return 0;		}		else if (!(flags & TC_NOPROMPT)) {			char	s[8];			printf("overwrite %s? (y/n): ", outname);			scanf("%c", s);			if (s[0] != 'y') {				fclose(fp);				return 0;			}		}	}	if ((outfp = fopen(outname, "w")) == NULL) {		if (flags & TC_ERRORMSGS)			fprintf(stderr, "cannot open for writing: %s\n", outname);		fclose(fp);		return -1;	}	UNCAST_ERRORS = 0;	if ( ((ret = tuncompress(fp, -1, outfp, -1, flags)) > 0) && !UNCAST_ERRORS && (flags & TC_REMOVE)) {		unlink(tempname);	}	fclose(fp);	fflush(outfp);	fclose(outfp);	/*	tvp[0].tv_sec = statbuf.st_atime;	tvp[0].tv_usec = 0;	tvp[1].tv_sec = statbuf.st_mtime;	tvp[1].tv_usec = 0;	utimes(outname, tvp);	*/	tvp.actime = statbuf.st_atime;	tvp.modtime = statbuf.st_mtime;	utime(outname, &tvp);	return ret;}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -