⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sgrep.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
								OUTPUT_OVERFLOW;								return -1;							}							else {								agrep_outbuffer[agrep_outpointer++] = ':';								agrep_outbuffer[agrep_outpointer++] = nextchar;							}						}						NEW_FILE = OFF;						PRINTED = 1;					}					if(BYTECOUNT) {						if (agrep_finalfp != NULL)							fprintf(agrep_finalfp, "%d= ", CurrentByteOffset);						else {							char s[32];							int  outindex;							sprintf(s, "%d= ", CurrentByteOffset);							for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&									(s[outindex] != '\0'); outindex++) {								agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];							}							if (s[outindex] != '\0') {								OUTPUT_OVERFLOW;								return -1;							}							agrep_outpointer += outindex;						}						PRINTED = 1;					}					if (PRINTOFFSET) {						if (agrep_finalfp != NULL)							fprintf(agrep_finalfp, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);						else {							char s[32];							int outindex;							sprintf(s, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);							for (outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&									 (s[outindex] != '\0'); outindex ++) {								agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];							}							if (s[outindex] != '\0') {								OUTPUT_OVERFLOW;								return -1;							}							agrep_outpointer += outindex;						}						PRINTED = 1;					}					CurrentByteOffset += textbegin - text;					text = textbegin;					if (PRINTRECORD) {					if (TCOMPRESSED == ON) {#if	MEASURE_TIMES						gettimeofday(&initt, NULL);#endif	/*MEASURE_TIMES*/						if (agrep_finalfp != NULL)							newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH);						else {							if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {								if (agrep_outpointer + newlen + 1 >= 
agrep_outlen) {									OUTPUT_OVERFLOW;									return -1;								}								agrep_outpointer += newlen;							}						}#if	MEASURE_TIMES						gettimeofday(&finalt, NULL);						OUTFILTER_ms +=  (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);#endif	/*MEASURE_TIMES*/					}					else {						if (agrep_finalfp != NULL) {							fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp);						}						else {							if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) {								OUTPUT_OVERFLOW;								return -1;							}							memcpy(agrep_outbuffer+agrep_outpointer, curtextbegin, curtextend-curtextbegin);							agrep_outpointer += curtextend - curtextbegin;						}					}					}					else if (PRINTED) {						if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);						else agrep_outbuffer[agrep_outpointer ++] = '\n';						PRINTED = 0;					}				}				else {	/* INVERSE */					if (!SILENT) {					if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */						if (agrep_finalfp != NULL)							newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH);						else {							if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {								if (newlen + agrep_outpointer >= agrep_outlen) {									OUTPUT_OVERFLOW;									return -1;								}								agrep_outpointer += newlen;							}						}						lastout=textbegin;						CurrentByteOffset += textbegin - text;						text = textbegin;					}					else { /* NOT TCOMPRESSED */						if (agrep_finalfp != NULL)							fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp);						else {							if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) {								OUTPUT_OVERFLOW;								return -1;							}							memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout);							agrep_outpointer += (curtextbegin - lastout);						
}						lastout=textbegin;						CurrentByteOffset += textbegin - text;						text = textbegin;					} /* TCOMPRESSED */					} /* !SILENT */				} /* INVERSE */			}			else {	/* COUNT */				CurrentByteOffset += textbegin - text;				text = textbegin;			}			/* Counteract the ++ below */			text --;			CurrentByteOffset --;			if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||			    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0;	/* done */		}	CONT:		text++;		CurrentByteOffset ++;	}	if (!SILENT && INVERSE && !COUNT && (lastout <= textend)) {		if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */			if (agrep_finalfp != NULL)				newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH);			else {				if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {					if (newlen + agrep_outpointer >= agrep_outlen) {						OUTPUT_OVERFLOW;						return -1;					}					agrep_outpointer += newlen;				}			}		}		else { /* NOT TCOMPRESSED */			if (agrep_finalfp != NULL)				fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp);			else {				if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) {					OUTPUT_OVERFLOW;					return -1;				}				memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1);				agrep_outpointer += (textend - lastout + 1);			}		} /* TCOMPRESSED */	}	return 0;}/* a_monkey() the approximate monkey move */inta_monkey( pat, m, text, textend, D ) register int m, D ; register CHARTYPE *text, *textend, *pat;{	int PRINTED = 0;	register CHARTYPE *oldtext;	CHARTYPE *curtextbegin;	CHARTYPE *curtextend;	register unsigned hash, hashmask, suffix_error; 	register int  m1 = m-1-D, pos; 	CHARTYPE *textbegin = text;	CHARTYPE *textstart;	CHARTYPE *lastout = text;	int newlen;	hashmask = Hashmask;	oldtext  = text;	while (text < textend) {		textstart = 
text;		text = text+m1;		suffix_error = 0;		while(suffix_error <= D) {			hash = *text--;			while(MEMBER_1[hash]) {				hash = ((hash << LOG_ASCII) + *(text--)) & hashmask;			}			suffix_error++;		}		CurrentByteOffset += text - textstart;		if(text <= oldtext) {			if((pos = verify(m, 2*m+D, D, pat, oldtext)) > 0)  {				CurrentByteOffset += (oldtext+pos - text);				text = oldtext+pos;				if(text > textend) return 0;				/* Don't update CurrentByteOffset here: only before outputting properly */				if (TCOMPRESSED == ON) {					if (!DELIMITER) {						curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));						if (*curtextbegin == '\n') curtextbegin ++;						curtextend = curtextbegin /*text -m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;						if (*curtextend == '\n') curtextend ++;					}					else {						curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL);						curtextend = forward_delimiter(curtextbegin /*text -m*/, textend, tc_D_pattern, tc_D_length, OUTTAIL);					}				}				else {					if (!DELIMITER) {						curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));						if (*curtextbegin == '\n') curtextbegin ++;						curtextend = curtextbegin/*text -m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;						if (*curtextend == '\n') curtextend ++;					}					else {						curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL);						curtextend = forward_delimiter(curtextbegin/*text -m*/, textend, D_pattern, D_length, OUTTAIL);					}				}				textbegin = curtextend; /* (curtextend - 1 > textbegin ? 
curtextend - 1 : curtextend); */				num_of_matched++;				if(FILENAMEONLY) return 0;				if(!COUNT) {					if (!INVERSE) {						if(FNAME && (NEW_FILE || !POST_FILTER)) {							char	nextchar = (POST_FILTER == ON)?'\n':' ';							char	*prevstring = (POST_FILTER == ON)?"\n":"";							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName);							else {								int outindex;								if (prevstring[0] != '\0') {									if(agrep_outpointer + 1 >= agrep_outlen) {										OUTPUT_OVERFLOW;										return -1;									}									else agrep_outbuffer[agrep_outpointer ++] = prevstring[0];								}								for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&										(CurrentFileName[outindex] != '\0'); outindex++) {									agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];								}								if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {									OUTPUT_OVERFLOW;									return -1;								}								agrep_outpointer += outindex;							}							if (PRINTFILETIME) {								char *s = aprint_file_time(CurrentFileTime);								if (agrep_finalfp != NULL)									fprintf(agrep_finalfp, "%s", s);								else {									int outindex;									for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&											(s[outindex] != '\0'); outindex++) {										agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];									}									if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {										OUTPUT_OVERFLOW;										return -1;									}									agrep_outpointer += outindex;								}							}							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, ":%c", nextchar);							else {								if (agrep_outpointer+2>= agrep_outlen) {									OUTPUT_OVERFLOW;									return -1;								}								else {									agrep_outbuffer[agrep_outpointer++] = ':';									agrep_outbuffer[agrep_outpointer++] = nextchar;								}							}							NEW_FILE = OFF;							PRINTED = 1;						}						
if(BYTECOUNT) {							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, "%d= ", CurrentByteOffset);							else {								char s[32];								int  outindex;								sprintf(s, "%d= ", CurrentByteOffset);								for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&										(s[outindex] != '\0'); outindex++) {									agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];								}								if (s[outindex] != '\0') {									OUTPUT_OVERFLOW;									return -1;								}								agrep_outpointer += outindex;							}							PRINTED = 1;						}						if (PRINTOFFSET) {							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);							else {								char s[32];								int outindex;								sprintf(s, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);								for (outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&										 (s[outindex] != '\0'); outindex ++) {									agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];								}								if (s[outindex] != '\0') {									OUTPUT_OVERFLOW;									return -1;								}								agrep_outpointer += outindex;							}							PRINTED = 1;						}						CurrentByteOffset += textbegin - text;						text = textbegin;						if (PRINTRECORD) {						if (TCOMPRESSED == ON) {#if	MEASURE_TIMES							gettimeofday(&initt, NULL);#endif	/*MEASURE_TIMES*/							if (agrep_finalfp != NULL)								newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH);							else {								if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {									if (agrep_outpointer + newlen + 1 >= agrep_outlen) {										OUTPUT_OVERFLOW;										return -1;									}									agrep_outpointer += newlen;								}							}#if	MEASURE_TIMES							gettimeofday(&finalt, NULL);							
OUTFILTER_ms +=  (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);#endif	/*MEASURE_TIMES*/						}						else {							if (agrep_finalfp != NULL) {								fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp);							}							else {								if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) {									OUTPUT_OVERFLOW;									return -1;								}								memcpy(agrep_outbuffer+agrep_outpointer, curtextbegin, curtextend-curtextbegin);								agrep_outpointer += curtextend - curtextbegin;							}						}						}						else if (PRINTED) {							if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);							else agrep_outbuffer[agrep_outpointer ++] = '\n';							PRINTED = 0;						}					}					else {	/* INVERSE */						if (!SILENT) {						if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */							if (agrep_finalfp != NULL)								newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH);							else {								if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {									if (newlen + agrep_outpointer >= agrep_outlen) {										OUTPUT_OVERFLOW;										return -1;									}									agrep_outpointer += newlen;								}							}							lastout=textbegin;							CurrentByteOffset += textbegin - text;							text = textbegin;						}						else { /* NOT TCOMPRESSED */							if (agrep_finalfp != NULL)								fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp);							else {								if (curtextbegin - lastout + agrep_ou

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -