⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 newmgrep.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
		printf("prepf(): tc_patt[%d]=%s, tc_pat_len[%d]=%d\n", i, tc_patt[i], i, tc_pat_len[i]);#endif		if(p!=0 && p < tc_p_size) tc_p_size = p;	/* MIN */	}	if(tc_p_size == 0) {	/* cannot happen NOW */		fprintf(stderr, "%s: the pattern file is empty\n", Progname);                if (!EXITONERROR) {                        errno = AGREP_ERROR;                        return -1;                }                else exit(2);	}	if(length > 400 && tc_p_size > 2) tc_LONG = 1;	if(tc_p_size == 1) tc_SHORT = 1;	for(i=0; i<MAXMEMBER1; i++) tc_SHIFT1[i] = tc_p_size - 1 - LONG;	for(i=0; i<MAXHASH; i++) {		tc_HASH[i] = 0;	}	for(i=1; i<=tc_num_pat; i++) tc_f_prep(i, tc_patt[i]);	tc_accumulate();	memset(tc_pat_indices, '\0', sizeof(int) * (tc_num_pat + 1));	for(i=1; i<=tc_num_pat; i++) tc_f_prep1(i, tc_patt[i]);	acompute_duplicates(tc_aduplicates, aterminals, anum_terminals, tc_tr);#if	MEASURE_TIMES	gettimeofday(&finalt, NULL);	INFILTER_ms +=  (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);#endif	/*MEASURE_TIMES*/        return 0;}#endif	/*DOTCOMPRESSED*/intmgrep(fd)int fd;{ 	register char r_newline = '\n';	unsigned char *text;	register int buf_end, num_read, start, end, residue = 0;	int	oldCurrentByteOffset;	int	first_time = 1;#if     AGREP_POINTER        if (fd != -1) {#endif  /*AGREP_POINTER*/                alloc_buf(fd, &text, 2*BlockSize+Max_record);		text[Max_record-1] = '\n';  /* initial case */		start = Max_record;		while( (num_read = fill_buf(fd, text+Max_record, 2*BlockSize)) > 0) 		{			buf_end = end = Max_record + num_read -1 ;			oldCurrentByteOffset = CurrentByteOffset;			if (first_time) {				if ((TCOMPRESSED == ON) && tuncompressible(text+Max_record, num_read)) {					EASYSEARCH = text[Max_record+SIGNATURE_LEN-1];					start += SIGNATURE_LEN;					CurrentByteOffset += SIGNATURE_LEN;					if (!EASYSEARCH) {						fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName);					}				}				else TCOMPRESSED 
= OFF;				first_time = 0;			}			if (!DELIMITER) {				while(text[end]  != r_newline && end > Max_record) end--;				text[start-1] = r_newline;			}			else {				unsigned char *newbuf = text + end + 1;				newbuf = backward_delimiter(newbuf, text+Max_record, D_pattern, D_length, OUTTAIL);	/* see agrep.c/'d' */				if (newbuf < text+Max_record+D_length) newbuf = text + end + 1;				end = newbuf - text - 1;				memcpy(text+start-D_length, D_pattern, D_length);			}			residue = buf_end - end  + 1 ;			if(INVERSE && COUNT) countline(text+Max_record, num_read);			/* MGREP_PROCESS */			if (TCOMPRESSED) {	/* separate functions since separate globals => too many if-statements within a single function makes it slow */#if	DOTCOMPRESSED				if(tc_SHORT) { if (-1 == tc_m_short(text, start, end)) {free_buf(fd, text); return -1;}}				else      { if (-1 == tc_monkey1(text, start, end)) {free_buf(fd, text); return -1;}}#endif	/*DOTCOMPRESSED*/			}			else {				if(SHORT) { if (-1 == m_short(text, start, end)) {free_buf(fd, text); return -1;}}				else      { if (-1 == monkey1(text, start, end)) {free_buf(fd, text); return -1;}}			}                        if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {				if (agrep_finalfp != NULL)					fprintf(agrep_finalfp, "%s", CurrentFileName);				else {					int outindex;					for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&							(CurrentFileName[outindex] != '\0'); outindex++) {						agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];					}					if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {						OUTPUT_OVERFLOW;						free_buf(fd, text);						return -1;					}					agrep_outpointer += outindex;				}				if (PRINTFILETIME) {					char *s = aprint_file_time(CurrentFileTime);					if (agrep_finalfp != NULL)						fprintf(agrep_finalfp, "%s", s);					else {						int outindex;						for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&								
(s[outindex] != '\0'); outindex++) {							agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];						}						if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {							OUTPUT_OVERFLOW;							free_buf(fd, text);							return -1;						}						agrep_outpointer += outindex;					}				}				if (agrep_finalfp != NULL)					fprintf(agrep_finalfp, "\n");				else {					if (agrep_outpointer+1>=agrep_outlen) {						OUTPUT_OVERFLOW;						free_buf(fd, text);						return -1;					}					else agrep_outbuffer[agrep_outpointer++] = '\n';				}                                free_buf(fd, text);                                NEW_FILE = OFF;                                return 0;                        }			CurrentByteOffset = oldCurrentByteOffset + end - start + 1;			start = Max_record - residue;			if(start < 0) {				start = 1; 			}			strncpy(text+start, text+end, residue);			if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||			    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {				free_buf(fd, text);				return 0;	/* done */			}		} /* end of while(num_read = ... 
*/		if (!DELIMITER) {			text[start-1] = '\n';			text[start+residue] = '\n';		}		else {			if (start > D_length) memcpy(text+start-D_length, D_pattern, D_length);			memcpy(text+start+residue, D_pattern, D_length);		}		end = start + residue;		if(residue > 1) {			if (TCOMPRESSED) {#if	DOTCOMPRESSED				if(tc_SHORT) tc_m_short(text, start, end);				else      tc_monkey1(text, start, end);#endif	/*DOTCOMPRESSED*/			}			else {				if(SHORT) m_short(text, start, end);				else      monkey1(text, start, end);			}                        if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {				if (agrep_finalfp != NULL)					fprintf(agrep_finalfp, "%s", CurrentFileName);				else {					int outindex;					for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&							(CurrentFileName[outindex] != '\0'); outindex++) {						agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];					}					if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {						OUTPUT_OVERFLOW;						free_buf(fd, text);						return -1;					}					agrep_outpointer += outindex;				}				if (PRINTFILETIME) {					char *s = aprint_file_time(CurrentFileTime);					if (agrep_finalfp != NULL)						fprintf(agrep_finalfp, "%s", s);					else {						int outindex;						for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&								(s[outindex] != '\0'); outindex++) {							agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];						}						if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {							OUTPUT_OVERFLOW;							free_buf(fd, text);							return -1;						}						agrep_outpointer += outindex;					}				}				if (agrep_finalfp != NULL)					fprintf(agrep_finalfp, "\n");				else {					if (agrep_outpointer+1>=agrep_outlen) {						OUTPUT_OVERFLOW;						free_buf(fd, text);						return -1;					}					else agrep_outbuffer[agrep_outpointer++] = '\n';				}                                free_buf(fd, text);                      
          NEW_FILE = OFF;                                return 0;                        }		}		free_buf(fd, text);		return (0);#if	AGREP_POINTER	}	else {                text = (unsigned char *)agrep_inbuffer;                num_read = agrep_inlen;                start = 0;                buf_end = end = num_read - 1;			oldCurrentByteOffset = CurrentByteOffset;			if (first_time) {				if ((TCOMPRESSED == ON) && tuncompressible(text+Max_record, num_read)) {					EASYSEARCH = text[Max_record+SIGNATURE_LEN-1];					start += SIGNATURE_LEN;					CurrentByteOffset += SIGNATURE_LEN;					if (!EASYSEARCH) {						fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName);					}				}				else TCOMPRESSED = OFF;				first_time = 0;			}			if (!DELIMITER)				while(text[end]  != r_newline && end > 1) end--;			else {                                unsigned char *newbuf = text + end + 1;                                newbuf = backward_delimiter(newbuf, text, D_pattern, D_length, OUTTAIL);        /* see agrep.c/'d' */				if (newbuf < text+D_length) newbuf = text + end + 1;                                end = newbuf - text - 1;			}			/* text[0] = text[end] = r_newline; : the user must ensure that the delimiter is there at text[0] and occurs somewhere before text[end] */			if (INVERSE && COUNT) countline(text, num_read);                        /* An exact copy of the above MGREP_PROCESS */			if (TCOMPRESSED) {	/* separate functions since separate globals => too many if-statements within a single function makes it slow */#if	DOTCOMPRESSED				if(tc_SHORT) { if (-1 == tc_m_short(text, start, end)) {free_buf(fd, text); return -1;}}				else      { if (-1 == tc_monkey1(text, start, end)) {free_buf(fd, text); return -1;}}#endif	/*DOTCOMPRESSED*/			}			else {				if(SHORT) { if (-1 == m_short(text, start, end)) {free_buf(fd, text); return -1;}}				else      { if (-1 == monkey1(text, start, end)) {free_buf(fd, text); return -1;}}			}                        
if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {				if (agrep_finalfp != NULL)					fprintf(agrep_finalfp, "%s", CurrentFileName);				else {					int outindex;					for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&							(CurrentFileName[outindex] != '\0'); outindex++) {						agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];					}					if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {						OUTPUT_OVERFLOW;						free_buf(fd, text);						return -1;					}					agrep_outpointer += outindex;				}				if (PRINTFILETIME) {					char *s = aprint_file_time(CurrentFileTime);					if (agrep_finalfp != NULL)						fprintf(agrep_finalfp, "%s", s);					else {						int outindex;						for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&								(s[outindex] != '\0'); outindex++) {							agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];						}						if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {							OUTPUT_OVERFLOW;							free_buf(fd, text);							return -1;						}						agrep_outpointer += outindex;					}				}				if (agrep_finalfp != NULL)					fprintf(agrep_finalfp, "\n");				else {					if (agrep_outpointer+1>=agrep_outlen) {						OUTPUT_OVERFLOW;						free_buf(fd, text);						return -1;					}					else agrep_outbuffer[agrep_outpointer++] = '\n';				}                                free_buf(fd, text);                                NEW_FILE = OFF;                                return 0;                        }                return 0;	}#endif	/*AGREP_POINTER*/#ifdef perf_check	fprintf(stderr,"Shifted %d times; shift=0 %d times; hash was = %d times\n",cshift, cshift0, chash);	return 0;#endif} /* end mgrep */static voidcountline(text, len)unsigned char *text; int len;{int i;	for (i=0; i<len; i++) if(text[i] == '\n') total_line++;}/* Stuff that always needs to be printed whenever there is a match in all functions in this file 
*/intprint_options(pat_index, text, curtextbegin, curtextend)	int	pat_index;	unsigned char	*text, *curtextbegin, *curtextend;{	int	PRINTED = 0;	if (SILENT) return 0;	if(FNAME && (NEW_FILE || !POST_FILTER)) {		char    nextchar = (POST_FILTER == ON)?'\n':' ';		char    *prevstring = (POST_FILTER == ON)?"\n":"";		if (agrep_finalfp != NULL)			fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName);		else {			int outindex;			if (prevstring[0] != '\0') {				if(agrep_outpointer + 1 >= agrep_outlen) {					OUTPUT_OVERFLOW;					return -1;				}				else agrep_outbuffer[agrep_outpointer ++] = prevstring[0];			}			for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&					(CurrentFileName[outindex] != '\0'); outindex++) {				agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];			}			if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {				OUTPUT_OVERFLOW;				return -1;			}			agrep_outpointer += outindex;		}		if (PRINTFILETIME) {			char *s = aprint_file_time(CurrentFileTime);			if (agrep_finalfp != NULL)				fprintf(agrep_finalfp, "%s", s);			else {				int outindex;				for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&						(s[outindex] != '\0'); outindex++) {					agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];				}				if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {					OUTPUT_OVERFLOW;					return -1;				}				agrep_outpointer += outindex;			}		}		if (agrep_finalfp != NULL)			fprintf(agrep_finalfp, ":%c", nextchar);		else {			if (agrep_outpointer+2>= agrep_outlen) {				OUTPUT_OVERFLOW;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -