⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 newmgrep.c

📁 harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
 				if (*px == 0) {					if(text > textend) return 0;					if (!DOWITHMASK) {						/* Don't update CurrentByteOffset here: only before outputting properly */						if (!DELIMITER) {							curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));							if (*curtextbegin == '\n') curtextbegin ++;							curtextend = curtextbegin /*text-m1*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;							if (*curtextend == '\n') curtextend ++;						}						else {							curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL);							curtextend = forward_delimiter(curtextbegin /*text-m1*/, textend, tc_D_pattern, tc_D_length, OUTTAIL);						}					}					/* else prev curtextbegin is OK: if full AND isn't found, DOWITHMASK is 0-ed so that we search at most 1 line below */#if	MEASURE_TIMES					gettimeofday(&initt, NULL);#endif	/*MEASURE_TIMES*/					/* Was it really a match in the compressed line from prev line in text to text + strlen(tc_pat_len[pat_index]? 
*/					if (-1==exists_tcompressed_word(tc_PatPtr[p], tc_pat_len[pat_index], curtextbegin, text - curtextbegin + tc_pat_len[pat_index], EASYSEARCH))						goto skip_output;#if     MEASURE_TIMES					gettimeofday(&finalt, NULL);					FILTERALGO_ms +=  (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);#endif  /*MEASURE_TIMES*/					if (!DOWITHMASK) {						if (!OUTTAIL || INVERSE) textbegin = curtextend;						else if (DELIMITER) textbegin = curtextend - D_length;						else textbegin = curtextend - 1;					}					DOWITHMASK = 1;					if (pat_index <= anum_terminals) {						int	iii;						amatched_terminals[pat_index - 1] = 1;						for (iii=0; iii<anum_terminals; iii++)							if (aduplicates[pat_index - 1][iii])								amatched_terminals[iii] = 1;					}					if (AComplexBoolean) {						/* Can output only after all the matches in the current record have been identified: just like filter_output */						oldtext = text;						CurrentByteOffset += (oldtext + pat_len[pat_index] - 1 - text);						text = oldtext + pat_len[pat_index] - 1;						MATCHED = 0;						goto skip_output;					}					else if ((long)AParse & AND_EXP) {						for (j=0; j<anum_terminals; j++) if (!amatched_terminals[j]) break;						if (j<anum_terminals) goto skip_output;					}					MATCHED=1;                                        oldtext = text; /* only for MULTI_OUTPUT */#undef	DO_OUTPUT#define DO_OUTPUT(change_text)\					num_of_matched++;\					if(FILENAMEONLY || SILENT)  return 0;\					if (!COUNT) {\						if ((PRINTED = print_options(pat_index, text, curtextbegin, curtextend)) == -1) return -1;\						if(!INVERSE) {\							if (PRINTRECORD) {\/* #if     MEASURE_TIMES\							gettimeofday(&initt, NULL);\*/ /*#endif  MEASURE_TIMES */\							if (agrep_finalfp != NULL)\								newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH);\							else {\								if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, 
curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {\									if (newlen + agrep_outpointer >= agrep_outlen) {\										OUTPUT_OVERFLOW;\										return -1;\									}\									agrep_outpointer += newlen;\								}\							}\/* #if     MEASURE_TIMES\							gettimeofday(&finalt, NULL);\							OUTFILTER_ms += (finalt.tv_sec* 1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);\*/ /*#endif  MEASURE_TIMES */\							}\							else if (PRINTED) {\								if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);\								else agrep_outbuffer[agrep_outpointer ++] = '\n';\								PRINTED = 0;\							}\                                                        if ((change_text) && MULTI_OUTPUT) {     /* next match starting from end of current */\								CurrentByteOffset += (oldtext + tc_pat_len[pat_index] - 1 - text);\                                                                text = oldtext + tc_pat_len[pat_index] - 1;\                                                                MATCHED = 0;\                                                        }\							else if (change_text) {\								CurrentByteOffset += textbegin - text;\								text = textbegin;\							}\						}\						else {	/* INVERSE: Don't care about filtering time */\							/* if(lastout < curtextbegin) OUT=1; */\							if (!SILENT) {\							if (agrep_finalfp != NULL)\								newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH);\							else {\								if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {\									if (newlen + agrep_outpointer >= agrep_outlen) {\										OUTPUT_OVERFLOW;\										return -1;\									}\									agrep_outpointer += newlen;\								}\							}\							}\							lastout=textbegin;\							if (change_text) {\								CurrentByteOffset += textbegin - text;\								
text = textbegin;\							}\						}\					}\					else if (change_text) {\						CurrentByteOffset += textbegin - text;\						text = textbegin;\					}\					if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||\					    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0;	/* done */\					DO_OUTPUT(1)				}			skip_output:                                if (MATCHED && !MULTI_OUTPUT && !AComplexBoolean) break;    /* else look for more possible matches since we never know how many will match */				if (DOWITHMASK && (text >= curtextend - 1)) {					DOWITHMASK = 0;					if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {						DO_OUTPUT(0)					}					if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);				}			}			/* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */			if (DOWITHMASK && (text >= curtextend - 1)) {				DOWITHMASK = 0;				if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {					DO_OUTPUT(0)				}				if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);			}			if(!MATCHED) shift = 1;	/* || MULTI_OUTPUT is implicit */			else {				MATCHED = 0;				shift = m1 - 1 > 0 ? 
m1 - 1 : 1;			}		}		/* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */		if (DOWITHMASK && (text >= curtextend - 1)) {			DOWITHMASK = 0;			if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {				DO_OUTPUT(0)			}			if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);		}		text += shift;		CurrentByteOffset += shift;	}	/* Do residual stuff: check if there was a match at the end of the line | check if rest of the buffer needs to be output due to inverse */	if (DOWITHMASK && (text >= curtextend - 1)) {		DOWITHMASK = 0;		if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {			DO_OUTPUT(0)		}		if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);	}	if (INVERSE && !COUNT && (lastout <= textend)) {		if (!SILENT) {		if (agrep_finalfp != NULL)			newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH);		else {			if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {				if (newlen + agrep_outpointer >= agrep_outlen) {					OUTPUT_OVERFLOW;					return -1;				}				agrep_outpointer += newlen;			}		}		}	}	return 0;}#endif	/*DOTCOMPRESSED*//* shift is always 1: slight change in MATCHED semantics: it is set to 1 even if COUNT is set: previously, it wasn't set. Will it effect m_short? 
*/intm_short(text, start, end)int start, end; register uchar *text;{	int m1=1;	int PRINTED = 0;	int pat_index;        unsigned char *oldtext;	register uchar *textend;	unsigned char *textbegin;	unsigned char *curtextend;	unsigned char *curtextbegin;	register int p, p_end;	int MATCHED=0;	/* int OUT=0; */	uchar *lastout;	uchar *qx;	uchar *px;	int j;	int DOWITHMASK;	DOWITHMASK = 0;	if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);	textend = text + end;	lastout = text + start;	textbegin = text + start;	text = text + start - 1 ;/*	if (WORDBOUND || WHOLELINE) text = text-1;*/	if (WHOLELINE) text = text-1;		/* to accomodate the extra 2 W_delim */	while (++text <= textend) {		CurrentByteOffset ++;		p = HASH[tr[*text]];		p_end = HASH[tr[*text]+1];		while(p++ < p_end) {			if (((pat_index = pat_indices[p]) <= 0) || (pat_len[pat_index] <= 0)) continue;#ifdef	debug			printf("m_short(): p=%d pat_index=%d off=%d\n", p, pat_index, textend - text);#endif			px = PatPtr[p];			qx = text;			while((*px!=0)&&(tr[*px] == tr[*qx])) {				px++;				qx++;			}			if (*px == 0) {				if(text >= textend) return 0;				if (WORDBOUND) {					if (isalnum(*(unsigned char *)qx)) goto skip_output;					if (isalnum(*(unsigned char *)(text-1))) goto skip_output;				}                                if (!DOWITHMASK) {                                        /* Don't update CurrentByteOffset here: only before outputting properly */                                        if (!DELIMITER) {						curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));						if (*curtextbegin == '\n') curtextbegin ++;						curtextend = curtextbegin /*text-m1*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;						if (*curtextend == '\n') curtextend ++;                                        }                                        else {                                                curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL);     
                                           curtextend = forward_delimiter(curtextbegin /*text-m1*/, textend, D_pattern, D_length, OUTTAIL);                                        }					if (!OUTTAIL || INVERSE) textbegin = curtextend;					else if (DELIMITER) textbegin = curtextend - D_length;					else textbegin = curtextend - 1;                                }                                /* else prev curtextbegin is OK: if full AND isn't found, DOWITHMASK is 0-ed so that we search at most 1 line below */				DOWITHMASK = 1;				if (pat_index <= anum_terminals) {					int	iii;					amatched_terminals[pat_index - 1] = 1;					for (iii=0; iii<anum_terminals; iii++)						if (aduplicates[pat_index - 1][iii])							amatched_terminals[iii] = 1;				}					if (AComplexBoolean) {					/* Can output only after all the matches in the current record have been identified: just like filter_output */					oldtext = text;					CurrentByteOffset += (oldtext + pat_len[pat_index] - 1 - text);					text = oldtext + pat_len[pat_index] - 1;					MATCHED = 0;					goto skip_output;				}				else if ((long)AParse & AND_EXP) {					for (j=0; j<anum_terminals; j++) if (!amatched_terminals[j]) break;					if (j<anum_terminals) goto skip_output;				}				MATCHED = 1;				oldtext = text; /* used only if MULTI_OUTPUT */#undef	DO_OUTPUT#define DO_OUTPUT(change_text)\				num_of_matched++;\				if(FILENAMEONLY || SILENT)  return 0;\				if (!COUNT) {\					if ((PRINTED = print_options(pat_index, text, curtextbegin, curtextend)) == -1) return -1;\					if(!INVERSE) {\						if (PRINTRECORD) {\						if (agrep_finalfp != NULL) {\							fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp);\						}\						else {\							if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) {\								OUTPUT_OVERFLOW;\								return -1;\							}\							else {\								memcpy(agrep_outbuffer + agrep_outpointer, curtextbegin, curtextend-curtextbegin);\								agrep_outpointer += curtextend - curtextbegin;\							}\						
}\						}\						else if (PRINTED) {\							if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);\							else agrep_outbuffer[agrep_outpointer ++] = '\n';\							PRINTED = 0;\						}\                                                if ((change_text) && MULTI_OUTPUT) {     /* next match starting from end of current */\							CurrentByteOffset += (oldtext + pat_len[pat_index] - 1 - text);\                                                        text = oldtext + pat_len[pat_index] - 1;\                                                        MATCHED = 0;\                                                }\						else if (change_text) {\							CurrentByteOffset += textbegin - text;\							text = textbegin;\						}\					}\					else {\                                                /* if(lastout < curtextbegin) OUT=1; */\						if (!SILENT) {\						if (agrep_finalfp != NULL)\							fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp);\						else {\							if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) {\								OUTPUT_OVERFLOW;\								return -1;\							}\							memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout);\							agrep_outpointer += (curtextbegin-lastout);\						}\						}\                                                lastout=textbegin;\						if (change_text) {\							CurrentByteOffset += textbegin - text;\							text = textbegin;\						}\					}\				}\				else if (change_text) {\					CurrentByteOffset += textbegin - text;\					text = textbegin;\				}\				if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||\				    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0;	/* done */\				DO_OUTPUT(1)			}		skip_output:                        if(MATCHED && !MULTI_OUTPUT && !AComplexBoolean) break;     /* else look for more possible matches */			if (DOWITHMASK && (text >= curtextend - 1)) {				DOWITHMASK = 0;				if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, 
amatched_terminals)) {					DO_OUTPUT(0)				}				if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);			}		}		/* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */		if (DOWITHMASK && (text >= curtextend - 1)) {			DOWITHMASK = 0;			if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {				DO_OUTPUT(0)			}			if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);		}		if (MATCHED) text --;		MATCHED = 0;	} /* while */	CurrentByteOffset ++;	/* Do residual stuff: check if there was a match at the end of the line | check if rest of the buffer needs to be output due to inverse */	if (DOWITHMASK && (text >= curtextend - 1)) {		DOWITHMASK = 0;		if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {			DO_OUTPUT(0)		}		if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);	}        if(INVERSE && !COUNT && (lastout <= textend)) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -