⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 newmgrep.c

📁 Harvest 是一个下载 HTML 网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
				return -1;			}			else {				agrep_outbuffer[agrep_outpointer++] = ':';				agrep_outbuffer[agrep_outpointer++] = nextchar;			}		}		NEW_FILE = OFF;		PRINTED = 1;	}	if (PRINTPATTERN) {		if (agrep_finalfp != NULL)			fprintf(agrep_finalfp, "%d- ", pat_index);		else {			char s[32];			int outindex;			sprintf(s, "%d- ", pat_index);			for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&					(s[outindex] != '\0'); outindex++) {				agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];			}			if (s[outindex] != '\0') {				OUTPUT_OVERFLOW;				return -1;			}			agrep_outpointer += outindex;		}		PRINTED = 1;	}	if (BYTECOUNT) {		if (agrep_finalfp != NULL)			fprintf(agrep_finalfp, "%d= ", CurrentByteOffset);		else {			char s[32];			int outindex;			sprintf(s, "%d= ", CurrentByteOffset);			for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&					(s[outindex] != '\0'); outindex++) {				agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];			}			if (s[outindex] != '\0') {				OUTPUT_OVERFLOW;				return -1;			}			agrep_outpointer += outindex;		}		PRINTED = 1;	}	if (PRINTOFFSET) {		if (agrep_finalfp != NULL)			fprintf(agrep_finalfp, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);		else {			char s[32];			int outindex;			sprintf(s, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);			for (outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&					 (s[outindex] != '\0'); outindex ++) {				agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];			}			if (s[outindex] != '\0') {				OUTPUT_OVERFLOW;				return -1;			}			agrep_outpointer += outindex;		}		PRINTED = 1;	}	return PRINTED;}intmonkey1( text, start, end  ) int start, end; register unsigned char *text;{	int PRINTED = 0;	int num=0;        unsigned char *oldtext;	int pat_index;	register uchar *textend;	unsigned char *textbegin;	unsigned char *curtextend;	unsigned char *curtextbegin;	register unsigned hash;	register uchar shift;	register int  m1, 
Long=LONG;	int MATCHED=0;	register uchar *qx;	register uchar *px;	register int p, p_end;	uchar *lastout;	/* int OUT=0; */	int hash2;	int j;	int DOWITHMASK;	DOWITHMASK = 0;	if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);	textbegin = text + start;	textend = text + end;	m1 = p_size-1;	lastout = text+start;	text = text + start + m1 -1 ;	/* -1 to allow match to the first \n in case the pattern has ^ in front of it *//*	if (WORDBOUND || WHOLELINE) text = text-1;	if (WHOLELINE) text = text-1;*/		/* to accomodate the extra 2 W_delim */	while (text <= textend) {		hash=tr1[*text];		hash=(hash<<Hbits)+(tr1[*(text-1)]);		if(Long) hash=(hash<<Hbits)+(tr1[*(text-2)]);		shift = SHIFT1[hash];#ifdef perf_check		cshift++;#endif		if(shift == 0) {			hash=hash&mask5;			hash2 = (tr[*(text-m1)]<<8) + tr[*(text-m1+1)];			p = HASH[hash];#ifdef perf_check			cshift0++;#endif			p_end = HASH[hash+1];#ifdef debug			printf("hash=%d, p=%d, p_end=%d\n", hash, p, p_end);#endif			while(p++ < p_end) {				if(hash2 != Hash2[p]) continue;#ifdef perf_check				chash++;#endif				if (((pat_index = pat_indices[p]) <= 0) || (pat_len[pat_index] <= 0)) continue;				px = PatPtr[p];				qx = text-m1;				while((*px!=0)&&(tr[*px] == tr[*qx])) {					px++;					qx++;				}				if (*px == 0) {					if(text > textend) return 0;					if (WORDBOUND) {						if (isalnum(*(unsigned char *)qx)) goto skip_output;						if (isalnum(*(unsigned char *)(text-m1-1))) goto skip_output;					}					if (!DOWITHMASK) {                                                /* Don't update CurrentByteOffset here: only before outputting properly */                                                if (!DELIMITER) {							curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));							if (*curtextbegin == '\n') curtextbegin ++;							curtextend = curtextbegin /*text-m1*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;							if (*curtextend == '\n') curtextend ++;                          
                      }                                                else {                                                        curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL);                                                        curtextend = forward_delimiter(curtextbegin /*text-m1*/, textend, D_pattern, D_length, OUTTAIL);                                                }						if (!OUTTAIL || INVERSE) textbegin = curtextend;						else if (DELIMITER) textbegin = curtextend - D_length;						else textbegin = curtextend - 1;					}					DOWITHMASK = 1;					if (pat_index <= anum_terminals) {						int	iii;						amatched_terminals[pat_index - 1] = 1;						for (iii=0; iii<anum_terminals; iii++)							if (aduplicates[pat_index - 1][iii])								amatched_terminals[iii] = 1;					}					if (AComplexBoolean) {						/* Can output only after all the matches in the current record have been identified: just like filter_output */						oldtext = text;						CurrentByteOffset += (oldtext + pat_len[pat_index] - 1 - text);						text = oldtext + pat_len[pat_index] - 1;						MATCHED = 0;						goto skip_output;					}					else if ((long)AParse & AND_EXP) {						for (j=0; j<anum_terminals; j++) if (!amatched_terminals[j]) break;						if (j<anum_terminals) goto skip_output;					}					MATCHED=1;                                        oldtext = text; /* only for MULTI_OUTPUT */#undef	DO_OUTPUT#define DO_OUTPUT(change_text)\					num_of_matched++;\					if(FILENAMEONLY || SILENT)  return 0;\					if (!COUNT) {\num ++;\						if ((PRINTED = print_options(pat_index, text, curtextbegin, curtextend)) == -1) return -1;\						if(!INVERSE) {\							if (PRINTRECORD) {\							if (agrep_finalfp != NULL) {\								fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp);\							}\							else {\								if (agrep_outpointer + curtextend - curtextbegin>= agrep_outlen) {\									OUTPUT_OVERFLOW;\									return -1;\								}\								else {\									
memcpy(agrep_outbuffer + agrep_outpointer, curtextbegin, curtextend-curtextbegin);\									agrep_outpointer += curtextend - curtextbegin;\								}\							}\							}\							else if (PRINTED) {\								if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);\								else agrep_outbuffer[agrep_outpointer ++] = '\n';\								PRINTED = 0;\							}\                                                        if ((change_text) && MULTI_OUTPUT) {     /* next match starting from end of current */\								CurrentByteOffset += (oldtext + pat_len[pat_index] - 1 - text);\                                                                text = oldtext + pat_len[pat_index] - 1;\                                                                MATCHED = 0;\                                                        }\							else if (change_text) {\								CurrentByteOffset += textbegin - text;\								text = textbegin;\							}\						}\						else {	/* INVERSE */\							/* if(lastout < curtextbegin) OUT=1; */\							if (!SILENT) {\							if (agrep_finalfp != NULL)\								fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp);\							else {\								if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) {\									OUTPUT_OVERFLOW;\									return -1;\								}\								memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout);\								agrep_outpointer += (curtextbegin-lastout);\							}\							}\							lastout=textbegin;\							if (change_text) {\								CurrentByteOffset += textbegin - text;\								text = textbegin;\							}\						}\					}\					else if (change_text) {	/* COUNT */\						CurrentByteOffset += textbegin - text;\						text = textbegin;\					}\					if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||\					    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0;	/* done */\					DO_OUTPUT(1)				}			skip_output:                                if (MATCHED && !MULTI_OUTPUT && !AComplexBoolean) break;    /* else look for 
more possible matches since we never know how many will match */				if (DOWITHMASK && (text >= curtextend - 1)) {					DOWITHMASK = 0;					if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {						DO_OUTPUT(0)					}					if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);				}			}			/* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */			if (DOWITHMASK && (text >= curtextend - 1)) {				DOWITHMASK = 0;				if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {					DO_OUTPUT(0)				}				if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);			}			if(!MATCHED) shift = 1;	/* || MULTI_OUTPUT is implicit */			else {				MATCHED = 0;				shift = m1 - 1 > 0 ? m1 - 1 : 1;			}		}		/* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */		if (DOWITHMASK && (text >= curtextend - 1)) {			DOWITHMASK = 0;			if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {				DO_OUTPUT(0)			}			if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);		}		text += shift;		CurrentByteOffset += shift;	}	/* Do residual stuff: check if there was a match at the end of the line | check if rest of the buffer needs to be output due to inverse */	if (DOWITHMASK && (text >= curtextend - 1)) {		DOWITHMASK = 0;		if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {			DO_OUTPUT(0)		}		if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);	}	if(INVERSE && !COUNT && (lastout <= textend)) {		if (!SILENT) {                if (agrep_finalfp != NULL) {                        while(lastout <= textend) fputc(*lastout++, agrep_finalfp);                }                else {                        if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) {  
                              OUTPUT_OVERFLOW;                                return -1;                        }                        memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout+1);                        agrep_outpointer += (textend-lastout+1);                        lastout = textend;                }		}	}	return 0;}#if	DOTCOMPRESSEDinttc_monkey1( text, start, end  ) int start, end;register unsigned char *text;{	int PRINTED = 0;        unsigned char *oldtext;	int pat_index;	register uchar *textend;	unsigned char *textbegin;	unsigned char *curtextend;        unsigned char *curtextbegin;	register unsigned hash;	register uchar shift;	register int  m1, Long=LONG;	int MATCHED=0;	register uchar *qx;	register uchar *px;	register int p, p_end;	uchar *lastout;	/* int OUT=0; */	int hash2;	int j;	int DOWITHMASK;	struct timeval initt, finalt;	int newlen;	DOWITHMASK = 0;	if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);	textbegin = text + start;	textend = text + end;	m1 = tc_p_size-1;	lastout = text+start;	text = text + start + m1 -1;	/* -1 to allow match to the first \n in case the pattern has ^ in front of it */	/* WORDBOUND adjustment not required */	while (text <= textend) {		hash=tc_tr1[*text];		hash=(hash<<Hbits)+(tc_tr1[*(text-1)]);		if(Long) hash=(hash<<Hbits)+(tc_tr1[*(text-2)]);		shift = tc_SHIFT1[hash];#ifdef perf_check		cshift++;#endif		if(shift == 0) {			hash=hash&mask5;			hash2 = (tc_tr[*(text-m1)]<<8) + tc_tr[*(text-m1+1)];			p = tc_HASH[hash];#ifdef perf_check			cshift0++;#endif			p_end = tc_HASH[hash+1];#ifdef debug			printf("hash=%d, p=%d, p_end=%d\n", hash, p, p_end);#endif			while(p++ < p_end) {				if(hash2 != tc_Hash2[p]) continue;#ifdef perf_check				chash++;#endif				if (((pat_index = tc_pat_indices[p]) <= 0) || (tc_pat_len[pat_index] <= 0)) continue;				px = tc_PatPtr[p];				qx = text-m1; 				while((*px!=0)&&(tc_tr[*px] == tc_tr[*qx])) { 					px++; 					qx++; 				}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -