⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 agrep.c

📁 harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
	r2 = r3 = Init[0];	for(k=0; k<= D; k++) { 		A[k] = B[k] = Init[k]; 	}	if ( D == 0 )	{#if	AGREP_POINTER	    if (Text != -1)	    {#endif	/*AGREP_POINTER*/		alloc_buf(Text, &buffer, BlockSize+Maxline+1);		while ((num_read = fill_buf(Text, buffer + Maxline, BlockSize)) > 0)		{			i=Maxline; 			end = num_read + Maxline;#if 0			/* pab: Don't do this here; it's done in bitap.fill_buf,			 * where we can handle eof on a block boundary right */			if((num_read < BlockSize) && buffer[end-1] != '\n') buffer[end++] = '\n';#endif /* 0 */			if(FIRST_LOOP) {         /* if first time in the loop add a newline */				buffer[i-1] = '\n';  /* in front the  text.  */				i--;				CurrentByteOffset --;				FIRST_LOOP = 0;			}			/* RE1_PROCESS_WHEN_DZERO: the while-loop below */			while ( i < end )			{				c = buffer[i++];				CurrentByteOffset ++;				CMask = Mask[c];				if(c != Newline)				{					if(CMask != 0) {						r1 = Init1 & r3;						r2 = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | r1;					}					else  {						r2 = r3 & Init1;					}				}				else {					j++;					if (DELIMITER) CurrentByteOffset -= D_length;					else CurrentByteOffset -= 1;					r1 = Init1 & r3;            /* match against endofline */					r2 = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | r1;					if(TAIL) r2 = (Next[r2>>hh] | Next1[r2&LL]) | r2;                                        /* epsilon move */					if(( r2 & 1 ) ^ INVERSE) {						if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {							num_of_matched++;							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, "%s", CurrentFileName);							else {								int outindex;								for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&										(CurrentFileName[outindex] != '\0'); outindex++) {									agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];								}								if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {									OUTPUT_OVERFLOW;									free_buf(Text, buffer);									return -1;								}								
agrep_outpointer += outindex;							}							if (PRINTFILETIME) {								char *s = aprint_file_time(CurrentFileTime);								if (agrep_finalfp != NULL)									fprintf(agrep_finalfp, "%s", s);								else {									int outindex;									for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&											(s[outindex] != '\0'); outindex++) {										agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];									}									if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {										OUTPUT_OVERFLOW;										free_buf(Text, buffer);										return -1;									}									agrep_outpointer += outindex;								}							}							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, "\n");							else {								if (agrep_outpointer+1>=agrep_outlen) {									OUTPUT_OVERFLOW;									free_buf(Text, buffer);									return -1;								}								else agrep_outbuffer[agrep_outpointer++] = '\n';							}							free_buf(Text, buffer);							NEW_FILE = OFF;							return 0;						}						if (-1 == r_output(buffer, i-1, end, j)) {free_buf(Text, buffer); return -1;}						if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||						    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {							free_buf(Text, buffer);							return 0;	/* done */						}					}					r3 = Init0;					r2 = (Next[r3>>hh] | Next1[r3&LL]) & CMask | Init0;					/* match begin of line */					if (DELIMITER) CurrentByteOffset += 1*D_length;					else CurrentByteOffset += 1*1;				}				c = buffer[i++];				CurrentByteOffset ++;				CMask = Mask[c];				if(c != Newline)				{					if(CMask != 0) {						r1 = Init1 & r2;						r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1;					}					else   r3 = r2 & Init1;				} /* if(NOT Newline) */				else {					j++;					if (DELIMITER) CurrentByteOffset -= D_length;					else CurrentByteOffset -= 1;					r1 = Init1 & r2;            /* match against endofline */					r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1;					if(TAIL) r3 = ( 
Next[r3>>hh] | Next1[r3&LL] ) | r3;					/* epsilon move */					if(( r3 & 1 ) ^ INVERSE) {						if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {							num_of_matched++;							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, "%s", CurrentFileName);							else {								int outindex;								for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&										(CurrentFileName[outindex] != '\0'); outindex++) {									agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];								}								if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {									OUTPUT_OVERFLOW;									free_buf(Text, buffer);									return -1;								}								agrep_outpointer += outindex;							}							if (PRINTFILETIME) {								char *s = aprint_file_time(CurrentFileTime);								if (agrep_finalfp != NULL)									fprintf(agrep_finalfp, "%s", s);								else {									int outindex;									for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&											(s[outindex] != '\0'); outindex++) {										agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];									}									if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {										OUTPUT_OVERFLOW;										free_buf(Text, buffer);										return -1;									}									agrep_outpointer += outindex;								}							}							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, "\n");							else {								if (agrep_outpointer+1>=agrep_outlen) {									OUTPUT_OVERFLOW;									free_buf(Text, buffer);									return -1;								}								else agrep_outbuffer[agrep_outpointer++] = '\n';							}							free_buf(Text, buffer);							NEW_FILE = OFF;							return 0;						}						if (-1 == r_output(buffer, i-1, end, j)) {free_buf(Text, buffer); return -1;}						if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||						    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {							free_buf(Text, buffer);							return 0;	/* done */						}				
	}					r2 = Init0;					r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | Init0;					/* match begin of line */					if (DELIMITER) CurrentByteOffset += 1*D_length;					else CurrentByteOffset += 1*1;				}			} /* while i < end ... */			strncpy(buffer, buffer+num_read, Maxline);		} /* end while fill_buf()... */		free_buf(Text, buffer);		return 0;#if	AGREP_POINTER	    }	    else {	/* within the memory buffer: assume it starts with a newline at position 0, the actual pattern follows that, and it ends with a '\n' */		num_read = agrep_inlen;		buffer = (CHAR *)agrep_inbuffer;		end = num_read;		/* buffer[end-1] = '\n';*/ /* at end of the text. */		/* buffer[0] = '\n';*/  /* in front of the  text. */		i = 0;			/* An exact copy of the above RE1_PROCESS_WHEN_DZERO: the while-loop below */			while ( i < end )			{				c = buffer[i++];				CurrentByteOffset ++;				CMask = Mask[c];				if(c != Newline)				{					if(CMask != 0) {						r1 = Init1 & r3;						r2 = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | r1;					}					else  {						r2 = r3 & Init1;					}				}				else {					j++;					if (DELIMITER) CurrentByteOffset -= D_length;					else CurrentByteOffset -= 1;					r1 = Init1 & r3;            /* match against endofline */					r2 = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | r1;					if(TAIL) r2 = (Next[r2>>hh] | Next1[r2&LL]) | r2;                                        /* epsilon move */					if(( r2 & 1 ) ^ INVERSE) {						if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {							num_of_matched++;							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, "%s", CurrentFileName);							else {								int outindex;								for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&										(CurrentFileName[outindex] != '\0'); outindex++) {									agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];								}								if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {									OUTPUT_OVERFLOW;									free_buf(Text, buffer);									return -1;						
		}								agrep_outpointer += outindex;							}							if (PRINTFILETIME) {								char *s = aprint_file_time(CurrentFileTime);								if (agrep_finalfp != NULL)									fprintf(agrep_finalfp, "%s", s);								else {									int outindex;									for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&											(s[outindex] != '\0'); outindex++) {										agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];									}									if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {										OUTPUT_OVERFLOW;										free_buf(Text, buffer);										return -1;									}									agrep_outpointer += outindex;								}							}							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, "\n");							else {								if (agrep_outpointer+1>=agrep_outlen) {									OUTPUT_OVERFLOW;									free_buf(Text, buffer);									return -1;								}								else agrep_outbuffer[agrep_outpointer++] = '\n';							}							free_buf(Text, buffer);							NEW_FILE = OFF;							return 0;						}						if (-1 == r_output(buffer, i-1, end, j)) {free_buf(Text, buffer); return -1;}						if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||						    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {							free_buf(Text, buffer);							return 0;	/* done */						}					}					r3 = Init0;					r2 = (Next[r3>>hh] | Next1[r3&LL]) & CMask | Init0;					/* match begin of line */					if (DELIMITER) CurrentByteOffset += 1*D_length;					else CurrentByteOffset += 1*1;				}				c = buffer[i++];				CurrentByteOffset ++;				CMask = Mask[c];				if(c != Newline)				{					if(CMask != 0) {						r1 = Init1 & r2;						r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1;					}					else   r3 = r2 & Init1;				} /* if(NOT Newline) */				else {					j++;					if (DELIMITER) CurrentByteOffset -= D_length;					else CurrentByteOffset -= 1;					r1 = Init1 & r2;            /* match against endofline */					r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1;					if(TAIL) 
r3 = ( Next[r3>>hh] | Next1[r3&LL] ) | r3;					/* epsilon move */					if(( r3 & 1 ) ^ INVERSE) {						if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {							num_of_matched++;							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, "%s", CurrentFileName);							else {								int outindex;								for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&										(CurrentFileName[outindex] != '\0'); outindex++) {									agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];								}								if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {									OUTPUT_OVERFLOW;									free_buf(Text, buffer);									return -1;								}								agrep_outpointer += outindex;							}							if (PRINTFILETIME) {								char *s = aprint_file_time(CurrentFileTime);								if (agrep_finalfp != NULL)									fprintf(agrep_finalfp, "%s", s);								else {									int outindex;									for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&											(s[outindex] != '\0'); outindex++) {										agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];									}									if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {										OUTPUT_OVERFLOW;										free_buf(Text, buffer);										return -1;									}									agrep_outpointer += outindex;								}							}							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, "\n");							else {								if (agrep_outpointer+1>=agrep_outlen) {									OUTPUT_OVERFLOW;									free_buf(Text, buffer);									return -1;								}								else agrep_outbuffer[agrep_outpointer++] = '\n';							}							free_buf(Text, buffer);							NEW_FILE = OFF;							return 0;						}						if (-1 == r_output(buffer, i-1, end, j)) {free_buf(Text, buffer); return -1;}						if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||						    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {							free_buf(Text, buffer);							return 0;	/* done */				
		}					}					r2 = Init0;					r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | Init0;					/* match begin of line */					if (DELIMITER) CurrentByteOffset += 1*D_length;					else CurrentByteOffset += 1*1;				}			} /* while i < end ... */		return 0;	    }#endif	/*AGREP_POINTER*/	} /*  end if (D == 0) */#if	AGREP_POINTER	if (Text != -1)	{#endif	/*AGREP_POINTER*/		while ((num_read = fill_buf(Text, buffer + Maxline, BlockSize)) > 0)		{			i=Maxline; 			end = Maxline + num_read;#if 0			/* pab: Don't do this here; it's done in bitap.fill_buf,			 * where we can handle eof on a block boundary right */			if((num_read < BlockSize) && buffer[end-1] != '\n') buffer[end++] = '\n';#endif /* 0 */			if(FIRST_TIME) {         /* if first time in the loop add a newline */				buffer[i-1] = '\n';  /* in front the  text.  */				i--;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -