⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 asearch1.c

📁 harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal.  All Rights Reserved. */#include "agrep.h"#include <errno.h>extern unsigned Init1, Init[], Mask[], endposition, D_endpos;extern unsigned NO_ERR_MASK;extern int TRUNCATE, DELIMITER, AND, I, S, DD, INVERSE, FILENAMEONLY, PRINTFILETIME ;extern char CurrentFileName[];extern long CurrentFileTime;extern int num_of_matched, prev_num_of_matched;extern int CurrentByteOffset;extern CHAR *agrep_inbuffer;extern int agrep_inlen;extern FILE *agrep_finalfp;extern CHAR *agrep_outbuffer;extern int agrep_outlen;extern int agrep_outpointer;extern int NEW_FILE, POST_FILTER;extern int LIMITOUTPUT, LIMITPERFILE;intasearch1(old_D_pat, Text, D)char old_D_pat[]; int Text; register unsigned D;{	register unsigned end, i, r1, r3, r4, r5, CMask, D_Mask, k, endpos; 	register unsigned r_NO_ERR;	unsigned A[MaxError*2+1], B[MaxError*2+1];	int D_length, ResidueSize, lasti, num_read,  FIRSTROUND=1, j=0;	CHAR *buffer;	/* CHAR *tempbuf = NULL;*/	/* used only when Text == -1 */	if(I == 0) Init1 = (unsigned)037777777777;	if(DD > D) DD = D+1;	if(I  > D) I  = D+1;	if(S  > D) S  = D+1;	D_length = strlen(old_D_pat);	r_NO_ERR = NO_ERR_MASK;	D_Mask = D_endpos;	for(i=1; i<D_length; i++) D_Mask = (D_Mask << 1) | D_Mask;	D_Mask = ~D_Mask;	endpos = D_endpos;	r3 = D+1; 	r4 = D*2;  /* to make sure in register */	for(k=0; k < D;   k++) A[k] = B[k] = 0;	for(k=D; k <= r4; k++) A[k] = B[k] = Init[0];#if	AGREP_POINTER	if (Text != -1) {#endif	/*AGREP_POINTER*/		lasti = Max_record;		alloc_buf(Text, &buffer, BlockSize+Max_record+1);		buffer[Max_record-1] = '\n';		while ((num_read = fill_buf(Text, buffer + Max_record, BlockSize)) > 0)		{			i=Max_record; 			end = Max_record + num_read;			if(FIRSTROUND) { 				i = Max_record -1 ;				if(DELIMITER) {					for(k=0; k<D_length; k++) {						if(old_D_pat[k] != buffer[Max_record+k]) break;					}					if(k>=D_length) j--;				}				FIRSTROUND = 0; 			}			if(num_read < BlockSize) {				strncpy(buffer+Max_record+num_read, old_D_pat, 
D_length);				end = end + D_length;				buffer[end] = '\0';			}			/* ASEARCH1_PROCESS: the while-loop below */			while (i < end)			{				CMask = Mask[buffer[i++]];				CurrentByteOffset ++;				r1 = Init1 & B[D];				A[D] = ((B[D] >> 1) & CMask )  | r1;				for(k = r3; k <= r4; k++)  /* r3 = D+1, r4 = 2*D */				{ 					r5 = B[k];					r1 = Init1 & r5;					A[k] = ((r5 >> 1) & CMask) | B[k-I] | (((A[k-DD] | B[k-S]) >>1) & r_NO_ERR) | r1 ; 				}				if(A[D] & endpos) {  					j++;					if (DELIMITER) CurrentByteOffset -= D_length;					else CurrentByteOffset -= 1;					if(((AND == 1) && ((A[D*2] & endposition) == endposition)) || ((AND == 0) && (A[D*2] & endposition)) ^ INVERSE )					{ 						if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {							num_of_matched++;							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, "%s", CurrentFileName);							else {								int outindex;								for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&										(CurrentFileName[outindex] != '\0'); outindex++) {									agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];								}								if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {									OUTPUT_OVERFLOW;									free_buf(Text, buffer);									return -1;								}								agrep_outpointer += outindex;							}							if (PRINTFILETIME) {								char *s = aprint_file_time(CurrentFileTime);								if (agrep_finalfp != NULL)									fprintf(agrep_finalfp, "%s", s);								else {									int outindex;									for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&											(s[outindex] != '\0'); outindex++) {										agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];									}									if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {										OUTPUT_OVERFLOW;										free_buf(Text, buffer);										return -1;									}									agrep_outpointer += outindex;								}							}							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, "\n");			
				else {								if (agrep_outpointer+1>=agrep_outlen) {									OUTPUT_OVERFLOW;									free_buf(Text, buffer);									return -1;								}								else agrep_outbuffer[agrep_outpointer++] = '\n';							}							/*							if (Text == -1) {								memcpy(buffer+end-D_length, tempbuf, D_length+1);							}							*/							free_buf(Text, buffer);							NEW_FILE = OFF;							return 0;       						} 						if((Text != -1) && !(lasti >= Max_record + num_read - 1)) {							if (-1 == output(buffer, lasti, i-D_length-1, j)) {free_buf(Text, buffer); return -1;}						}						else if ((Text == -1) && !(lasti >= num_read)) {							if (-1 == output(buffer, lasti, i-D_length-1, j)) {free_buf(Text, buffer); return -1;}						}						if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||						    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {							free_buf(Text, buffer);							return 0;	/* done */						}					}					lasti = i - D_length;					TRUNCATE = OFF;					for(k = D; k <= r4 ; k++) A[k] = B[k] = Init[0];					r1 = Init1 & B[D];					A[D] = (((B[D] >> 1) & CMask )  | r1) & D_Mask;					for(k = r3; k <= r4; k++)  /* r3 = D+1, r4 = 2*D */					{ 						r5 = B[k];						r1 = Init1 & r5;						A[k] = ((r5 >> 1) & CMask) | B[k-I] | (((A[k-DD] | B[k-S]) >>1) & r_NO_ERR) | r1 ; 					}					if (DELIMITER) CurrentByteOffset += 1*D_length;					else CurrentByteOffset += 1*1;				}  /* end if (A[D]&endpos) */				CMask = Mask[buffer[i++]];				CurrentByteOffset ++;				r1 = A[D] & Init1;				B[D] = ((A[D] >> 1) & CMask) | r1;				for(k = r3; k <= r4; k++)				{ 					r1 = A[k] & Init1;					B[k] = ((A[k] >> 1) & CMask) | A[k-I] | (((B[k-DD] | A[k-S]) >>1)&r_NO_ERR) | r1 ; 				}				if(B[D] & endpos)  {  					j++;					if (DELIMITER) CurrentByteOffset -= D_length;					else CurrentByteOffset -= 1;					if(((AND == 1) && ((B[r4] & endposition) == endposition)) || ((AND == 0) && (B[r4] & endposition)) ^ INVERSE )					{ 						if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) {							
num_of_matched++;							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, "%s", CurrentFileName);							else {								int outindex;								for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&										(CurrentFileName[outindex] != '\0'); outindex++) {									agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];								}								if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {									OUTPUT_OVERFLOW;									free_buf(Text, buffer);									return -1;								}								agrep_outpointer += outindex;							}							if (PRINTFILETIME) {								char *s = aprint_file_time(CurrentFileTime);								if (agrep_finalfp != NULL)									fprintf(agrep_finalfp, "%s", s);								else {									int outindex;									for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&											(s[outindex] != '\0'); outindex++) {										agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];									}									if ((s[outindex] != '\0') || (outindex+agrep_outpointer>=agrep_outlen)) {										OUTPUT_OVERFLOW;										free_buf(Text, buffer);										return -1;									}									agrep_outpointer += outindex;								}							}							if (agrep_finalfp != NULL)								fprintf(agrep_finalfp, "\n");							else {								if (agrep_outpointer+1>=agrep_outlen) {									OUTPUT_OVERFLOW;									free_buf(Text, buffer);									return -1;								}								else agrep_outbuffer[agrep_outpointer++] = '\n';							}							/*							if (Text == -1) {								memcpy(buffer+end-D_length, tempbuf, D_length+1);							}							*/							free_buf(Text, buffer);							NEW_FILE = OFF;							return 0;						}						if((Text != -1) && !(lasti >= Max_record + num_read - 1)) {							if (-1 == output(buffer, lasti, i-D_length-1, j)) {free_buf(Text, buffer); return -1;}						}						else if ((Text == -1) && !(lasti >= num_read)) {							if (-1 == output(buffer, lasti, i-D_length-1, j)) {free_buf(Text, buffer); return -1;}						}						if (((LIMITOUTPUT > 0) && 
(LIMITOUTPUT <= num_of_matched)) ||						    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {							free_buf(Text, buffer);							return 0;	/* done */						}					} 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -