⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 newmgrep.c

📁 Mehldau和Myer的Agrep3版本
💻 C
📖 第 1 页 / 共 5 页
字号:
 * 4. Process other variables/functions (pat_spool, tr, tr1, pat_len, accumulate, SHIFT1, f_prep, f_prep1, pat_indices) appropriately.
 */
/*
 * tc_prepf: preprocess a newline-separated pattern list for searching
 * compressed text.
 *
 * buf    - `length` bytes of patterns, one per line.
 * length - number of bytes in buf.
 *
 * Each pattern is copied into a freshly allocated tc_pat_spool, passed
 * through quick_tcompress() and recorded in tc_patt[1..tc_num_pat]. The
 * tc_tr/tc_tr1 tables, tc_pat_len[], tc_p_size (shortest non-empty pattern),
 * tc_SHIFT1[], tc_HASH[] and tc_pat_indices are then (re)initialised and
 * tc_f_prep()/tc_accumulate()/tc_f_prep1() are run over every pattern.
 *
 * Returns 0 on success. Returns -1 if the pattern buffer is too large, if
 * the spool cannot be allocated, or if tc_p_size is 0 while EXITONERROR is
 * off (with EXITONERROR on, that last case calls exit(2) instead).
 */
int
tc_prepf(buf, length)
unsigned char *buf;
int	length;
{
	int i, p=1;
	uchar *pat_ptr;
	unsigned Mask = 31;
	int tc_length;
	unsigned char tc_buf[MAXPAT * 2];	/* maximum length of the compressed pattern */
	static struct timeval initt, finalt;

	if (length*2 > MAXPATFILE + 2*max_num) {
		fprintf(stderr, "%s: pattern buffer too large (> %d B)\n", Progname, (MAXPATFILE+2*max_num)/2);
		return -1;
	}
	if (tc_pat_spool != NULL) free(tc_pat_spool);
	pat_ptr = tc_pat_spool = (unsigned char *)malloc(length*2 + MAXPAT);
	if (pat_ptr == NULL) {
		/* tc_pat_spool is now NULL, so a later call will not free a stale pointer */
		fprintf(stderr, "%s: malloc failure in %s:%d\n", Progname, __FILE__, __LINE__);
		return -1;
	}

#if	MEASURE_TIMES
	gettimeofday(&initt, NULL);
#endif	/*MEASURE_TIMES*/

	/* NOTE(review): p is not checked against the capacity of tc_patt[]; confirm the caller bounds the number of lines. */
	i=0; p=1;
	while(i < length) {
		tc_patt[p] = pat_ptr;
		/* Copy up to the newline; the i < length test keeps a final line
		 * that lacks its '\n' from reading past the end of buf. */
		while(i < length && (*pat_ptr = buf[i++]) != '\n') pat_ptr++;
		*pat_ptr++ = 0;
		if ((tc_length = quick_tcompress(FREQ_FILE, HASH_FILE, tc_patt[p], strlen(tc_patt[p]), tc_buf, MAXPAT * 2 - 8, TC_EASYSEARCH)) > 0) {
			/* replace the plain pattern in the spool by its compressed form */
			memcpy(tc_patt[p], tc_buf, tc_length);
			tc_patt[p][tc_length] = '\0';
			pat_ptr = tc_patt[p] + tc_length + 1;	/* character after '\0' */
		}
		p++;  
	}

	/* boundary safety zone: 19 distinct bytes after the last pattern
	 * (the pointer must advance, otherwise only one byte is written) */
	for(i=1; i<20; i++) *pat_ptr++ = i;

	/* Ignore all other options: it is automatically W_DELIM */
	for(i=0; i< MAXSYM; i++) tc_tr[i] = i;
	for(i=0; i< MAXSYM; i++) tc_tr1[i] = tc_tr[i]&Mask;
	tc_num_pat =  p-1;
	tc_p_size  =  MAXPAT;
	/* iterate over the patterns stored above: tc_num_pat, not num_pat */
	for(i=1; i<=tc_num_pat; i++) {
		p = strlen(tc_patt[i]);
		tc_pat_len[i] = p;
#ifdef	debug
		printf("prepf(): tc_patt[%d]=%s, tc_pat_len[%d]=%d\n", i, tc_patt[i], i, tc_pat_len[i]);
#endif
		if(p!=0 && p < tc_p_size) tc_p_size = p;	/* MIN */
	}
	if(tc_p_size == 0) {	/* cannot happen NOW */
		fprintf(stderr, "%s: the pattern file is empty\n", Progname);
		if (!EXITONERROR) {
			errno = AGREP_ERROR;
			return -1;
		}
		else exit(2);
	}
	if(length > 400 && tc_p_size > 2) tc_LONG = 1;
	if(tc_p_size == 1) tc_SHORT = 1;
	/* the default shift depends on the tc_LONG flag chosen just above */
	for(i=0; i<MAXMEMBER1; i++) tc_SHIFT1[i] = tc_p_size - 1 - tc_LONG;
	for(i=0; i<MAXHASH; i++) {
		tc_HASH[i] = 0;
	}
	for(i=1; i<=tc_num_pat; i++) tc_f_prep(i, tc_patt[i]);
	tc_accumulate();
	memset(tc_pat_indices, '\0', sizeof(int) * (tc_num_pat + 1));
	for(i=1; i<=tc_num_pat; i++) tc_f_prep1(i, tc_patt[i]);

#if	MEASURE_TIMES
	gettimeofday(&finalt, NULL);
	INFILTER_ms +=  (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
#endif	/*MEASURE_TIMES*/
	return 0;
}
#endif	/*DOTCOMPRESSED*/

int
mgrep(fd)
int fd;
{ 
	register char r_newline = '\n';
	unsigned char *text;
	register int buf_end, num_read, start, end, residue = 0;
	int	oldCurrentByteOffset;
	int	first_time = 1;

#if     AGREP_POINTER
        if (fd != -1) {
#endif  /*AGREP_POINTER*/
                alloc_buf(fd, &text, 2*BLOCKSIZE+MAXLINE);
		text[MAXLINE-1] = '\n';  /* initial case */
		start = MAXLINE;

		while( (num_read = fill_buf(fd, text+MAXLINE, 2*BLOCKSIZE)) > 0) 
		{
			buf_end = end = MAXLINE + num_read -1 ;
			oldCurrentByteOffset = CurrentByteOffset;

			if (first_time) {
				if ((TCOMPRESSED == ON) && tuncompressible(text+MAXLINE, num_read)) {
					EASYSEARCH = text[MAXLINE+SIGNATURE_LEN-1];
					start += SIGNATURE_LEN;
					CurrentByteOffset += SIGNATURE_LEN;
					if (!EASYSEARCH) {
						fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName);
					}
				}
				else TCOMPRESSED = OFF;
				first_time = 0;
			}

			if (!DELIMITER) {
				while(text[end]  != r_newline && end > MAXLINE) end--;
				text[start-1] = r_newline;
			}
			else {
				unsigned char *newbuf = text + end + 1;
				newbuf = backward_delimiter(newbuf, text+MAXLINE, D_pattern, D_length, OUTTAIL);	/* see agrep.c/'d' */
				if (newbuf < text+MAXLINE+D_length) newbuf = text + end + 1;
				end = newbuf - text - 1;

/* TG 22.10.97 Check bounds before memcpy-ing */
/* printf("text %x start %i D_length %i D_pattern %i residue %i\n",text,start,D_length,D_pattern,residue); */

			if (start > D_length) memcpy(text+start-D_length, D_pattern, D_length);
			memcpy(text+start+residue, D_pattern, D_length);

/* original code was:	memcpy(text+start-D_length, D_pattern, D_length);	*/

			}
			residue = buf_end - end  + 1 ;
			if(INVERSE && COUNT) countline(text+MAXLINE, num_read);

			/* MGREP_PROCESS */
			if (TCOMPRESSED) {	/* separate functions since separate globals => too many if-statements within a single function makes it slow */
#if	DOTCOMPRESSED
				if(tc_SHORT) { if (-1 == tc_m_short(text, start, end)) {free_buf(fd, text); return -1;}}
				else      { if (-1 == tc_monkey1(text, start, end)) {free_buf(fd, text); return -1;}}
#endif	/*DOTCOMPRESSED*/
			}
			else {
				if(SHORT) { if (-1 == m_short(text, start, end)) {free_buf(fd, text); return -1;}}
				else      { if (-1 == monkey1(text, start, end)) {free_buf(fd, text); return -1;}}
			}
                        if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {
                                if (agrep_finalfp != NULL)
                                        fprintf(agrep_finalfp, "%s\n", CurrentFileName);
                                else {
                                        int outindex;
                                        for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
                                                        (CurrentFileName[outindex] != '\0'); outindex++) {
                                                agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
                                        }
                                        if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
                                                OUTPUT_OVERFLOW;
                                                free_buf(fd, text);
                                                return -1;
                                        }
                                        else {
                                                agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
                                        }
                                        agrep_outpointer += outindex;
                                }
                                free_buf(fd, text);
                                NEW_FILE = OFF;
                                return 0;
                        }

			CurrentByteOffset = oldCurrentByteOffset + end - start + 1;
			start = MAXLINE - residue;
			if(start < 0) {
				start = 1; 
			}
			strncpy(text+start, text+end, residue);

			if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
			    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
				free_buf(fd, text);
				return 0;	/* done */
			}
		} /* end of while(num_read = ... */
		if (!DELIMITER) {
			text[start-1] = '\n';
			text[start+residue] = '\n';
		}
		else {
			if (start > D_length) memcpy(text+start-D_length, D_pattern, D_length);
			memcpy(text+start+residue, D_pattern, D_length);
		}
		end = start + residue;
		if(residue > 1) {
			if (TCOMPRESSED) {
#if	DOTCOMPRESSED
				if(tc_SHORT) tc_m_short(text, start, end);
				else      tc_monkey1(text, start, end);
#endif	/*DOTCOMPRESSED*/
			}
			else {
				if(SHORT) m_short(text, start, end);
				else      monkey1(text, start, end);
			}
                        if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {
                                if (agrep_finalfp != NULL)
                                        fprintf(agrep_finalfp, "%s\n", CurrentFileName);
                                else {
                                        int outindex;
                                        for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
                                                        (CurrentFileName[outindex] != '\0'); outindex++) {
                                                agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
                                        }
                                        if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
                                                OUTPUT_OVERFLOW;
                                                free_buf(fd, text);
                                                return -1;
                                        }
                                        else {
                                                agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
                                        }
                                        agrep_outpointer += outindex;
                                }
                                free_buf(fd, text);
                                NEW_FILE = OFF;
                                return 0;
                        }
		}
		free_buf(fd, text);
		return (0);
#if	AGREP_POINTER
	}
	else {
                text = (unsigned char *)agrep_inbuffer;
                num_read = agrep_inlen;
                start = 0;
                buf_end = end = num_read - 1;

			oldCurrentByteOffset = CurrentByteOffset;

			if (first_time) {
				if ((TCOMPRESSED == ON) && tuncompressible(text+MAXLINE, num_read)) {
					EASYSEARCH = text[MAXLINE+SIGNATURE_LEN-1];
					start += SIGNATURE_LEN;
					CurrentByteOffset += SIGNATURE_LEN;
					if (!EASYSEARCH) {
						fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName);
					}
				}
				else TCOMPRESSED = OFF;
				first_time = 0;
			}

			if (!DELIMITER)
				while(text[end]  != r_newline && end > 1) end--;
			else {
                                unsigned char *newbuf = text + end + 1;
                                newbuf = backward_delimiter(newbuf, text, D_pattern, D_length, OUTTAIL);        /* see agrep.c/'d' */
				if (newbuf < text+D_length) newbuf = text + end + 1;
                                end = newbuf - text - 1;
			}
			/* text[0] = text[end] = r_newline; : the user must ensure that the delimiter is there at text[0] and occurs somewhere before text[end] */

			if (INVERSE && COUNT) countline(text, num_read);

                        /* An exact copy of the above MGREP_PROCESS */
			if (TCOMPRESSED) {	/* separate functions since separate globals => too many if-statements within a single function makes it slow */
#if	DOTCOMPRESSED
				if(tc_SHORT) { if (-1 == tc_m_short(text, start, end)) {free_buf(fd, text); return -1;}}
				else      { if (-1 == tc_monkey1(text, start, end)) {free_buf(fd, text); return -1;}}
#endif	/*DOTCOMPRESSED*/
			}
			else {
				if(SHORT) { if (-1 == m_short(text, start, end)) {free_buf(fd, text); return -1;}}
				else      { if (-1 == monkey1(text, start, end)) {free_buf(fd, text); return -1;}}
			}
                        if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {
                                if (agrep_finalfp != NULL)
                                        fprintf(agrep_finalfp, "%s\n", CurrentFileName);
                                else {
                                        int outindex;
                                        for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
                                                        (CurrentFileName[outindex] != '\0'); outindex++) {
                                                agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
                                        }
                                        if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
                                                OUTPUT_OVERFLOW;
                                                free_buf(fd, text);
                                                return -1;
                                        }
                                        else {
                                                agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
                                        }
                                        agrep_outpointer += outindex;
                                }
                                free_buf(fd, text);
                                NEW_FILE = OFF;
                                return 0;
                        }

                return 0;
	}
#endif	/*AGREP_POINTER*/
#ifdef perf_check
	fprintf(stderr,"Shifted %d times; shift=0 %d times; hash was = %d times\n",cshift, cshift0, chash);
	return 0;
#endif
} /* end mgrep */

/*
 * countline: add the number of '\n' bytes found in the first `len` bytes of
 * `text` to the global total_line counter. A non-positive len counts nothing.
 */
static void
countline(text, len)
unsigned char *text; int len;
{
	unsigned char *cursor;
	int remaining;

	for (cursor = text, remaining = len; remaining > 0; remaining--, cursor++) {
		if (*cursor == '\n') {
			total_line++;
		}
	}
}

/* Stuff that always needs to be printed whenever there is a match in all functions in this file */
int
print_options(pat_index, text, curtextbegin, curtextend)
	int	pat_index;
	unsigned char	*text, *curtextbegin, *curtextend;
{
	int	PRINTED = 0;
	if(FNAME && (NEW_FILE || !POST_FILTER)) {
		char    nextchar = (POST_FILTER == ON)?'\n':' ';
		char    *prevstring = (POST_FILTER == ON)?"\n":"";
		if (agrep_finalfp != NULL)
			fprintf(agrep_finalfp, "%s%s:%c", prevstring, CurrentFileName, nextchar);
		else {
			int outindex;
			if (prevstring[0] != '\0') {
				if(agrep_outpointer + 1 >= agrep_outlen) {
					OUTPUT_OVERFLOW;
					return -1;
				}
				else agrep_outbuffer[agrep_outpointer ++] = prevstring[0];
			}
			for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
					(CurrentFileName[outindex] != '\0'); outindex++) {
				agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
			}
			if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+2>=agrep_outlen)) {
				OUTPUT_OVERFLOW;
				return -1;
			}
			else {
				agrep_outbuffer[agrep_outpointer+outindex++] = ':';
				agrep_outbuffer[agrep_outpointer+outindex++] = nextchar;
			}
			agrep_outpointer += outindex;
		}
		NEW_FILE = OFF;
		PRINTED = 1;
	}

	if (PRINTPATTERN) {
		if (agrep_finalfp != NULL)
			fprintf(agrep_finalfp, "%d- ", pat_index);
		else {
			char s[32];
			int outindex;
			sprintf(s, "%d- ", pat_index);
			for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
					(s[outindex] != '\0'); outindex++) {
				agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
			}
			if (s[outindex] != '\0') {
				OUTPUT_OVERFLOW;
				return -1;
			}
			agrep_outpointer += outindex;
		}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -