📄 sgrep.c

📁 Mehldau和Myer的Agrep3版本
💻 C
📖 第 1 页 / 共 5 页
字号:
					}
				}
				else {
					if(m >= LONG_APPX) {
						if (-1 == a_monkey(pat, m, text+start, text+end, D, oldpat, oldm)) {
							free_buf(fd, text);
							return -1;
						}
					}
					else {
						if (-1 == agrep(pat, m, text+start, text+end, D, oldpat, oldm)) {
							free_buf(fd, text);
							return -1;
						}
					}
				}
			}
			if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {
				if (agrep_finalfp != NULL)
					fprintf(agrep_finalfp, "%s\n", CurrentFileName);
				else {
					int outindex;
					for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
							(CurrentFileName[outindex] != '\0'); outindex++) {
						agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
					}
					if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
						OUTPUT_OVERFLOW;
						free_buf(fd, text);
						return -1;
					}
					else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
					agrep_outpointer += outindex;
				}
				free_buf(fd, text);
				NEW_FILE = OFF;
				return 0; 
			}

			CurrentByteOffset = oldCurrentByteOffset + end - start + 1;	/* for a new iteration: avoid complicated calculations below */
			start = offset - residue ;
			if(start < MAXLINE) {
				start = MAXLINE; 
			}
			strncpy(text+start, text+end, residue);
			start++;
			if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
			    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) {
				free_buf(fd, text);
				return 0;	/* done */
			}
		} /* end of while(num_read = ...) */
                if (!DELIMITER) {
                        text[start-1] = '\n';
                        text[start+residue] = '\n';
                }
                else {
                        if (start > D_length) memcpy(text+start-D_length, D_pattern, D_length);
                        memcpy(text+start+residue, D_pattern, D_length);
                }
		end = start + residue - 2;
                if(residue > 1) {
			/* SGREP_PROCESS */
			/* No harm in sending a few extra parameters even if they are unused: they are not accessed in monkey*()s */
			if(D==0)  {
				if(m > LONG_EXAC) {
					if (-1 == monkey(pat, m, text+start, text+end, oldpat, oldm)) {
						free_buf(fd, text);
						return -1;
					}
				}
				else {
					if (-1 == bm(pat, m, text+start, text+end, oldpat, oldm)) {
						free_buf(fd, text);
						return -1;
					}
				}
			}
			else {
				if(DNA) {
					if (-1 == monkey4( pat, m, text+start, text+end, D , oldpat, oldm )) {
						free_buf(fd, text);
						return -1;
					}
				}
				else {
					if(m >= LONG_APPX) {
						if (-1 == a_monkey(pat, m, text+start, text+end, D, oldpat, oldm)) {
							free_buf(fd, text);
							return -1;
						}
					}
					else {
						if (-1 == agrep(pat, m, text+start, text+end, D, oldpat, oldm)) {
							free_buf(fd, text);
							return -1;
						}
					}
				}
			}
			if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {
				if (agrep_finalfp != NULL)
					fprintf(agrep_finalfp, "%s\n", CurrentFileName);
				else {
					int outindex;
					for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
							(CurrentFileName[outindex] != '\0'); outindex++) {
						agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
					}
					if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
						OUTPUT_OVERFLOW;
						free_buf(fd, text);
						return -1;
					}
					else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
					agrep_outpointer += outindex;
				}
				free_buf(fd, text);
				NEW_FILE = OFF;
				return 0; 
			}
                }
		free_buf(fd, text);
		return 0;
#if	AGREP_POINTER
	}
	else {	/* as if only one iteration of the while-loop and offset = 0 */
		tempbuf = (CHARTYPE*)malloc(m);
		text = (CHARTYPE *)agrep_inbuffer;
		num_read = agrep_inlen;
		start = 0;
		buf_end = end = num_read - 1;
#if	0
		if (WHOLELINE) {
			start --;
			CurrentByteOffset --;
		}
#endif
		if ((TCOMPRESSED == ON) && tuncompressible(text+1, num_read)) {
			EASYSEARCH = text[offset+SIGNATURE_LEN-1];
			start += SIGNATURE_LEN;
			CurrentByteOffset += SIGNATURE_LEN;
			if (!EASYSEARCH) {
				fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName);
			}
#if	MEASURE_TIMES
			gettimeofday(&initt, NULL);
#endif	/*MEASURE_TIMES*/
			if (samepattern || ((newm = quick_tcompress(FREQ_FILE, HASH_FILE, pat, m, newpat, MAXLINE-8, EASYSEARCH)) > 0)) {
				oldm = m;
				oldpat = pat;
				m = newm;
				pat = newpat;
			}
#if	MEASURE_TIMES
			gettimeofday(&finalt, NULL);
			INFILTER_ms +=  (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
#endif	/*MEASURE_TIMES*/
		}
		else TCOMPRESSED = OFF;

		PROCESS_PATTERN	/* must be after we know whether it is compressed or not */

		/* Emergency Stop: put one copy of pattern to the end of the buffer
		   to make sure that the skip loop in bm()
		   won't go out of bound in later iterations */
		   
		memcpy(tempbuf, text+end+1, m);	/* save portion being overwritten */
		for(i=1; i<=m; i++) text[end+i] = pat[m-1];

                        if (!DELIMITER)
                                while(text[end]  != '\n' && end > 1) end--;
                        else {
                                unsigned char *newbuf = text + end + 1;
                                newbuf = backward_delimiter(newbuf, text, D_pattern, D_length, OUTTAIL);        /* see agrep.c/'d' */
				if (newbuf < text+offset+D_length) newbuf = text + end + 1;
                                end = newbuf - text - 1;
                        }
                        /* text[0] = text[end] = r_newline; : the user must ensure that the delimiter is there at text[0] and occurs somewhere before text[end ] */

			/* An exact copy of the above SGREP_PROCESS */
			/* No harm in sending a few extra parameters even if they are unused: they are not accessed in monkey*()s */
			if(D==0)  {
				if(m > LONG_EXAC) {
					if (-1 == monkey(pat, m, text+start, text+end, oldpat, oldm)) {
						free_buf(fd, text);
						memcpy(text+end+1, tempbuf, m); /* restore */
						free(tempbuf);
						return -1;
					}
				}
				else {
					if (-1 == bm(pat, m, text+start, text+end, oldpat, oldm)) {
						free_buf(fd, text);
						memcpy(text+end+1, tempbuf, m); /* restore */
						free(tempbuf);
						return -1;
					}
				}
			}
			else {
				if(DNA) {
					if (-1 == monkey4( pat, m, text+start, text+end, D , oldpat, oldm )) {
						free_buf(fd, text);
						memcpy(text+end+1, tempbuf, m); /* restore */
						free(tempbuf);
						return -1;
					}
				}
				else {
					if(m >= LONG_APPX) {
						if (-1 == a_monkey(pat, m, text+start, text+end, D, oldpat, oldm)) {
							free_buf(fd, text);
							memcpy(text+end+1, tempbuf, m); /* restore */
							free(tempbuf);
							return -1;
						}
					}
					else {
						if (-1 == agrep(pat, m, text+start, text+end, D, oldpat, oldm)) {
							free_buf(fd, text);
							memcpy(text+end+1, tempbuf, m); /* restore */
							free(tempbuf);
							return -1;
						}
					}
				}
			}
			if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) {	/* externally set */
				if (agrep_finalfp != NULL)
					fprintf(agrep_finalfp, "%s\n", CurrentFileName);
				else {
					int outindex;
					for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
							(CurrentFileName[outindex] != '\0'); outindex++) {
						agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
					}
					if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+1>=agrep_outlen)) {
						OUTPUT_OVERFLOW;
						free_buf(fd, text);
						memcpy(text+end+1, tempbuf, m); /* restore */
						free(tempbuf);
						return -1;
					}
					else agrep_outbuffer[agrep_outpointer+outindex++] = '\n';
					agrep_outpointer += outindex;
				}
				free_buf(fd, text);
				NEW_FILE = OFF;
			}

		memcpy(text+end+1, tempbuf, m); /* restore */
		free(tempbuf);
		return 0;
	}
#endif	/*AGREP_POINTER*/
} /* end sgrep */

/* SUN:

   BOYER-MOORE.
   
   Our implementation of bm assumes
   that the content of text[n]...text[n+m-1] is pat[m-1] (emergency stop),
   so that the skip loop is guaranteed to terminate.
      
*/

int bm(pat, m, text, textend, oldpat, oldm)

CHARTYPE *text, *textend, *pat, *oldpat;
int m, oldm;
{
	int PRINTED = 0;
	register int shift;
	register int  m1, j, d1; 
	CHARTYPE *textbegin = text;
	int newlen;
	CHARTYPE *textstart;
	CHARTYPE *curtextbegin;
	CHARTYPE *curtextend;
#if	MEASURE_TIMES
	struct timeval initt, finalt;
#endif
	CHARTYPE *lastout = text;

	d1 = shift_1;	/* at least 1 */
	m1 = m - 1;
	shift = 0;	/* to start with the skip loop: assume,
			   the first character is a match. */

#ifdef DEBUG2
	printf("***BM-START*** textend=%d=%c=\n",textend,*textend);
#endif

	/* The original loop was: while (text <= textend) [TG] 04.10.96 */

	while (text < textend) {
	
		textstart = text;
		
#ifdef DEBUG2
		printf("shift=%d text=%d=%c\n",shift,text,*text);
#endif	
		/* the skip-loop: skip until a match is found (shift=0) */
		while(shift) {
			shift = SHIFT[*(text += shift)];
#ifdef DEBUG2
			printf("shift=%d text=%d=%c\n",shift,text,*text);
#endif
		}
		
		CurrentByteOffset += text - textstart;

		j = 0;
		while(TR[pat[m1 - j]] == TR[*(text - j)]) {
			if(++j == m)  break;       /* if statement can be saved,
						      but for safety ... */
		}
		
		if (j == m ) {
		
			if(text > textend) return 0;
			
			if(WORDBOUND) {
				if(isalnum(*(text+1))) { shift=1 /*TG*/ ; goto CONT; }	/* as if there was no match */
				if(isalnum(*(text-m))) { shift=1 /*TG*/ ; goto CONT; }	/* as if there was no match */
				
				/* changed by Udi 11/7/94 to avoid having to set TR[] to W_delim */
			}

			if (TCOMPRESSED == ON) {
				/* Don't update CurrentByteOffset here: only before outputting properly */
				if (!DELIMITER) {
					curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
					if (*curtextbegin == '\n') curtextbegin ++;
					curtextend = text+1; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
					if (*curtextend == '\n') curtextend ++;
					
/*** [TG] *THE CODE BELOW MUST BE COPIED HERE ***/

				}
				else {
					curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL);
					curtextend = forward_delimiter(text+1, textend, tc_D_pattern, tc_D_length, OUTTAIL);
				}
			}
			else {
				/* Don't update CurrentByteOffset here: only before outputting properly */
				if (!DELIMITER) {
					curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
					if (*curtextbegin == '\n') curtextbegin ++;
					
					curtextend = text+1;
					while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
					if (*curtextend == '\n') curtextend ++;
					
					/* adjust for files without CR as last character;
					   this part is only needed for hits at the very last line. [TG] */

					if (curtextend >= textend) {
						curtextend=textend+1;
						if (*(curtextend-1) != '\n') *curtextend++='\n';
					}
					
				}
				else {
					curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL);
					curtextend = forward_delimiter(text+1, textend, D_pattern, D_length, OUTTAIL);
				}
			}

			if (TCOMPRESSED == ON) {
#if     MEASURE_TIMES
                                gettimeofday(&initt, NULL);
#endif  /*MEASURE_TIMES*/
				if (-1 == exists_tcompressed_word(pat, m, curtextbegin, text - curtextbegin + m, EASYSEARCH)) {
					shift = 1;	/* TG */
					goto CONT;	/* as if there was no match */
				}
#if     MEASURE_TIMES
                                gettimeofday(&finalt, NULL);
                                FILTERALGO_ms +=  (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
#endif  /*MEASURE_TIMES*/
			}

			textbegin = curtextend; /* (curtextend - 1 > textbegin ? curtextend - 1 : curtextend); */
			num_of_matched++;
			if(FILENAMEONLY) return 0;
			if(!COUNT) {
				if (!INVERSE) {
					if(FNAME && (NEW_FILE || !POST_FILTER)) {
						char	nextchar = (POST_FILTER == ON)?'\n':' ';
						char	*prevstring = (POST_FILTER == ON)?"\n":"";
						if (agrep_finalfp != NULL)
							fprintf(agrep_finalfp, "%s%s:%c", prevstring, CurrentFileName, nextchar);
						else {
							int outindex;
							if (prevstring[0] != '\0') {
								if(agrep_outpointer + 1 >= agrep_outlen) {
									OUTPUT_OVERFLOW;
									return -1;
								}
								else agrep_outbuffer[agrep_outpointer ++] = prevstring[0];
							}
							for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
									(CurrentFileName[outindex] != '\0'); outindex++) {
								agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
							}
							if ((CurrentFileName[outindex] != '\0') || (outindex+agrep_outpointer+2>=agrep_outlen)) {
								OUTPUT_OVERFLOW;
								return -1;
							}
							else {
								agrep_outbuffer[agrep_outpointer+outindex++] = ':';
								agrep_outbuffer[agrep_outpointer+outindex++] = nextchar;
							}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -