⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sgrep.c

📁 Mehldau和Myer的Agrep3版本
💻 C
📖 第 1 页 / 共 5 页
字号:
					OUTPUT_OVERFLOW;
					return -1;
				}
				memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1);
				agrep_outpointer += (textend - lastout + 1);
			}
		} /* TCOMPRESSED */
	}

	return 0;
}

/*
 * s_output: emit the record that contains the match at text[*i].
 *
 * text      - base pointer that *i is an offset from
 * i         - in: offset of the match; out: offset (from text) of the end
 *             of the record just handled, so the caller resumes there
 * textbegin - lower bound used when scanning back for a newline
 * textend   - upper bound used when scanning forward for the record end
 * lastout   - in/out: used only under INVERSE; start of the text that has
 *             not been emitted yet, advanced to the end of this record
 * pat, m    - pattern and its length, handed to exists_tcompressed_word()
 *             when TCOMPRESSED is ON
 * oldpat, oldm - not used in this function
 *
 * Output goes to agrep_finalfp when it is non-NULL; otherwise it is appended
 * to agrep_outbuffer at agrep_outpointer.
 *
 * Returns 0 normally (including the SILENT and COUNT early exits, and the
 * TCOMPRESSED case where the match is rejected and num_of_matched is
 * decremented), or -1 after OUTPUT_OVERFLOW when the in-memory output
 * buffer has no room.
 *
 * CurrentByteOffset is not updated on the normal path (the original note
 * says the caller does that); only the INVERSE path adds to it.
 */
int
s_output(text, i, textbegin, textend, lastout, pat, m, oldpat, oldm) 
int *i;	/* in, out */
int m, oldm; 
CHARTYPE *text, *textbegin, *textend, *pat, *oldpat;
CHARTYPE **lastout;	/* in, out */
{
	int PRINTED = 0;	/* set once a prefix (file name / offsets) has been written */
	int newlen; 
	int oldi;
	CHARTYPE *curtextbegin;	/* first byte of the matching record */
	CHARTYPE *curtextend;	/* one past the last byte of the matching record */
#if	MEASURE_TIMES
	struct timeval initt, finalt;
#endif

	if(SILENT) return 0;

	/*
	 * Find the record boundaries around the match.  Without a user
	 * delimiter a record is a line; otherwise the delimiter helpers are
	 * used, with the compressed-text delimiter when TCOMPRESSED is ON.
	 * No "+ 1" on the forward scan: per the original note, agrep() has i++.
	 */
	if (!DELIMITER) {
		curtextbegin = text + *i;
		while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
		if (*curtextbegin == '\n') curtextbegin ++;
		curtextend = text + *i;
		while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
		if (*curtextend == '\n') curtextend ++;
	}
	else if (TCOMPRESSED == ON) {
		curtextbegin = backward_delimiter(text + *i, text, tc_D_pattern, tc_D_length, OUTTAIL);
		curtextend = forward_delimiter(text + *i, textend, tc_D_pattern, tc_D_length, OUTTAIL);
	}
	else {
		curtextbegin = backward_delimiter(text + *i, text, D_pattern, D_length, OUTTAIL);
		curtextend = forward_delimiter(text + *i, textend, D_pattern, D_length, OUTTAIL);
	}

	if (TCOMPRESSED == ON) {
#if     MEASURE_TIMES
		gettimeofday(&initt, NULL);
#endif  /*MEASURE_TIMES*/
		/* Re-check the match on the compressed record; undo the count if it is rejected. */
		if (-1 == exists_tcompressed_word(pat, m, curtextbegin, text  + *i - curtextbegin + m, EASYSEARCH)) {
			num_of_matched --;
			return 0;
		}
#if     MEASURE_TIMES
		gettimeofday(&finalt, NULL);
		FILTERALGO_ms +=  (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
#endif  /*MEASURE_TIMES*/
	}

	/* Move the caller's position to the end of this record. */
	textbegin = curtextend;
	oldi = *i;
	*i += textbegin - (text + *i);
	if(COUNT) return 0;

	if (INVERSE) {
		/* Emit everything between the previous output point and this record. */
		if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
			if (agrep_finalfp != NULL)
				newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, *lastout, curtextbegin - *lastout, agrep_finalfp, -1, EASYSEARCH);
			else {
				if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, *lastout, curtextbegin - *lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
					if (newlen + agrep_outpointer >= agrep_outlen) {
						OUTPUT_OVERFLOW;
						return -1;
					}
					agrep_outpointer += newlen;
				}
			}
		}
		else { /* NOT TCOMPRESSED */
			if (agrep_finalfp != NULL)
				fwrite(*lastout, 1, curtextbegin-*lastout, agrep_finalfp);
			else {
				if (curtextbegin - *lastout + agrep_outpointer >= agrep_outlen) {
					OUTPUT_OVERFLOW;
					return -1;
				}
				memcpy(agrep_outbuffer+agrep_outpointer, *lastout, curtextbegin-*lastout);
				agrep_outpointer += (curtextbegin - *lastout);
			}
		} /* TCOMPRESSED */

		/* Skip the matching record itself and account for the consumed bytes. */
		*lastout = textbegin;
		CurrentByteOffset += textbegin - text;
		return 0;
	}

	/* Optional "filename:" prefix. */
	if(FNAME && (NEW_FILE || !POST_FILTER)) {
		char	nextchar = (POST_FILTER == ON)?'\n':' ';
		char	*prevstring = (POST_FILTER == ON)?"\n":"";
		if (agrep_finalfp != NULL)
			fprintf(agrep_finalfp, "%s%s:%c", prevstring, CurrentFileName, nextchar);
		else {
			int outindex;
			if (prevstring[0] != '\0') {
				if(agrep_outpointer + 1 >= agrep_outlen) {
					OUTPUT_OVERFLOW;
					return -1;
				}
				else agrep_outbuffer[agrep_outpointer ++] = prevstring[0];
			}
			for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
					(CurrentFileName[outindex] != '\0'); outindex++) {
				agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
			}
			/* Need the whole name plus room for ':' and nextchar. */
			if ((CurrentFileName[outindex] != '\0') || (outindex + agrep_outpointer + 2 >= agrep_outlen)) {
				OUTPUT_OVERFLOW;
				return -1;
			}
			agrep_outbuffer[agrep_outpointer + outindex++] = ':';
			agrep_outbuffer[agrep_outpointer + outindex++] = nextchar;
			agrep_outpointer += outindex;
		}
		NEW_FILE = OFF;
		PRINTED = 1;
	}

	/* Optional "<byte offset>= " prefix. */
	if(BYTECOUNT) {
		if (agrep_finalfp != NULL)
			fprintf(agrep_finalfp, "%d= ", CurrentByteOffset);
		else {
			char s[32];
			int  outindex;
			sprintf(s, "%d= ", CurrentByteOffset);
			for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
					(s[outindex] != '\0'); outindex++) {
				agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
			}
			if (s[outindex] != '\0') {
				OUTPUT_OVERFLOW;
				return -1;
			}
			agrep_outpointer += outindex;
		}
		PRINTED = 1;
	}

	/* Optional "@<record offset>{<record length>} " prefix. */
	if (PRINTOFFSET) {
		/*
		 * Both values involve pointer differences; convert them to int
		 * explicitly so the arguments really match the %d conversions.
		 */
		int recoffset = (int)(CurrentByteOffset - (text + oldi - curtextbegin));
		int reclen = (int)(curtextend - curtextbegin);

		if (agrep_finalfp != NULL)
			fprintf(agrep_finalfp, "@%d{%d} ", recoffset, reclen);
		else {
			char s[32];	/* "@" + int + "{" + int + "} " + NUL fits in 32 bytes */
			int outindex;
			sprintf(s, "@%d{%d} ", recoffset, reclen);
			for (outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
					 (s[outindex] != '\0'); outindex ++) {
				agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
			}
			if (s[outindex] != '\0') {
				OUTPUT_OVERFLOW;
				return -1;
			}
			agrep_outpointer += outindex;
		}
		PRINTED = 1;
	}

	if (PRINTRECORD) {
		/* Emit the record itself, uncompressing it first when needed. */
		if (TCOMPRESSED == ON) {
#if	MEASURE_TIMES
			gettimeofday(&initt, NULL);
#endif	/*MEASURE_TIMES*/
			if (agrep_finalfp != NULL) {
				newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH);
			}
			else {
				if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
					if (agrep_outpointer + newlen + 1 >= agrep_outlen) {
						OUTPUT_OVERFLOW;
						return -1;
					}
					agrep_outpointer += newlen;
				}
			}
#if	MEASURE_TIMES
			gettimeofday(&finalt, NULL);
			OUTFILTER_ms +=  (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
#endif	/*MEASURE_TIMES*/
		}
		else {
			if (agrep_finalfp != NULL) {
				fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp);
			}
			else {
				if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) {
					OUTPUT_OVERFLOW;
					return -1;
				}
				memcpy(agrep_outbuffer + agrep_outpointer, curtextbegin, curtextend - curtextbegin);
				agrep_outpointer += curtextend - curtextbegin;
			}
		}
	}
	else if (PRINTED) {
		/* Only a prefix was written: terminate the line. */
		if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);
		else {
			/* Same bounds check as every other single-byte store into the buffer. */
			if (agrep_outpointer + 1 >= agrep_outlen) {
				OUTPUT_OVERFLOW;
				return -1;
			}
			agrep_outbuffer[agrep_outpointer ++] = '\n';
		}
		PRINTED = 0;
	}
	return 0;
}

/*
 * prep_bm: fill the SHIFT[] table and shift_1 for a pattern of length m.
 *
 * Pattern - the pattern bytes (each is mapped through TR[] before use)
 * m       - pattern length
 *
 * After the call, SHIFT[c] is the distance from the rightmost occurrence
 * of c in the pattern to the pattern's last position, or m when c does not
 * occur.  shift_1 is the distance from the last pattern position back to
 * the previous occurrence of the same (TR-mapped) character, or m-1 when
 * there is none.
 */
static void
prep_bm(Pattern, m)      
unsigned char *Pattern;
register int m;
{
	int i;
	unsigned hash;
	unsigned char lastc;

	/* Default: a character that is not in the pattern allows a full shift. */
	for (i = 0; i < MAXSYM; i++) SHIFT[i] = m;

	/*
	 * Walk right to left.  An entry is only replaced while it still holds
	 * a value >= m-1, so the rightmost occurrence of each character wins.
	 */
	for (i = m-1; i>=0; i--) {
		hash = TR[Pattern[i]];
		if((int)(SHIFT[hash]) >= (int)(m - 1)) SHIFT[hash] = m-1-i;
	}
	shift_1 = m-1;

	/* shift_1 records the previous occurrence of the last character of
	the pattern. When we match this last character but do not have a match,
	we can shift until we reach the next occurrence from the right. */

	lastc = TR[Pattern[m-1]];

	for (i= m-2; i>=0; i--) {
		if(TR[Pattern[i]] == lastc )
		{ 
			shift_1 = m-1 - i;  
			break;	/* nearest earlier occurrence found */
		}
	}
	
	/* shift_1 is 0 here only when m == 1; never allow a zero shift. */
	if(shift_1 == 0) shift_1 = 1;

	/* Give each upper-case entry the shift of its lower-case counterpart. */
	/* if(NOUPPER) */ for(i=0; i<MAXSYM; i++) {

#if ((defined(__EMX__) || defined(_WIN32)) && defined(ISO_CHAR_SET))
		SHIFT[i] = SHIFT[LUT[i]];
#else
		if (isupper(i)) SHIFT[i] = SHIFT[tolower(i)];
#endif
	}
	
#ifdef DEBUG
	for(i=0; i<MAXSYM; i++) {
		printf("%c:%d ", i, SHIFT[i]); 
		if ((i % 8) == 0) printf("\n");
	}
	printf("\n");
#endif
}

/*
 * monkey uses two characters for delta_1 shifting.
 * monkey() indexes this table with a key built from the current text
 * character and the one before it, (TR[cur] << 3) + TR[prev], and keeps
 * skipping ahead while the looked-up shift is non-zero.
 */

CHARTYPE SHIFT_2[MAX_SHIFT_2];

int
monkey( pat, m, text, textend  ) 
register int m  ; 
register CHARTYPE *text, *textend, *pat;
{
	int PRINTED = 0;
	register unsigned hash;
	register CHARTYPE shift;
	register int  m1, j; 
	CHARTYPE *textbegin = text;
	CHARTYPE *textstart;
	int newlen;
	CHARTYPE *curtextbegin;
	CHARTYPE *curtextend;
#if	MEASURE_TIMES
	struct timeval initt, finalt;
#endif
	CHARTYPE *lastout = text;

	m1 = m - 1;
	text = text+m1;
	CurrentByteOffset += m1;
	
	while (text < textend) {
		textstart = text;
		hash = TR[*text];
		hash = (hash << 3) + TR[*(text-1)];
		shift = SHIFT_2[hash];
		while(shift) {
			text = text + shift;
			hash = (TR[*text] << 3) + TR[*(text-1)];
			shift = SHIFT_2[hash];
		}
		CurrentByteOffset += text - textstart;
		j = 0;
		while(TR[pat[m1 - j]] == TR[*(text - j)]) { 
			if(++j == m) break; 
		}
		
		if (j == m ) {
		
			if(text > textend) return 0; /* Udi: used to be >= for some reason */
			
		  	/* added by Udi 11/7/94 */
			
			if(WORDBOUND) {
				if(isalnum(*(text+1))) {shift=1/*TG*/; goto CONT; }	/* as if there was no match */
				if(isalnum(*(text-m))) {shift=1/*TG*/; goto CONT; }	/* as if there was no match */
				
				/* changed by Udi 11/7/94 to avoid having to set TR[] to W_delim */
			}

			if (TCOMPRESSED == ON) {
				/* Don't update CurrentByteOffset here: only before outputting properly */
				if (!DELIMITER) {
					curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
					if (*curtextbegin == '\n') curtextbegin ++;
					curtextend = text+1; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
					if (*curtextend == '\n') curtextend ++;
				}
				else {
					curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL);
					curtextend = forward_delimiter(text + 1, textend, tc_D_pattern, tc_D_length, OUTTAIL);
				}
			}
			else {

				/* Don't update CurrentByteOffset here: only before outputting properly */

				if (!DELIMITER) {
					curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
					if (*curtextbegin == '\n') curtextbegin ++;
					curtextend = text+1; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
					if (*curtextend == '\n') curtextend ++;
				}
				else {
					curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL);
					curtextend = forward_delimiter(text + 1, textend, D_pattern, D_length, OUTTAIL);
				}
			}

			if (TCOMPRESSED == ON) {

#if     MEASURE_TIMES
                                gettimeofday(&initt, NULL);
#endif  /*MEASURE_TIMES*/
				if (-1 == exists_tcompressed_word(pat, m, curtextbegin, text - curtextbegin + m, EASYSEARCH)) {
					shift = 1;
					goto CONT;	/* as if there was no match */
				}
#if     MEASURE_TIMES
                                gettimeofday(&finalt, NULL);
                                FILTERALGO_ms +=  (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
#endif  /*MEASURE_TIMES*/
			}

			textbegin = curtextend; /*(curtextend - 1 > textbegin ? curtextend - 1 : curtextend); */
			num_of_matched++;
			if(FILENAMEONLY)  return 0;
			if (!COUNT) {
				if (!INVERSE) {
					if(FNAME && (NEW_FILE || !POST_FILTER)) {
						char	nextchar = (POST_FILTER == ON)?'\n':' ';
						char	*prevstring = (POST_FILTER == ON)?"\n":"";
						if (agrep_finalfp != NULL)
							fprintf(agrep_finalfp, "%s%s:%c", prevstring, CurrentFileName, nextchar);
						else {
							int outindex;
							if (prevstring[0] != '\0') {
								if(agrep_outpointer + 1 >= agrep_outlen) {
									OUTPUT_OVERFLOW;
									return -1;
								}
								else agrep_outbuffer[agrep_outpointer ++] = prevstring[0];
							}
							for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
									(CurrentFileName[outindex] != '\0'); outindex++) {
								agrep_outbuffer[agrep_outpointer+outindex] = CurrentFileName[outindex];
							}
							if ((CurrentFileName[outindex] != '\0') || (outindex + agrep_outpointer + 2 >= agrep_outlen)) {
								OUTPUT_OVERFLOW;
								return -1;
							}
							agrep_outbuffer[agrep_outpointer + outindex++] = ':';
							agrep_outbuffer[agrep_outpointer + outindex++] = nextchar;
							agrep_outpointer += outindex;
						}
						NEW_FILE = OFF;
						PRINTED = 1;
					}

					if(BYTECOUNT) {
						if (agrep_finalfp != NULL)
							fprintf(agrep_finalfp, "%d= ", CurrentByteOffset);
						else {
							char s[32];
							int  outindex;
							sprintf(s, "%d= ", CurrentByteOffset);
							for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -