⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 newmgrep.c

📁 Mehldau和Myer的Agrep3版本
💻 C
📖 第 1 页 / 共 5 页
字号:
/* shift is always 1: slight change in MATCHED semantics: it is set to 1 even if COUNT is set: previously, it wasn't set. Will it affect m_short? */
/*
 * tc_m_short: multi-pattern scan over text[start..end] driven by the tc_*
 * tables, advancing one byte at a time.
 *
 * For every position the current byte is mapped through tc_tr[] and used to
 * pick a range of candidate slots out of tc_HASH[]; each candidate pattern
 * (tc_PatPtr[]) is compared against the text through tc_tr[].  A complete
 * comparison hit is then re-checked with exists_tcompressed_word() over the
 * enclosing record before it counts as a match.  Record boundaries are
 * newlines, or tc_D_pattern when DELIMITER is set.
 *
 * Matching terminals are flagged in amatched_terminals[].  With
 * AComplexBoolean a record is reported only when the scan reaches the end of
 * the record and eval_tree() accepts it; with AND_EXP it is reported only
 * once every terminal has been flagged.  Records are emitted through the
 * DO_OUTPUT macro defined inside the body, which expands them with
 * quick_tuncompress() (or, for INVERSE, expands the text between lastout and
 * the start of the matching record).
 *
 * Returns 0 normally -- including the early exits for FILENAMEONLY/SILENT,
 * for LIMITOUTPUT/LIMITPERFILE being reached, and for a comparison hit at or
 * beyond textend -- and -1 after OUTPUT_OVERFLOW when the expanded record
 * does not fit into agrep_outbuffer.
 */
int
tc_m_short(text, start, end)
int start, end; register uchar *text;
{
	int PRINTED = 0;
	int pat_index;
        unsigned char *oldtext;
	register uchar *textend;
	unsigned char *textbegin;
	unsigned char *curtextend;
	unsigned char *curtextbegin;
	register int p, p_end;
	int MATCHED=0;
	/* int OUT=0; */
	uchar *lastout;
	uchar *qx;
	uchar *px;
	int j;
	/* Non-zero while curtextbegin/curtextend already describe the record being scanned */
	int DOWITHMASK;
	/* Only used when MEASURE_TIMES is enabled */
	struct timeval initt, finalt;
	int newlen;

	DOWITHMASK = 0;
	if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
	textend = text + end;
	lastout = text + start;
	/* One before the first byte: the loop below pre-increments text */
	text = text + start - 1 ;
	/* NOTE(review): text has already been moved to start-1 here, so this is original text + 2*start - 1, not text + start -- confirm this is intended */
	textbegin = text + start;
	/* WORDBOUND adjustment not required */
	while (++text <= textend) {
		CurrentByteOffset ++;
		p = tc_HASH[tc_tr[*text]];
		p_end = tc_HASH[tc_tr[*text]+1];
		/* p is incremented before the body runs, so the slots visited are p+1 .. p_end */
		while(p++ < p_end) {
			/* Skip slots without a usable pattern index or length */
			if (((pat_index = tc_pat_indices[p]) <= 0) || (tc_pat_len[pat_index] <= 0)) continue;
#ifdef	debug
			printf("m_short(): p=%d pat_index=%d off=%d\n", p, pat_index, textend - text);
#endif
			/* Compare the candidate pattern with the text through tc_tr[] until the pattern's NUL or a mismatch */
			px = tc_PatPtr[p];
			qx = text;
			while((*px!=0)&&(tc_tr[*px] == tc_tr[*qx])) {
				px++;
				qx++;
			}
			if (*px == 0) {
				/* A hit starting at or beyond textend ends the whole scan */
				if(text >= textend) return 0;

				if (!DOWITHMASK) {
					/* Don't update CurrentByteOffset here: only before outputting properly */
					if (!DELIMITER) {
						/* Record = the line around text: walk back to just after the previous newline, forward to just past the next one */
						curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n'));
						if (*curtextbegin == '\n') curtextbegin ++;
						curtextend = text+1; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++;
						if (*curtextend == '\n') curtextend ++;
					}
					else {
						curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL);
						curtextend = forward_delimiter(text+1, textend, tc_D_pattern, tc_D_length, OUTTAIL);
					}
				}
				/* else prev curtextbegin is OK: if full AND isn't found, DOWITHMASK is 0-ed so that we search at most 1 line below */
#if	MEASURE_TIMES
				gettimeofday(&initt, NULL);
#endif	/*MEASURE_TIMES*/
				/* Was it really a match in the compressed line from prev line in text to text + strlen(tc_pat_len[pat_index]? */
				if (-1 == exists_tcompressed_word(tc_PatPtr[p], tc_pat_len[pat_index], curtextbegin, text - curtextbegin + tc_pat_len[pat_index], EASYSEARCH))
					goto skip_output;
#if     MEASURE_TIMES
				gettimeofday(&finalt, NULL);
				FILTERALGO_ms +=  (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
#endif  /*MEASURE_TIMES*/

				/* First confirmed match in this record: move textbegin to where the next record starts */
				if (!DOWITHMASK) {
					if (!OUTTAIL || INVERSE) textbegin = curtextend;
					/* NOTE(review): uses D_length although the delimiter calls above use tc_D_length -- confirm which length is meant */
					else if (DELIMITER) textbegin = curtextend - D_length;
					else textbegin = curtextend - 1;
				}
				DOWITHMASK = 1;
				amatched_terminals[pat_index-1] = 1;
				if (AComplexBoolean) {
					/* Can output only after all the matches in the current record have been identified: just like filter_output */
					/* NOTE(review): pat_len[] is used here while the rest of this function uses tc_pat_len[] -- confirm */
					oldtext = text;
					CurrentByteOffset += (oldtext + pat_len[pat_index] - 1 - text);
					text = oldtext + pat_len[pat_index] - 1;
					MATCHED = 0;
					goto skip_output;
				}
				else if ((long)AParse & AND_EXP) {
					/* AND expression: report only once every terminal has been flagged */
					for (j=0; j<anum_terminals; j++) if (!amatched_terminals[j]) break;
					if (j<anum_terminals) goto skip_output;
				}

				MATCHED = 1;
				oldtext = text; /* used only if MULTI_OUTPUT */

/*
 * DO_OUTPUT(change_text): count the match and emit the current record
 * [curtextbegin, curtextend).  May return from tc_m_short: 0 for
 * FILENAMEONLY/SILENT or when an output limit is reached, -1 on output
 * buffer overflow.  When change_text is non-zero, text (and
 * CurrentByteOffset) is also moved: past the matched pattern for
 * MULTI_OUTPUT in the non-INVERSE case, otherwise to textbegin.
 */
#undef	DO_OUTPUT
#define DO_OUTPUT(change_text)\
				num_of_matched++;\
				if(FILENAMEONLY || SILENT)  return 0;\
				if (!COUNT) {\
					PRINTED = print_options(pat_index, text, curtextbegin, curtextend);\
					if(!INVERSE) {\
						if (PRINTRECORD) {\
/* #if     MEASURE_TIMES\
						gettimeofday(&initt, NULL);\
#endif  MEASURE_TIMES*/\
						if (agrep_finalfp != NULL)\
							newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH);\
						else {\
							if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {\
								if (newlen + agrep_outpointer >= agrep_outlen) {\
									OUTPUT_OVERFLOW;\
									return -1;\
								}\
								agrep_outpointer += newlen;\
							}\
						}\
/*#if     MEASURE_TIMES\
						gettimeofday(&finalt, NULL);\
						OUTFILTER_ms +=  (finalt.tv_sec* 1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);\
#endif  MEASURE_TIMES*/\
						}\
						else if (PRINTED) {\
							if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);\
							else agrep_outbuffer[agrep_outpointer ++] = '\n';\
							PRINTED = 0;\
						}\
						if ((change_text) && MULTI_OUTPUT) {     /* next match starting from end of current */\
							CurrentByteOffset += (oldtext + tc_pat_len[pat_index] - 1 - text);\
							text = oldtext + tc_pat_len[pat_index] - 1;\
							MATCHED = 0;\
						}\
						else if (change_text) {\
							CurrentByteOffset += textbegin - text;\
							text = textbegin;\
						}\
					}\
					else {	/* INVERSE: Don't care about filtering time */\
						/* if(lastout < curtextbegin) OUT=1; */\
						if (agrep_finalfp != NULL)\
							newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH);\
						else {\
							if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {\
								if (newlen + agrep_outpointer >= agrep_outlen) {\
									OUTPUT_OVERFLOW;\
									return -1;\
								}\
								agrep_outpointer += newlen;\
							}\
						}\
						lastout=textbegin;\
						if (change_text) {\
							CurrentByteOffset += textbegin - text;\
							text = textbegin;\
						}\
					}\
				}\
				else if (change_text) {\
					CurrentByteOffset += textbegin - text;\
					text = textbegin;\
				}\
				if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||\
				    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0;	/* done */\

				DO_OUTPUT(1)
			}

		skip_output:
                        if(MATCHED && !MULTI_OUTPUT && !AComplexBoolean) break;     /* else look for more possible matches */
			/* Reached the end of the current record: evaluate a pending complex boolean and reset the terminal flags */
			if (DOWITHMASK && (text >= curtextend - 1)) {
				DOWITHMASK = 0;
				if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
					DO_OUTPUT(0)
				}
				if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
			}
		}
		/* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */
		if (DOWITHMASK && (text >= curtextend - 1)) {
			DOWITHMASK = 0;
			if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
				DO_OUTPUT(0)
			}
			if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
		}
		/* Step back one byte after a reported match; presumably this compensates for the ++text in the loop condition */
		if (MATCHED) text--;
		MATCHED = 0;
	} /* while */
	CurrentByteOffset ++;

	/* Do residual stuff: check if there was a match at the end of the line | check if rest of the buffer needs to be output due to inverse */

	if (DOWITHMASK && (text >= curtextend - 1)) {
		DOWITHMASK = 0;
		if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) {
			DO_OUTPUT(0)
		}
		if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals);
	}

	/* INVERSE: emit whatever follows the last matching record, up to and including textend */
	if (INVERSE && !COUNT && (lastout <= textend)) {
		if (agrep_finalfp != NULL)
			newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH);
		else {
			if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
				if (newlen + agrep_outpointer >= agrep_outlen) {
					OUTPUT_OVERFLOW;
					return -1;
				}
				agrep_outpointer += newlen;
			}
		}
	}

        return 0;
}
#endif	/*DOTCOMPRESSED*/

/*
 * f_prep: counting pass over one pattern.
 *
 * For each block of 2 (or 3 when LONG is set) characters inside the first
 * p_size characters of Pattern, builds an Hbits-per-character hash from
 * tr1[] and lowers SHIFT1[hash] to the block's distance from position
 * p_size-1 if the stored value is not already smaller.  Afterwards the
 * bucket counter HASH[key] is incremented, where key is tr[Pattern[0]]
 * when SHORT is set and the hash of the block ending at p_size-1 otherwise.
 *
 * pat_index is not used by this function.
 *
 * The block hash for the key is computed only when SHORT is clear: the old
 * code computed it unconditionally and then discarded it for SHORT, which
 * read Pattern[-1] whenever p_size was 1.
 */
static void
f_prep(pat_index, Pattern)
uchar *Pattern;   int pat_index;
{
	int i, m;
	register unsigned hash = 0;

#ifdef debug
	puts(Pattern);
#endif
	m = p_size;
	/* The lower bound keeps Pattern[i-1] (and Pattern[i-2] for LONG) inside the pattern */
	for (i = m-1; i >= (1+LONG); i--) {
		hash = (tr1[Pattern[i]]);
		hash = (hash << Hbits) + (tr1[Pattern[i-1]]);
		if (LONG) hash = (hash << Hbits) + (tr1[Pattern[i-2]]);
		if (SHIFT1[hash] >= m-1-i) SHIFT1[hash] = m-1-i;
	}
	if (SHORT) {
		/* Short patterns are keyed by their first character alone */
		hash = tr[Pattern[0]];
	}
	else {
		i = m-1;
		hash = (tr1[Pattern[i]]);
		hash = (hash << Hbits) + (tr1[Pattern[i-1]]);
		if (LONG) hash = (hash << Hbits) + (tr1[Pattern[i-2]]);
	}
#ifdef debug
	printf("hash = %d\n", hash);
#endif
	HASH[hash]++;
	return;
}

#if	DOTCOMPRESSED
/*
 * tc_f_prep: counting pass over one pattern, using the tc_* tables.
 *
 * For each block of 2 (or 3 when tc_LONG is set) characters inside the
 * first tc_p_size characters of Pattern, builds an Hbits-per-character hash
 * from tc_tr1[] and lowers tc_SHIFT1[hash] to the block's distance from
 * position tc_p_size-1 if the stored value is not already smaller.
 * Afterwards the bucket counter tc_HASH[key] is incremented, where key is
 * tc_tr[Pattern[0]] when tc_SHORT is set and the hash of the block ending
 * at tc_p_size-1 otherwise.
 *
 * pat_index is not used by this function.
 *
 * The block hash for the key is computed only when tc_SHORT is clear: the
 * old code computed it unconditionally and then discarded it for tc_SHORT,
 * which read Pattern[-1] whenever tc_p_size was 1.
 */
static void
tc_f_prep(pat_index, Pattern)
uchar *Pattern;   int pat_index;
{
	int i, m;
	register unsigned hash = 0;

#ifdef debug
	puts(Pattern);
#endif
	m = tc_p_size;
	/* The lower bound keeps Pattern[i-1] (and Pattern[i-2] for tc_LONG) inside the pattern */
	for (i = m-1; i >= (1+tc_LONG); i--) {
		hash = (tc_tr1[Pattern[i]]);
		hash = (hash << Hbits) + (tc_tr1[Pattern[i-1]]);
		if (tc_LONG) hash = (hash << Hbits) + (tc_tr1[Pattern[i-2]]);
		if (tc_SHIFT1[hash] >= m-1-i) tc_SHIFT1[hash] = m-1-i;
	}
	if (tc_SHORT) {
		/* Short patterns are keyed by their first character alone */
		hash = tc_tr[Pattern[0]];
	}
	else {
		i = m-1;
		hash = (tc_tr1[Pattern[i]]);
		hash = (hash << Hbits) + (tc_tr1[Pattern[i-1]]);
		if (tc_LONG) hash = (hash << Hbits) + (tc_tr1[Pattern[i-2]]);
	}
#ifdef debug
	printf("hash = %d\n", hash);
#endif
	tc_HASH[hash]++;
	return;
}
#endif	/*DOTCOMPRESSED*/

/*
 * f_prep1: placement pass over one pattern.
 *
 * Repeats the SHIFT1[] lowering over the first p_size characters of Pattern,
 * derives the pattern's bucket key (tr[Pattern[0]] when SHORT is set, the
 * tr1[] hash of the block ending at p_size-1 otherwise) and files the
 * pattern in the slot currently named by HASH[key]: PatPtr[] receives the
 * pattern pointer, pat_indices[] receives pat_index and Hash2[] receives a
 * 16-bit value built from tr[] of the first two pattern characters.
 * HASH[key] is then decremented, so the next pattern with the same key
 * lands one slot lower.
 *
 * NOTE(review): presumably called after accumulate() has turned HASH[] into
 * running totals -- confirm against the caller.
 *
 * The block hash for the key is computed only when SHORT is clear: the old
 * code computed it unconditionally and then discarded it for SHORT, which
 * read Pattern[-1] whenever p_size was 1.
 */
static void
f_prep1(pat_index, Pattern)
uchar *Pattern;   int pat_index;
{
	int i, m;
	int hash2;
	register unsigned hash;

	m = p_size;
#ifdef debug
	puts(Pattern);
#endif
	/* The lower bound keeps Pattern[i-1] (and Pattern[i-2] for LONG) inside the pattern */
	for (i = m-1; i >= (1+LONG); i--) {
		hash = (tr1[Pattern[i]]);
		hash = (hash << Hbits) + (tr1[Pattern[i-1]]);
		if (LONG) hash = (hash << Hbits) + (tr1[Pattern[i-2]]);
		if (SHIFT1[hash] >= m-1-i) SHIFT1[hash] = m-1-i;
	}
	if (SHORT) {
		/* Short patterns are keyed by their first character alone */
		hash = tr[Pattern[0]];
	}
	else {
		i = m-1;
		hash = (tr1[Pattern[i]]);
		hash = (hash << Hbits) + (tr1[Pattern[i-1]]);
		if (LONG) hash = (hash << Hbits) + (tr1[Pattern[i-2]]);
	}
	/* Pattern[1] is at worst the terminating NUL of a one-character pattern */
	hash2 = (tr[Pattern[0]] << 8) + tr[Pattern[1]];
#ifdef debug
	printf("hash = %d, HASH[hash] = %d\n", hash, HASH[hash]);
#endif
	PatPtr[HASH[hash]] = Pattern;
	pat_indices[HASH[hash]] = pat_index;
	Hash2[HASH[hash]] = hash2;
	HASH[hash]--;
	return;
}

#if	DOTCOMPRESSED
/*
 * tc_f_prep1: placement pass over one pattern, using the tc_* tables.
 *
 * Repeats the tc_SHIFT1[] lowering over the first tc_p_size characters of
 * Pattern, derives the pattern's bucket key (tc_tr[Pattern[0]] when
 * tc_SHORT is set, the tc_tr1[] hash of the block ending at tc_p_size-1
 * otherwise) and files the pattern in the slot currently named by
 * tc_HASH[key]: tc_PatPtr[] receives the pattern pointer, tc_pat_indices[]
 * receives pat_index and tc_Hash2[] receives a 16-bit value built from
 * tc_tr[] of the first two pattern characters.  tc_HASH[key] is then
 * decremented, so the next pattern with the same key lands one slot lower.
 *
 * NOTE(review): presumably called after tc_accumulate() has turned
 * tc_HASH[] into running totals -- confirm against the caller.
 *
 * The block hash for the key is computed only when tc_SHORT is clear: the
 * old code computed it unconditionally and then discarded it for tc_SHORT,
 * which read Pattern[-1] whenever tc_p_size was 1.
 */
static void
tc_f_prep1(pat_index, Pattern)
uchar *Pattern;   int pat_index;
{
	int i, m;
	int hash2;
	register unsigned hash;

	m = tc_p_size;
#ifdef debug
	puts(Pattern);
#endif
	/* The lower bound keeps Pattern[i-1] (and Pattern[i-2] for tc_LONG) inside the pattern */
	for (i = m-1; i >= (1+tc_LONG); i--) {
		hash = (tc_tr1[Pattern[i]]);
		hash = (hash << Hbits) + (tc_tr1[Pattern[i-1]]);
		if (tc_LONG) hash = (hash << Hbits) + (tc_tr1[Pattern[i-2]]);
		if (tc_SHIFT1[hash] >= m-1-i) tc_SHIFT1[hash] = m-1-i;
	}
	if (tc_SHORT) {
		/* Short patterns are keyed by their first character alone */
		hash = tc_tr[Pattern[0]];
	}
	else {
		i = m-1;
		hash = (tc_tr1[Pattern[i]]);
		hash = (hash << Hbits) + (tc_tr1[Pattern[i-1]]);
		if (tc_LONG) hash = (hash << Hbits) + (tc_tr1[Pattern[i-2]]);
	}
	/* Pattern[1] is at worst the terminating NUL of a one-character pattern */
	hash2 = (tc_tr[Pattern[0]] << 8) + tc_tr[Pattern[1]];
#ifdef debug
	printf("hash = %d, tc_HASH[hash] = %d\n", hash, tc_HASH[hash]);
#endif
	tc_PatPtr[tc_HASH[hash]] = Pattern;
	tc_pat_indices[tc_HASH[hash]] = pat_index;
	tc_Hash2[tc_HASH[hash]] = hash2;
	tc_HASH[hash]--;
	return;
}
#endif	/*DOTCOMPRESSED*/

/*
 * accumulate: convert the per-bucket counts in HASH[] into running totals.
 *
 * After the call, HASH[k] for 1 <= k < MAXHASH holds the sum of the old
 * HASH[0..k]; HASH[0] itself is reset to 0 once its old value has been
 * folded into the totals.
 */
static void
accumulate()
{
	int slot = 1;

	while (slot < MAXHASH) {
		HASH[slot] += HASH[slot - 1];
		slot++;
	}
	HASH[0] = 0;
}

#if	DOTCOMPRESSED
static void
tc_accumulate()
{
	int i;

	for(i=1; i<MAXHASH; i++)  {
	/*
	printf("%d, ", HASH[i]);
	*/
	tc_HASH[i] = tc_HASH[i-1] + tc_HAS

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -