⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sgrep.c

📁 Mehldau和Myer的Agrep3版本
💻 C
📖 第 1 页 / 共 5 页
字号:
							agrep_outpointer += outindex;
						}
						NEW_FILE = OFF;
						PRINTED = 1;
					}

					if(BYTECOUNT) {
						if (agrep_finalfp != NULL)
							fprintf(agrep_finalfp, "%d= ", CurrentByteOffset);
						else {
							char s[32];
							int  outindex;
							sprintf(s, "%d=", CurrentByteOffset);
							for(outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
									(s[outindex] != '\0'); outindex++) {
								agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
							}
							if (s[outindex] != '\0') {
								OUTPUT_OVERFLOW;
								return -1;
							}
							agrep_outpointer += outindex;
						}
						PRINTED = 1;
					}

					if (PRINTOFFSET) {
						if (agrep_finalfp != NULL)
							fprintf(agrep_finalfp, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);
						else {
							char s[32];
							int outindex;
							sprintf(s, "@%d{%d} ", CurrentByteOffset - (text -curtextbegin), curtextend-curtextbegin);
							for (outindex=0; (outindex+agrep_outpointer<agrep_outlen) &&
									 (s[outindex] != '\0'); outindex ++) {
								agrep_outbuffer[agrep_outpointer+outindex] = s[outindex];
							}
							if (s[outindex] != '\0') {
								OUTPUT_OVERFLOW;
								return -1;
							}
							agrep_outpointer += outindex;
						}
						PRINTED = 1;
					}

					CurrentByteOffset += textbegin - text;

					text = textbegin;

					if (PRINTRECORD) {
					if (TCOMPRESSED == ON) {
#if	MEASURE_TIMES
						gettimeofday(&initt, NULL);
#endif	/*MEASURE_TIMES*/
						if (agrep_finalfp != NULL)
							newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_finalfp, -1, EASYSEARCH);
						else {
							if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, curtextbegin, curtextend-curtextbegin, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
								if (agrep_outpointer + newlen + 1 >= agrep_outlen) {
									OUTPUT_OVERFLOW;
									return -1;
								}
								agrep_outpointer += newlen;
							}
						}
#if	MEASURE_TIMES
						gettimeofday(&finalt, NULL);
						OUTFILTER_ms +=  (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);
#endif	/*MEASURE_TIMES*/
					}
					else {
						if (agrep_finalfp != NULL) {
							fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp);
						}
						else {
							if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) {
								OUTPUT_OVERFLOW;
								return -1;
							}
							memcpy(agrep_outbuffer+agrep_outpointer, curtextbegin, curtextend-curtextbegin);
							agrep_outpointer += curtextend - curtextbegin;
						}
					}
					}
					else if (PRINTED) {
						if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);
						else agrep_outbuffer[agrep_outpointer ++] = '\n';
						PRINTED = 0;
					}
				}
				else {	/* INVERSE */
					if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
						if (agrep_finalfp != NULL)
							newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH);
						else {
							if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
								if (newlen + agrep_outpointer >= agrep_outlen) {
									OUTPUT_OVERFLOW;
									return -1;
								}
								agrep_outpointer += newlen;
							}
						}
						lastout=textbegin;
						CurrentByteOffset += textbegin - text;
						text = textbegin;
					}
					else { /* NOT TCOMPRESSED */
						if (agrep_finalfp != NULL)
							fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp);
						else {
							if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) {
								OUTPUT_OVERFLOW;
								return -1;
							}
							memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout);
							agrep_outpointer += (curtextbegin - lastout);
						}
						lastout=textbegin;
						CurrentByteOffset += textbegin - text;
						text = textbegin;
					} /* TCOMPRESSED */
				} /* INVERSE */
			}
			else {	/* COUNT */
				CurrentByteOffset += textbegin - text;
				text = textbegin;
			}
			if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
			    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0;	/* done */

/* CONT: was here TG */
			/* shift = 1;			[del] [TG] */
			shift = SHIFT[*text];	/* 	[new] [TG] */
CONT:						/*	now here to restart the skip-loop */
#ifdef _WIN32
;
#endif 
		}
		else shift = d1;
	}

	if (INVERSE && !COUNT && (lastout <= textend)) {
		if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
			if (agrep_finalfp != NULL)
				newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH);
			else {
				if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
					if (newlen + agrep_outpointer >= agrep_outlen) {
						OUTPUT_OVERFLOW;
						return -1;
					}
					agrep_outpointer += newlen;
				}
			}
		}
		else { /* NOT TCOMPRESSED */
			if (agrep_finalfp != NULL)
				fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp);
			else {
				if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) {
					OUTPUT_OVERFLOW;
					return -1;
				}
				memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1);
				agrep_outpointer += (textend - lastout + 1);
			}
		} /* TCOMPRESSED */
	}

	return 0;
}


/*
 * initmask() initializes the mask table for the pattern and the
 * end-position mask.
 *
 *   pattern      the m pattern characters.  Modified in place: every '^'
 *                and '$' is overwritten with '\n'.
 *   Mask         table indexed by character value; the first MAXSYM
 *                entries are rewritten.  On return Mask[c] has all bits
 *                set except bit j (counted from the most significant bit
 *                of a 32-bit word) for every j with pattern[j] == c.
 *   m            number of pattern characters processed.
 *   D            D+1 bits are set in the end-position mask.
 *   endposition  receives the mask: D+1 consecutive one bits starting at
 *                the most significant bit, shifted right by m-(D+1).
 *                It will contain k mask bits if the pattern contains
 *                k fragments.
 *
 * NOTE(review): the shifts below assume D+1 <= m and m <= 32 (Bit1 is a
 * hard-coded 32-bit top bit).  Nothing in this function checks that;
 * confirm the callers guarantee it.
 */

static void initmask(pattern, Mask, m, D, endposition)

CHARTYPE *pattern; 
unsigned *Mask; 
register int m, D; 
unsigned *endposition;
{
	register unsigned Bit1, c;
	register int i, frag_num;

	/* Bit1 = 1 << 31;*/    /* the first bit of Bit1 is 1, others 0.  */
	Bit1 = (unsigned)0x80000000;
	frag_num = D+1; 

	/* Build frag_num leading one bits, then slide them into place. */
	*endposition = 0;
	for (i = 0; i < frag_num; i++) {
		*endposition = *endposition | (Bit1 >> i);
	}
	*endposition = *endposition >> (m - frag_num);

	/* Start with every bit set: a set bit means "no match here". */
	for (i = 0; i < MAXSYM; i++) {
		Mask[i] = ~0;
	}

	/*
	 * One pass over the pattern: map the line anchors to '\n', then clear
	 * bit i in the mask of the character found at position i.  Position i
	 * is the only place where bit i can be cleared, so a single pass gives
	 * the same table as comparing every pair of positions.
	 */
	for (i = 0; i < m; i++) {
		if (pattern[i] == '^' || pattern[i] == '$') {
			pattern[i] = '\n'; 
		}
		c = pattern[i];
		Mask[c] = Mask[c] & ~( Bit1 >> i );
	}
}

static void
prep(Pattern, M, D)             /* preprocessing for partitioning_bm */
CHARTYPE *Pattern;  /* can be fine-tuned to choose a better partition */
register int M, D;
{
	register int i, j, k, p, shift;
	register unsigned m;
	unsigned hash, b_size = 3;
	m = M/(D+1);
	p = M - m*(D+1);
	for (i = 0; i < MAXSYM; i++) SHIFT[i] = m;
	for (i = M-1; i>=p ; i--) {
		shift = (M-1-i)%m;
		hash = Pattern[i];
		if((int)(SHIFT[hash]) > (int)(shift)) SHIFT[hash] = shift;
	}
#ifdef DEBUG
	for(i=0; i<M; i++) printf(" %d,", SHIFT[Pattern[i]]);
	printf("\n");
#endif
	shift_1 = m;
	for(i=0; i<D+1; i++) {
		j = M-1 - m*i;
		for(k=1; k<m; k++) {
			for(p=0; p<D+1; p++) 
				if(Pattern[j-k] == Pattern[M-1-m*p]) 
					if(k < shift_1) shift_1 = k;
		}
	}
#ifdef DEBUG
	printf("\nshift_1 = %d", shift_1);
#endif
	if(shift_1 == 0) shift_1 = 1;
	for(i=0; i<MAXMEMBER; i++) MEMBER[i] = 0;
	if (m < 3) b_size = m;
	for(i=0; i<D+1; i++) {
		j = M-1 - m*i;
		hash = 0;
		for(k=0; k<b_size; k++) {
			hash = (hash << 2) + Pattern[j-k];
		}
#ifdef DEBUG
		printf(" hash = %d,", hash);
#endif
		MEMBER[hash] = 1;
	}
}

int
agrep( pat, M, text, textend, D, oldpat, oldM) 
int M, D, oldM; 
register CHARTYPE *text, *textend, *pat, *oldpat;
{
	register int i;
	register int m = M/(D+1);
	register CHARTYPE *textbegin;
	CHARTYPE *textstart;
	register int shift, HASH;
	int  j=0, k, d1;
	int  n, cdx;
	int  Candidate[MaxCan][2], round, lastend=0;
	unsigned R1[MaxError+1], R2[MaxError+1]; 
	register unsigned int r1, endpos, c; 
	unsigned currentpos;
	unsigned Bit1;
	unsigned r_newline;
	int oldbyteoffset;
	CHARTYPE *lastout = text;
	int newlen;

	Candidate[0][0] = Candidate[0][1] = 0; 
	d1 = shift_1;
	cdx = 0;
	if(m < 3) r1 = m;
	else r1 = 3;
	textbegin = text;
	shift = m-1;
	while (text < textend) {
		textstart = text;
		shift = SHIFT[*(text += shift)];
		while(shift) {
			shift = SHIFT[*(text += shift)];
			shift = SHIFT[*(text += shift)];
		}
		CurrentByteOffset += text - textstart;
		j = 1; 
		HASH = *text;
		while(j < r1) { 
			HASH = (HASH << 2) + *(text-j);
			j++; 
		}
		if (MEMBER[HASH]) { 
			i = text - textbegin;
			if((i - M - D - 10) > Candidate[cdx][1]) { 	
				Candidate[++cdx][0] = i-M-D-2;
				Candidate[cdx][1] = i+M+D; 
			}
			else Candidate[cdx][1] = i+M+D;
			shift = d1;
		}
		else shift = d1;
	}

	CurrentByteOffset += (textbegin - text);
	text = textbegin;
	n = textend - textbegin;
	r_newline = '\n';
	/* for those candidate areas, find the D-error matches                     */
	if(Candidate[1][0] < 0) Candidate[1][0] = 0;
	endpos = endposition;                /* the mask table and the endposition */
	/* Bit1 = (1 << 31); */
	Bit1 = (unsigned)0x80000000;
	oldbyteoffset = CurrentByteOffset;
	for(round = 0; round <= cdx; round++)
	{  
		i = Candidate[round][0] ; 
		if(Candidate[round][1] > n) Candidate[round][1] = n;
		if(i < 0) i = 0;
		CurrentByteOffset = oldbyteoffset+i;
		R1[0] = R2[0] = ~0;
		R1[1] = R2[1] = ~Bit1;
		for(k = 1; k <= D; k++) R1[k] = R2[k] = (R1[k-1] >> 1) & R1[k-1];
		while (i < Candidate[round][1])                     
		{  
			c = text[i++];
			CurrentByteOffset ++;
			if(c == r_newline) {
				for(k = 0 ; k <= D; k++) R1[k] = R2[k] = (~0 );
			}
			r1 = Mask[c];
			R1[0] = (R2[0] >> 1) | r1;
			for(k=1; k<=D; k++)
				R1[k] = ((R2[k] >> 1) | r1) & R2[k-1] & ((R1[k-1] & R2[k-1]) >> 1);
			if((R1[D] & endpos) == 0) { 
				num_of_matched++;
				if(FILENAMEONLY) return 0; 
				currentpos = i;
				if(i <= lastend) {
					CurrentByteOffset += lastend - i;
					i = lastend;
				}
				else {
					int oldcurrentpos = currentpos;
					if (-1 == s_output(text, &currentpos, textbegin, textend, &lastout, pat, M, oldpat, oldM)) return -1;
					CurrentByteOffset += currentpos - oldcurrentpos;
					i = currentpos; 
				}
				lastend = i;
				for(k=0; k<=D; k++) R1[k] = R2[k] = ~0;
				if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
				    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0;	/* done */
			}

			/* copying the code to save a few instructions.
			you need to understand the shift-or algorithm
			to figure this one... */

			c = text[i++];
			CurrentByteOffset ++;
			if(c == r_newline) {
				for(k = 0 ; k <= D; k++) R1[k] = R2[k] = (~0 );
			}
			r1 = Mask[c];
			R2[0] = (R1[0] >> 1) | r1;
			for(k = 1; k <= D; k++)
				R2[k] = ((R1[k] >> 1) | r1) & R1[k-1] & ((R1[k-1] & R2[k-1]) >> 1);
			if((R2[D] & endpos) == 0) { 
				currentpos = i;
				num_of_matched++;
				if(FILENAMEONLY) return 0; 
				if(i <= lastend) {
					CurrentByteOffset += lastend - i;
					i = lastend;
				}
				else {
					int oldcurrentpos = currentpos;
					if (-1 == s_output(text, &currentpos, textbegin, textend, &lastout, pat, M, oldpat, oldM)) return -1;
					CurrentByteOffset += currentpos - oldcurrentpos;
					i = currentpos; 
				}
				lastend = i;
				for(k=0; k<=D; k++) R1[k] = R2[k] = ~0;
				if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
				    ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0;	/* done */
			}
		}
	}


	if (INVERSE && !COUNT && (lastout <= textend)) {
		if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */
			if (agrep_finalfp != NULL)
				newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH);
			else {
				if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {
					if (newlen + agrep_outpointer >= agrep_outlen) {
						OUTPUT_OVERFLOW;
						return -1;
					}
					agrep_outpointer += newlen;
				}
			}
		}
		else { /* NOT TCOMPRESSED */
			if (agrep_finalfp != NULL)
				fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp);
			else {
				if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -