⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sgrep.c

📁 agrep
💻 C
📖 第 1 页 / 共 2 页
字号:
/* Copyright (c) 1991 Sun Wu and Udi Manber.  All Rights Reserved. */#include <stdio.h>#include <ctype.h>#define MAXSYM  256#define MAXMEMBER 8192#define	CHARTYPE	unsigned char#define MaxError 20#define MAXPATT 256#define MAXLINE 1024#define MaxCan  2048#define BLOCKSIZE    8192#define MAX_SHIFT_2  4096#define ON      1#define LOG_ASCII 8#define LOG_DNA  3#define MAXMEMBER_1 65536#define LONG_EXAC  20#define LONG_APPX  24#define W_DELIM    128extern COUNT, FNAME, SILENT, FILENAMEONLY, num_of_matched;extern DNA ;  /* DNA flag is set in checksg when pattern is DNA pattern and		 p_size > 16  */extern WORDBOUND, WHOLELINE, NOUPPER;extern unsigned char CurrentFileName[],  Progname[]; extern unsigned Mask[];extern unsigned endposition;unsigned char BSize;                /* log_c m   */unsigned char char_map[MAXSYM];	/* data area */int shift_1;CHARTYPE SHIFT[MAXSYM];CHARTYPE MEMBER[MAXMEMBER];CHARTYPE pat[MAXPATT];unsigned Hashmask;char MEMBER_1[MAXMEMBER_1];CHARTYPE TR[MAXSYM];char_tr(pat, m)	unsigned char *pat;	int *m;{int i;unsigned char temp[MAXPATT];	for(i=0; i<MAXSYM; i++) TR[i] = i;	if(NOUPPER) {		for(i='A'; i<= 'Z'; i++) TR[i] = i + 'a' - 'A';	}	if(WORDBOUND) { /* SUN: To be added to be more complete */		for(i=0; i<128; i++) {			if(!isalnum(i)) TR[i] = W_DELIM;		}	}	if(WHOLELINE) {		memcpy(temp, pat, *m);		pat[0] = '\n';		memcpy(pat+1, temp, *m);		pat[*m+1] = '\n';		pat[*m+2] = 0;		*m = *m + 2;	}}sgrep(pat, m, fd, D)CHARTYPE *pat;  int fd, m, D;{     CHARTYPE text[BLOCKSIZE+2*MAXLINE+MAXPATT]; /* input text stream */    int offset = 2*MAXLINE;    int buf_end, num_read, i, start, end, residue = 0;    if(pat[0] == '^' || pat[0] == '$') pat[0] = '\n';    if(pat[m-1] == '^' || pat[m-1] == '$') pat[m-1] = '\n';    char_tr(pat, &m);   /* will change pat, and m if WHOLELINE is ON */    text[offset-1] = '\n';  /* initial case */    for(i=0; i < MAXLINE; i++) text[i] = 0;   /* security zone */    start = offset;       if(WHOLELINE) start--;    if(m >= MAXPATT) {         fprintf(stderr, "%s: pattern too long\n", Progname);         exit(2);    }    if(D == 0) {	if(m > LONG_EXAC) m_preprocess(pat);	else prep_bm(pat, m);    }    else if (DNA) prep4(pat, m);	 else 	if(m >= LONG_APPX) am_preprocess(pat);		else {			prep(pat, m, D);			initmask(pat, Mask, m, 0, &endposition); 		}    for(i=1; i<=m; i++) text[BLOCKSIZE+offset+i] = pat[m-1];		/* to make sure the skip loop in bm() won't go out of bound */    while( (num_read = read(fd, text+offset, BLOCKSIZE)) > 0)     {       buf_end = end = offset + num_read -1 ;       while(text[end]  != '\n' && end > offset) end--;       residue = buf_end - end + 1 ;       text[start-1] = '\n';       if(D==0)  {		if(m > LONG_EXAC) monkey(pat, m, text+start, text+end);		else bm(pat, m, text+start, text+end);       }       else {		if(DNA) monkey4( pat, m, text+start, text+end, D  );		else {		  if(m >= LONG_APPX) a_monkey(pat, m, text+start, text+end, D);		  else       agrep(pat, m, text+start, text+end, D);		}       }       if(FILENAMEONLY && num_of_matched) {            printf("%s\n", CurrentFileName);            return; }       start = offset - residue ;       if(start < MAXLINE) {            start = MAXLINE;        }       strncpy(text+start, text+end, residue);       start++;    } /* end of while(num_read = ... */    return;} /* end sgrep *//* SUN: bm assumes that the content of text[n]...text[n+m-1] is pat[m-1] such that the skip loop is guaranteed to terminated */bm(pat, m, text, textend)	CHARTYPE *text, *textend, *pat;  int m;{register int shift;register int  m1, j, d1; /*printf("%d\t", textend - text);printf("%c, %c", *text, *textend);*/d1 = shift_1;    /* at least 1 */m1 = m - 1;shift = 0;       while (text <= textend) {	shift = SHIFT[*(text += shift)];	while(shift) {          		shift = SHIFT[*(text += shift)];		shift = SHIFT[*(text += shift)];		shift = SHIFT[*(text += shift)];	}		j = 0;		while(TR[pat[m1 - j]] == TR[*(text - j)]) {			if(++j == m)  break;       /* if statement can be						    saved, but for safty ... */		}	        if (j == m ) { 			if(text > textend) return;			if(WORDBOUND) {				if(TR[*(text+1)] != W_DELIM) goto CONT;				if(TR[*(text-m)] != W_DELIM) goto CONT;			}			num_of_matched++;			if(FILENAMEONLY) return;			if(!(COUNT)) {				if(FNAME) printf("%s: ", CurrentFileName);				while(*(--text) != '\n');				while(*(++text) != '\n') putchar(*(text));				putchar(*text);			}			else { while(*text != '\n') text++; } CONT:			shift = 1;                }		else shift = d1;  }return;}  /* initmask() initializes the mask table for the pattern                    */ /* endposition is a mask for the endposition of the pattern                 *//* endposition will contain k mask bits if the pattern contains k fragments */initmask(pattern, Mask, m, D, endposition)CHARTYPE *pattern; unsigned *Mask; register int m, D; unsigned *endposition;{  register unsigned Bit1, c;  register int i, j, frag_num;  Bit1 = 1 << 31;    /* the first bit of Bit1 is 1, others 0.  */  frag_num = D+1; *endposition = 0;  for (i = 0; i < frag_num; i++) *endposition = *endposition | (Bit1 >> i);  *endposition = *endposition >> (m - frag_num);  for(i = 0; i < m; i++)           if (pattern[i] == '^' || pattern[i] == '$') {              pattern[i] = '\n';           }  for(i = 0; i < MAXSYM; i++) Mask[i] = ~0;  for(i = 0; i < m; i++)     /* initialize the mask table */  {  c = pattern[i];     for ( j = 0; j < m; j++)           if( c == pattern[j] )               Mask[c] = Mask[c] & ~( Bit1 >> j ) ;  }}prep(Pattern, M, D)             /* preprocessing for partitioning_bm */	CHARTYPE *Pattern;  /* can be fine-tuned to choose a better partition */	register int M, D;{register int i, j, k, p, shift;register unsigned m;unsigned hash, b_size = 3;	m = M/(D+1);	p = M - m*(D+1);	for (i = 0; i < MAXSYM; i++) SHIFT[i] = m;	for (i = M-1; i>=p ; i--) {		shift = (M-1-i)%m;		hash = Pattern[i];		if(SHIFT[hash] > shift) SHIFT[hash] = shift;	}#ifdef DEBUG	for(i=0; i<M; i++) printf(" %d,", SHIFT[Pattern[i]]);	printf("\n");#endif	shift_1 = m;	for(i=0; i<D+1; i++) {		j = M-1 - m*i;		for(k=1; k<m; k++) {			for(p=0; p<D+1; p++) 				if(Pattern[j-k] == Pattern[M-1-m*p]) 					if(k < shift_1) shift_1 = k;		}	}#ifdef DEBUG	printf("\nshift_1 = %d", shift_1);#endif	if(shift_1 == 0) shift_1 = 1;	for(i=0; i<MAXMEMBER; i++) MEMBER[i] = 0;	if (m < 3) b_size = m;	for(i=0; i<D+1; i++) {		j = M-1 - m*i;		hash = 0;		for(k=0; k<b_size; k++) {			hash = (hash << 2) + Pattern[j-k];		}#ifdef DEBUG	printf(" hash = %d,", hash);#endif		MEMBER[hash] = 1;	}}agrep( pat, M, text, textend, D ) int M, D ; register CHARTYPE *text, *textend, *pat;{  register int i;  register int m = M/(D+1);  register CHARTYPE *textstart;  register int shift, HASH;  int  j=0, k, m1, d1;  int  n, cdx;  int  Candidate[MaxCan][2], round, lastend=0;  unsigned R1[MaxError+1], R2[MaxError+1];   register unsigned int r1, endpos, c;   unsigned currentpos;  unsigned Bit1;  unsigned r_newline;  Candidate[0][0] = Candidate[0][1] = 0;   d1 = shift_1;  cdx = 0;  if(m < 3) r1 = m;  else r1 = 3;  textstart = text;  shift = m-1;  while (text < textend) {	shift = SHIFT[*(text += shift)];	while(shift) {		shift = SHIFT[*(text += shift)];		shift = SHIFT[*(text += shift)];	}		j = 1; HASH = *text;		while(j < r1) { HASH = (HASH << 2) + *(text-j);				j++; }	        if (MEMBER[HASH]) { 			i = text - textstart;                     	if((i - M - D - 10) > Candidate[cdx][1]) { 					Candidate[++cdx][0] = i-M-D-2;                          	Candidate[cdx][1] = i+M+D; }                     	else Candidate[cdx][1] = i+M+D;			shift = d1;                }		else shift = d1;  }  text = textstart;  n = textend - textstart;  r_newline = '\n';  /* for those candidate areas, find the D-error matches                     */  if(Candidate[1][0] < 0) Candidate[1][0] = 0;  endpos = endposition;                /* the mask table and the endposition */  Bit1 = (1 << 31);  for(round = 0; round <= cdx; round++)  {  i = Candidate[round][0] ;      if(Candidate[round][1] > n) Candidate[round][1] = n;     if(i < 0) i = 0;#ifdef DEBUG     printf("round: %d, start=%d, end=%d, ", round, i, Candidate[round][1]);#endif     R1[0] = R2[0] = ~0;     R1[1] = R2[1] = ~Bit1;     for(k = 1; k <= D; k++) R1[k] = R2[k] = (R1[k-1] >> 1) & R1[k-1];     while (i < Candidate[round][1])                          {  	    c = text[i++];            if(c == r_newline) {               for(k = 0 ; k <= D; k++) R1[k] = R2[k] = (~0 );            }            r1 = Mask[c];            R1[0] = (R2[0] >> 1) | r1;            for(k=1; k<=D; k++)                R1[k] = ((R2[k] >> 1) | r1) & R2[k-1] & ((R1[k-1] & R2[k-1]) >> 1);            if((R1[D] & endpos) == 0) {                                     num_of_matched++;                                    if(FILENAMEONLY) { return; }                                    currentpos = i;                                    if(i <= lastend) i = lastend;                                    else {                                       s_output(text, &currentpos);                                        i = currentpos;                                     }                                    lastend = i;                                    for(k=0; k<=D; k++) R1[k] = R2[k] = ~0;                                  }            c = text[i++];            if(c == r_newline) {                for(k = 0 ; k <= D; k++) R1[k] = R2[k] = (~0 );            }            r1 = Mask[c];            R2[0] = (R1[0] >> 1) | r1;            for(k = 1; k <= D; k++)                R2[k] = ((R1[k] >> 1) | r1) & R1[k-1] & ((R1[k-1] & R2[k-1]) >> 1);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -