⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mgrep.c

📁 一个关于多模匹配算法的实现
💻 C
字号:
/* Copyright (c) 1991 Sun Wu and Udi Manber.  All Rights Reserved. */
/* multipattern matcher */
#include <stdio.h>
#include <ctype.h>
#define MAXPAT  256
#define MAXLINE 1024
#define MAXSYM  256
#define MAXMEMBER1 4096
#define MAXPATFILE 260000
#define BLOCKSIZE  8192
#define MAXHASH    8192
#define mm 	   8191
#define max_num    30000  
#define W_DELIM	   128
#define L_DELIM    10 

extern  FNAME, num_of_matched;
extern unsigned char Progname[]; 

int LONG  = 0;
int SHORT = 0;
int p_size=0;
unsigned char SHIFT1[MAXMEMBER1];
unsigned char tr[MAXSYM];
unsigned char tr1[MAXSYM];
struct pat_list {
	int  index;
	struct pat_list *next;
} *HASH[MAXHASH];
struct pat_list  *pt, *qt;
unsigned char buf[MAXPATFILE+BLOCKSIZE];
unsigned char pat_spool[MAXPATFILE+2*max_num+MAXPAT];
unsigned char *patt[max_num];
unsigned char pat_len[max_num];


prepf(fp)
int fp;
{
    int length=0, i, p=1, pdx=0, num_pat;
    unsigned char *pat_ptr=pat_spool, temp[10];
    unsigned Mask = 15;
    int num_read;

    while((num_read = read(fp, buf+length, BLOCKSIZE)) > 0) {
	length = length + num_read;
	if(length > MAXPATFILE) {
		printf("maximum pattern file size is %d\n",MAXPATFILE);
		exit(2);
	}
    }
    buf[length] = '\n';
    i = 0; p=1;
    while(i<length) {
	patt[p] = pat_ptr;
	//if(WORDBOUND) *pat_ptr++ = W_DELIM;
	//if(WHOLELINE) *pat_ptr++ = L_DELIM;
	while((*pat_ptr = buf[i++]) != '\n') pat_ptr++;
	//if(WORDBOUND) *pat_ptr++ = W_DELIM;
	//if(WHOLELINE) *pat_ptr++ = L_DELIM;           /* Can't be both on */
	*pat_ptr++ = 0;
	p++;  
    }
    if(p>max_num) {
	printf("maximum number of patterns is %d\n",max_num); 
	exit(2);
    }
    for(i=1; i<20; i++) *pat_ptr = i;  /* boundary safety zone */
    for(i=0; i< MAXSYM; i++) tr[i] = i;
    for(i=0; i< MAXSYM; i++) tr1[i] = tr[i]&Mask;
    num_pat = p-1;
    p_size = MAXPAT;
    for(i=1 ; i <= num_pat; i++) {
	p = strlen(patt[i]);
	pat_len[i] = p;
	if(p!=0 && p < p_size) p_size = p;
    }
    if(p_size == 0) {
	printf("the pattern file is empty\n");
	exit(2);
    }
    if(length > 400 && p_size > 2) LONG = 1;
    if(p_size == 1) SHORT = 1;
    for(i=0; i<MAXMEMBER1; i++) SHIFT1[i] = p_size - 2;
    for(i=0; i<MAXHASH; i++) {
	HASH[i] = 0;
    }
    for(i=1; i<= num_pat; i++) f_prep(i, patt[i]);
}


mgrep(fd)
int fd;
{ 
    register char r_newline = '\n';
    unsigned char text[2*BLOCKSIZE+MAXLINE]; 
    register int buf_end, num_read, i=0, j, start, end, residue = 0;

    text[MAXLINE-1] = '\n';  /* initial case */
    start = MAXLINE-1;

    while( (num_read = read(fd, text+MAXLINE, BLOCKSIZE)) > 0) 
    {
//       countline(text+MAXLINE, num_read);
       buf_end = end = MAXLINE + num_read -1 ;
       while(text[end]  != r_newline && end > MAXLINE) end--;
       residue = buf_end - end  + 1 ;
       text[start-1] = r_newline;
       if(SHORT) m_short(text, start, end);
       else      monkey1(text, start, end);
       start = MAXLINE - residue;
       if(start < 0) {
            start = 1; 
       }
       strncpy(text+start, text+end, residue);
    } /* end of while(num_read = ... */
    text[MAXLINE] = '\n';
    text[start-1] = '\n';
    if(residue > 1) {
        if(SHORT) m_short(text, start, end);
        else      monkey1(text, start, end);
    }
    return;
} /* end mgrep */

monkey1( text, start, end  ) 
int start, end; register unsigned char *text;
{
register unsigned char *textend;
register unsigned hash, i;
register unsigned char shift; 
register int  m1, j, Long=LONG; 
int pat_index, m=p_size; 
int MATCHED=0;
register unsigned char *qx;
register struct pat_list *p;
unsigned char *lastout;
int OUT=0;

textend = text + end;
m1 = m - 1;
lastout = text+start+1;
text = text + start + m1 ;
while (text <= textend) 
{
	hash=tr1[*text];
	hash=(hash<<4)+(tr1[*(text-1)]);
	if(Long) hash=(hash<<4)+(tr1[*(text-2)]);
	shift = SHIFT1[hash];
	if(shift == 0) {
		hash=0;
		for(i=0;i<=m1;i++)  {
		    hash=(hash<<4)+(tr1[*(text-i)]);
		}
		hash=hash&mm;
		p = HASH[hash];
		while(p != 0) 
		{
			pat_index = p->index;
			p = p->next;
			qx = text-m1;
			j = 0;
			while(tr[patt[pat_index][j]] == tr[*(qx++)]) j++;
	        	if (j > m1 ) 
			   if(pat_len[pat_index] <= j) 
			   {
				if(text > textend) return;
                                printf("matched_word:%s\n",patt[pat_index]);
                		num_of_matched++;
				MATCHED=1;
                          	while(*(--text) != '\n');
				if(lastout < text) OUT=1;
				while(lastout < text) putchar(*lastout++);
				if(OUT) {
					putchar('\n');
					OUT=0;
					}
                        	while(*(++text) != '\n');
				lastout=text+1;
                	}
			if(MATCHED) break;
		}
		if(!MATCHED) shift = 1;
		else {
			MATCHED = 0;
			shift = m1;
		}
        }
	text = text + shift;
  }
}

m_short( text, start, end  ) 
int start, end; register unsigned char *text;
{
register unsigned char *textend;
register unsigned i; 
register int  j; 
register struct pat_list *p, *q;
register int pat_index; 
int MATCHED=0;
int OUT=0;
unsigned char *lastout;
unsigned char *qx;

textend = text + end;
lastout = text + start + 1;
text = text + start - 1 ;
while (++text <= textend) {
		p = HASH[*text];
		while(p != 0) {
			pat_index = p->index;
			p = p->next;
			qx = text;
			j = 0;
			while(tr[patt[pat_index][j]] == tr[*(qx++)]) j++;
			if(pat_len[pat_index] <= j) {
				if(text >= textend) return;
                		num_of_matched++;
                        	while(*(--text) != '\n');
				if(lastout < text) OUT=1;
				while(lastout < text) putchar(*lastout++);
				if(OUT) {
					putchar('\n');
					OUT=0;
				}
                         	while(*(++text) != '\n');
				lastout=text+1;
				MATCHED = 1;
                	}
			if(MATCHED) break;
		}
		MATCHED = 0;
  } /* while */
}

f_prep(pat_index, Pattern)
unsigned char *Pattern ; int pat_index;
{
int i, j, m;
register unsigned hash, Mask=15;
	m = p_size;
	for (i = m-1; i>=(1+LONG); i--) {
		hash = (Pattern[i] & Mask);
		hash = (hash << 4) + (Pattern[i-1]& Mask);
		if(LONG) hash = (hash << 4) + (Pattern[i-2] & Mask);
		if(SHIFT1[hash] >= m-1-i) SHIFT1[hash] = m-1-i;
	}
	if(SHORT) Mask = 255;  /* 011111111 */
	hash = 0;
	for(i = m-1; i>=0; i--)  {
	    hash = (hash << 4) + (tr[Pattern[i]]&Mask);
	}
	hash=hash&mm;
	qt = (struct pat_list *) malloc(sizeof(struct pat_list));
	qt->index = pat_index;
	pt = HASH[hash];
	qt->next = pt;
	HASH[hash] = qt;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -