📄 mwm.c
字号:
int(*match) ( void * id, int index,void* data ), void * data ){ int Tleft, index, nfound, tshift, ng; unsigned char *T, *Tend, *B; nfound = 0; Tleft = n; Tend = Tx + n; /* Test if text is shorter than the shortest pattern */ if( (unsigned)n < ps->msShiftLen ) return 0; /* Process each suffix of the Text, left to right, incrementing T so T = S[j] */ for( T = Tx, B = Tx + ps->msShiftLen - 1; B < Tend; T++, B++, Tleft-- ) { /* Multi-Pattern Bad Word Shift */ tshift = ps->msShift2[ ((*B)<<8) | *(B-1) ]; while( tshift ) { B += tshift; T += tshift; Tleft -= tshift; if( B >= Tend ) return nfound; tshift = ps->msShift2[ ((*B)<<8) | *(B-1) ]; } /* Test for last char in Text, we are done, one byte pattern test was done above. */ if( Tleft == 1 ) return nfound; /* Test if the 2 char prefix of this suffix shows up in the hash table */ if( (index = ps->msHash [ ( (*T)<<8 ) | *(T+1) ] ) == (HASH_TYPE)-1 ) continue; /* Match this group against the current suffix */ ng = mwmGroupMatch2( ps, index,Tx, T, Tc, Tleft, data, match ); if( ng < 0 ) { ng = -ng; ng--; nfound += ng; return nfound; } else { nfound += ng; } } return nfound;}/* Display function for testing */static void show_bytes(unsigned n, unsigned char *p){ int i; for(i=0;i<(int)n;i++) { if( p[i] >=32 && p[i]<=127 )printf("%c",p[i]); else printf("\\x%2.2X",p[i]); } }/*** Display patterns in this group*/void mwmGroupDetails( void * pv ){ MWM_STRUCT * ps = (MWM_STRUCT*)pv; int index,i, m, gmax=0, total=0,gavg=0,subgroups; static int k=0; MWM_PATTERN_STRUCT *patrn, *patrnEnd; printf("*** MWM-Pattern-Group: %d\n",k++); subgroups=0; for(i=0;i<65536;i++) { if( (index = ps->msHash [i]) == (HASH_TYPE)-1 ) continue; patrn = &ps->msPatArray[index]; /* 1st pattern of hash group is here */ patrnEnd = patrn + ps->msNumArray[index];/* never go here... */ printf(" Sub-Pattern-Group: %d-%d\n",subgroups,i); subgroups++; for( m=0; patrn < patrnEnd; m++, patrn++ ) /* Test all patterns in the group */ { printf(" Pattern[%d] : ",m); show_bytes(patrn->psLen,patrn->psPat); printf("\n"); } if( m > gmax ) gmax = m; total+=m; gavg = total / subgroups; } printf("Total Group Patterns : %d\n",total); printf(" Number of Sub-Groups : %d\n",subgroups); printf(" Sub-Group Max Patterns: %d\n",gmax); printf(" Sub-Group Avg Patterns: %d\n",gavg);} /***** mwmPrepPatterns:: Prepare the pattern group for searching***/int mwmPrepPatterns( void * pv ){ MWM_STRUCT * ps = (MWM_STRUCT *) pv; int kk; MWM_PATTERN_STRUCT * plist; /* Build an array of pointers to the list of Pattern nodes */ ps->msPatArray = (MWM_PATTERN_STRUCT*)calloc( sizeof(MWM_PATTERN_STRUCT), ps->msNumPatterns ); if( !ps->msPatArray ) { return -1; } ps->msNumArray = (unsigned short *)calloc( sizeof(short), ps->msNumPatterns ); if( !ps->msNumArray ) { return -1; } /* Copy the list node info into the Array */ for( kk=0, plist = ps->plist; plist!=NULL && kk < ps->msNumPatterns; plist=plist->next ) { memcpy( &ps->msPatArray[kk++], plist, sizeof(MWM_PATTERN_STRUCT) ); } mwmAnalyzePattens( ps ); /* Sort the patterns */ qsort( ps->msPatArray, ps->msNumPatterns, sizeof(MWM_PATTERN_STRUCT), sortcmp ); /* Build the Hash table, and pattern groups, per Wu & Manber */ mwmPrepHashedPatternGroups(ps); /* Select the Pattern Matcher Class */ if( ps->msNumPatterns < 5 ) { ps->msMethod = MTH_BM; } else { ps->msMethod = MTH_MWM; } /* Setup Wu-Manber */ if( ps->msMethod == MTH_MWM ) { /* Build the Bad Char Shift Table per Wu & Manber */ mwmPrepBadCharTable(ps); /* Build the Bad Word Shift Table per Wu & Manber */ if( (ps->msShiftLen > 1) && ps->msLargeShifts ) { mbmPrepBadWordTable( ps ); } /* Min patterns is 1 byte */ if( ps->msShiftLen == 1 ) { ps->search = mwmSearchExNoBC; } /* Min patterns is >1 byte */ else if( (ps->msShiftLen > 1) && !ps->msLargeShifts ) { ps->search = mwmSearchExBC; } /* Min patterns is >1 byte - and we've been asked to use a 2 byte bad words shift instead. */ else if( (ps->msShiftLen > 1) && ps->msLargeShifts && ps->msShift2 ) { ps->search = mwmSearchExBW; } /* Min patterns is >1 byte */ else { ps->search = mwmSearchExBC; }#ifdef XXXX // if( ps->msDetails ) /* For testing - show this info */ // mwmGroupDetails( ps );#endif } /* Initialize the Boyer-Moore Pattern data */ if( ps->msMethod == MTH_BM ) { int i; /* Allocate and initialize the BMH data for each pattern */ for(i=0;i<ps->msNumPatterns;i++) { ps->msPatArray[ i ].psBmh = hbm_prep( ps->msPatArray[ i ].psPat, ps->msPatArray[ i ].psLen ); } } return 0;}/*** Search a body of text or data for paterns */int mwmSearch( void * pv, unsigned char * T, int n, int(*match)( void * id, int index, void * data ), void * data ){ MWM_STRUCT * ps = (MWM_STRUCT*)pv; iPatCount += n; ConvCaseToUpperEx( S, T, n ); /* Copy and Convert to Upper Case */ if( ps->msMethod == MTH_BM ) { /* Boyer-Moore */ int i,nfound=0; unsigned char * Tx; for( i=0; i<ps->msNumPatterns; i++ ) { Tx = hbm_match( ps->msPatArray[i].psBmh, S, n ); if( Tx ) { /* If we are case sensitive, do a final exact match test */ if( !ps->msPatArray[i].psNoCase ) { if( memcmp(ps->msPatArray[i].psPatCase,&T[Tx-S],ps->msPatArray[i].psLen) ) continue; /* no match, next pattern please */ } nfound++; if( match(ps->msPatArray[i].psID, (int)(Tx-S),data) ) return nfound; } } return nfound; } else /* MTH_MWM */ { /* Wu-Manber */ return ps->search( ps, S, n, T, match, data ); }}/****/void mwmFeatures(void){ printf("%s\n",MWM_FEATURES);}/****************************************************************************************//****************************************************************************************//****************************************************************************************//****************************************************************************************//****************************************************************************************//****************************************************************************************//****************************************************************************************///Below are for test#ifdef MWM_MAINint FatalError( char * s ){ printf("FatalError: %s\n",s); exit(0);}/* global array of pattern pointers feeds of of ID..see argv parseing...*/char * patArray[10000];/*** Routine to process matches*/static int match ( void* id, int index, void * data ){ printf(" pattern matched: index= %d, id=%d, %s \n", index, id, patArray[(int)id] ); return 0;}/**/typedef struct{ unsigned char * b; int blen;}BINARY;/**/int gethex( int c ){ if( c >= 'A' && c <= 'F' ) return c -'A' + 10; if( c >= 'a' && c <= 'f' ) return c -'a' + 10; if( c >= '0' && c <= '9' ) return c -'0'; return 0;}/**/BINARY * converthexbytes( unsigned char * s){ int val, k=0, m; BINARY * p; int len = strlen(s); printf("--input hex: %s\n",s); p = malloc( sizeof(BINARY) ); p->b = malloc( len / 2 ); p->blen= len / 2; while( *s ) { val = gethex(*s); s++; val <<= 4; if( !*s ) break; // check that we have 2 digits for hex, else ignore the 1st val |= gethex(*s); s++; p->b[k++] = val; } if( k != p->blen ) { printf("hex length mismatch\n"); } printf("--output hex[%d]: ",p->blen); for(m=0;m<p->blen;m++) printf("%2.2x", p->b[m]); printf(" \n"); return p;}/* Synthetic data*/BINARY * syndata( int nbytes, int irand, int repchar ){ BINARY * p =(BINARY*)malloc( sizeof(BINARY) ); if( ! p ) return 0; p->b = (unsigned char *)malloc( nbytes ); if( ! p->b ) return 0; p->blen = nbytes; if( irand ) { int i; srand( time(0) ); for(i=0;i<nbytes;i++) { p->b[i] = (unsigned)( rand() & 0xff ); } } else { memset(p->b,repchar,nbytes); } return p;}/**/int randpat( unsigned char * s, int imin ){ int i,len; static int first=1; if( first ) { first=0; srand(time(0)); } while( 1 ) { len = rand() & 0xf; //max of 15 bytes if( len >= imin ) break; } for(i=0;i<len;i++) { s[i] = 'a' + ( rand() % 26 ); // a-z } s[len]=0; printf("--%s\n",s); return len;}/*** Test driver */int CDECL main ( int argc, char ** argv ){ unsigned char *T, *text; int n,textlen; int nmatches, i, bm = 0; MWM_STRUCT *ps; int npats=0, len, stat, nocase=0; BINARY *p; int irep=0,irand=0,isyn=0,nrep=1024,repchar=0; if( argc < 5 ) { printf("usage: %s [-rand|-rep -n bytes -c|ch repchar -pr npats minlen ] -t|th TextToSearch -nocase [-pr numpats] -p|ph pat1 -p|ph pat2 -p|ph pat3\n",argv[0]); exit(1); } /* -- Allocate a Pattern Matching Structure - and Init it. */ ps = mwmNew(); for(i=1;i<argc;i++) { if( strcmp(argv[i],"-nocase")==0 ) { nocase = 1; } if( strcmp(argv[i],"-p")==0 ) { i++; npats++; patArray[npats] = argv[i]; len = strlen( argv[i] ); mwmAddPatternEx( ps, (unsigned char*)argv[i], len, nocase, 0, 0, (void*)npats /* ID */, 3000 /* IID -internal id*/ ); } if( strcmp(argv[i],"-ph")==0 ) { i++; npats++; patArray[npats] = argv[i]; p = converthexbytes( argv[i] ); mwmAddPatternEx( ps, p->b, p->blen, 1 /* nocase*/, 0, 0, (void*)npats /* ID */, 3000 /* IID -internal id*/ ); } if( strcmp(argv[i],"-pr")==0 ) { int m = atoi( argv[++i] ); int imin = atoi( argv[++i] ); int k; npats = 0; for(k=0;k<m;k++) { unsigned char px[256]; int len = randpat( px, imin ); npats++; patArray[npats] = (unsigned char *)malloc( len+1 ); sprintf(patArray[npats],"%s",px); mwmAddPatternEx( ps, px, len, 0 /* nocase*/, 0, 0, (void*)npats /* ID */, 3000 /* IID -internal id*/ ); } } if( strcmp(argv[i],"-rand")==0 ) { irand = 1; isyn = 1; } if( strcmp(argv[i],"-rep")==0 ) { irep = 1; isyn = 1; } if( strcmp(argv[i],"-n")==0 ) { nrep = atoi( argv[++i] ); isyn = 1; } if( strcmp(argv[i],"-c")==0 ) { repchar = argv[++i][0]; isyn = 1; } if( strcmp(argv[i],"-ch")==0 ) { BINARY * px = converthexbytes( argv[++i] ); repchar = px->b[0]; isyn = 1; } if( strcmp(argv[i],"-t")==0 ) { i++; text = argv[i]; textlen=strlen(text); } if( strcmp(argv[i],"-th")==0 ) { i++; p = converthexbytes( argv[i] ); text = p->b; textlen = p->blen; } } /* generate synthetic text */ if( isyn ) { p = syndata( nrep, irand, repchar ); text = p->b; textlen = p->blen; } /* --- Preprocess Patterns */ mwmPrepPatterns( ps ); /* ---- Do a multi-pattern search in the Text */ stat = mwmSearch( (void*)ps, (unsigned char*)text, textlen, match, 0 ); if( stat == 0 ) { printf("no pattern matches\n"); }else{ printf("%d pattern matches in list\n",stat); } return 0;}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -