⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mwm.c

📁 一种多模串匹配算法
💻 C
📖 第 1 页 / 共 3 页
字号:
                int(*match) ( void *  id, int index,void* data ),                void * data ){  int                 Tleft, index, nfound, tshift, ng;  unsigned char      *T, *Tend, *B;  nfound = 0;  Tleft = n;  Tend  = Tx + n;  /* Test if text is shorter than the shortest pattern */  if( (unsigned)n < ps->msShiftLen )      return 0;  /*  Process each suffix of the Text, left to right, incrementing T so T = S[j] */  for( T = Tx, B = Tx + ps->msShiftLen - 1; B < Tend; T++, B++, Tleft-- )  {     /* Multi-Pattern Bad Word Shift */     tshift = ps->msShift2[ ((*B)<<8) | *(B-1) ];     while( tshift )      {        B     += tshift;  T += tshift; Tleft -= tshift;        if( B >= Tend ) return nfound;        tshift = ps->msShift2[ ((*B)<<8) | *(B-1) ];     }     /* Test for last char in Text, we are done, one byte pattern test was done above. */     if( Tleft == 1 ) return nfound;      /* Test if the 2 char prefix of this suffix shows up in the hash table */     if( (index = ps->msHash [ ( (*T)<<8 ) | *(T+1) ] ) == (HASH_TYPE)-1 )         continue;      /* Match this group against the current suffix */     ng = mwmGroupMatch2( ps, index,Tx, T, Tc, Tleft, data, match );     if( ng < 0 )     {         ng = -ng;         ng--;         nfound += ng;         return nfound;     }     else     {        nfound += ng;     }  }  return nfound;}/* Display function for testing */static void show_bytes(unsigned n, unsigned char *p){    int i;    for(i=0;i<(int)n;i++)    {       if( p[i] >=32 && p[i]<=127 )printf("%c",p[i]);       else printf("\\x%2.2X",p[i]);    }  }/***   Display patterns in this group*/void mwmGroupDetails( void * pv ){   MWM_STRUCT * ps = (MWM_STRUCT*)pv;   int index,i, m, gmax=0, total=0,gavg=0,subgroups;   static int k=0;   MWM_PATTERN_STRUCT *patrn, *patrnEnd;    printf("*** MWM-Pattern-Group: %d\n",k++);     subgroups=0;       for(i=0;i<65536;i++)    {       if( (index = ps->msHash [i]) == (HASH_TYPE)-1 )           continue; 	          patrn    = &ps->msPatArray[index];       /* 1st pattern of hash group is here */       patrnEnd = patrn + ps->msNumArray[index];/* never go here... */           printf("  Sub-Pattern-Group: %d-%d\n",subgroups,i);              subgroups++;              for( m=0; patrn < patrnEnd; m++, patrn++ )  /* Test all patterns in the group */       {          printf("   Pattern[%d] : ",m);	  show_bytes(patrn->psLen,patrn->psPat);	  	  printf("\n");       }                if( m > gmax ) gmax = m;              total+=m;              gavg = total / subgroups;    }        printf("Total Group Patterns    : %d\n",total);    printf("  Number of Sub-Groups  : %d\n",subgroups);    printf("  Sub-Group Max Patterns: %d\n",gmax);    printf("  Sub-Group Avg Patterns: %d\n",gavg);}      /***** mwmPrepPatterns::    Prepare the pattern group for searching***/int mwmPrepPatterns( void * pv ){   MWM_STRUCT * ps = (MWM_STRUCT *) pv;   int kk;   MWM_PATTERN_STRUCT * plist;   /* Build an array of pointers to the list of Pattern nodes */   ps->msPatArray = (MWM_PATTERN_STRUCT*)calloc( sizeof(MWM_PATTERN_STRUCT), ps->msNumPatterns );   if( !ps->msPatArray )    {         return -1;    }   ps->msNumArray = (unsigned short *)calloc( sizeof(short), ps->msNumPatterns  );   if( !ps->msNumArray )    {         return -1;    }   /* Copy the list node info into the Array */   for( kk=0, plist = ps->plist; plist!=NULL && kk < ps->msNumPatterns; plist=plist->next )   {        memcpy( &ps->msPatArray[kk++], plist, sizeof(MWM_PATTERN_STRUCT) );   }     mwmAnalyzePattens( ps );  /* Sort the patterns */  qsort( ps->msPatArray, ps->msNumPatterns, sizeof(MWM_PATTERN_STRUCT), sortcmp );   /* Build the Hash table, and pattern groups, per Wu & Manber */  mwmPrepHashedPatternGroups(ps);  /* Select the Pattern Matcher Class */  if( ps->msNumPatterns < 5 )  {     ps->msMethod =  MTH_BM;  }  else  {     ps->msMethod =  MTH_MWM;  }   /* Setup Wu-Manber */  if( ps->msMethod == MTH_MWM )  {      /* Build the Bad Char Shift Table per Wu & Manber */      mwmPrepBadCharTable(ps);     /* Build the Bad Word Shift Table per Wu & Manber */     if( (ps->msShiftLen > 1) && ps->msLargeShifts )      {       mbmPrepBadWordTable( ps );     }     /* Min patterns is 1 byte */     if( ps->msShiftLen == 1 )      {        ps->search =  mwmSearchExNoBC;     }     /* Min patterns is >1 byte */     else if( (ps->msShiftLen >  1) && !ps->msLargeShifts )      {      ps->search =  mwmSearchExBC;     }     /* Min patterns is >1 byte - and we've been asked to use a 2 byte bad words shift instead. */     else if( (ps->msShiftLen >  1) && ps->msLargeShifts && ps->msShift2 )      {      ps->search =  mwmSearchExBW;     }     /* Min patterns is >1 byte */     else     {        ps->search =  mwmSearchExBC;     }#ifdef XXXX          // if( ps->msDetails )   /* For testing - show this info */    //    mwmGroupDetails( ps );#endif   }   /* Initialize the Boyer-Moore Pattern data */   if( ps->msMethod == MTH_BM )   {       int i;       /* Allocate and initialize the BMH data for each pattern */       for(i=0;i<ps->msNumPatterns;i++)       {           ps->msPatArray[ i ].psBmh = hbm_prep( ps->msPatArray[ i ].psPat, ps->msPatArray[ i ].psLen );       }      }   return 0;}/*** Search a body of text or data for paterns */int mwmSearch( void * pv,               unsigned char * T, int n,  int(*match)( void * id,  int index, void * data ),               void * data ){      MWM_STRUCT * ps = (MWM_STRUCT*)pv;       iPatCount += n;      ConvCaseToUpperEx( S, T, n ); /* Copy and Convert to Upper Case */      if( ps->msMethod == MTH_BM )      {         /* Boyer-Moore  */         int i,nfound=0;         unsigned char * Tx;         for( i=0; i<ps->msNumPatterns; i++ )         {            Tx = hbm_match( ps->msPatArray[i].psBmh, S, n );            if( Tx )            {               /* If we are case sensitive, do a final exact match test */               if( !ps->msPatArray[i].psNoCase )               {                 if( memcmp(ps->msPatArray[i].psPatCase,&T[Tx-S],ps->msPatArray[i].psLen) )                     continue; /* no match, next pattern please */               }               nfound++;                 if( match(ps->msPatArray[i].psID, (int)(Tx-S),data) )                  return nfound;            }         }         return nfound;      }      else /* MTH_MWM */      {         /* Wu-Manber */         return ps->search( ps, S, n, T, match, data );      }}/****/void mwmFeatures(void){   printf("%s\n",MWM_FEATURES);}/****************************************************************************************//****************************************************************************************//****************************************************************************************//****************************************************************************************//****************************************************************************************//****************************************************************************************//****************************************************************************************///Below are for test#ifdef MWM_MAINint FatalError( char * s ){   printf("FatalError: %s\n",s);   exit(0);}/*    global array of pattern pointers feeds of of ID..see argv parseing...*/char * patArray[10000];/*** Routine to process matches*/static int match (  void* id, int index, void * data ){   printf(" pattern matched: index= %d, id=%d, %s \n",   index, id, patArray[(int)id]  );   return 0;}/**/typedef struct{  unsigned char * b;  int blen;}BINARY;/**/int gethex( int c ){      if( c >= 'A' && c <= 'F' ) return c -'A' + 10;   if( c >= 'a' && c <= 'f' ) return c -'a' + 10;   if( c >= '0' && c <= '9' ) return c -'0';   return 0;}/**/BINARY  * converthexbytes( unsigned char * s){   int      val, k=0, m;   BINARY * p;   int      len = strlen(s);   printf("--input hex: %s\n",s);      p = malloc( sizeof(BINARY) );   p->b   = malloc( len / 2 );   p->blen= len / 2;   while( *s )   {      val   = gethex(*s);      s++;      val <<= 4;      if( !*s ) break; // check that we have 2 digits for hex, else ignore the 1st      val |= gethex(*s);      s++;      p->b[k++] = val;   }   if( k != p->blen )   {      printf("hex length mismatch\n");   }   printf("--output hex[%d]: ",p->blen); for(m=0;m<p->blen;m++) printf("%2.2x", p->b[m]);   printf(" \n");   return p;}/*   Synthetic data*/BINARY * syndata( int nbytes, int irand, int repchar ){   BINARY * p =(BINARY*)malloc( sizeof(BINARY) );   if( ! p ) return 0;    p->b    = (unsigned char *)malloc( nbytes );   if( ! p->b ) return 0;   p->blen = nbytes;   if( irand )   {     int i;     srand( time(0) );     for(i=0;i<nbytes;i++)     {        p->b[i] = (unsigned)( rand() & 0xff );     }   }   else   {       memset(p->b,repchar,nbytes);   }   return p;}/**/int randpat( unsigned char * s, int imin ){    int i,len;    static int first=1;    if( first )    {       first=0;       srand(time(0));    }    while( 1 )    {       len = rand() & 0xf; //max of 15 bytes        if( len >= imin ) break;    }        for(i=0;i<len;i++)    {        s[i] = 'a' + ( rand() % 26 ); // a-z    }    s[len]=0;    printf("--%s\n",s);    return len;}/*** Test driver */int CDECL main ( int argc, char ** argv ){   unsigned char *T, *text;   int            n,textlen;    int            nmatches, i, bm = 0;   MWM_STRUCT    *ps;   int            npats=0, len, stat, nocase=0;   BINARY        *p;   int            irep=0,irand=0,isyn=0,nrep=1024,repchar=0;   if( argc < 5 )   {      printf("usage: %s [-rand|-rep -n bytes -c|ch repchar -pr npats minlen ] -t|th TextToSearch -nocase [-pr numpats] -p|ph pat1 -p|ph pat2 -p|ph pat3\n",argv[0]);      exit(1);   }   /* -- Allocate a Pattern Matching Structure - and Init it. */   ps = mwmNew();   for(i=1;i<argc;i++)   {       if( strcmp(argv[i],"-nocase")==0 )       {          nocase = 1;       }       if( strcmp(argv[i],"-p")==0 )       {          i++;          npats++;          patArray[npats] = argv[i];          len = strlen( argv[i] );          mwmAddPatternEx( ps, (unsigned char*)argv[i], len, nocase, 0, 0, (void*)npats /* ID */, 3000 /* IID -internal id*/ );       }       if( strcmp(argv[i],"-ph")==0 )       {          i++;          npats++;          patArray[npats] = argv[i];          p = converthexbytes( argv[i] );          mwmAddPatternEx( ps, p->b, p->blen, 1 /* nocase*/, 0, 0, (void*)npats /* ID */, 3000 /* IID -internal id*/ );       }       if( strcmp(argv[i],"-pr")==0 )       {          int m = atoi( argv[++i] );          int imin = atoi( argv[++i] );          int k;          npats = 0;          for(k=0;k<m;k++)          {             unsigned char px[256];             int           len = randpat( px, imin );             npats++;             patArray[npats] = (unsigned char *)malloc( len+1 );             sprintf(patArray[npats],"%s",px);             mwmAddPatternEx( ps, px, len, 0 /* nocase*/, 0, 0, (void*)npats /* ID */, 3000 /* IID -internal id*/ );          }       }       if( strcmp(argv[i],"-rand")==0 )       {          irand = 1;          isyn  = 1;       }       if( strcmp(argv[i],"-rep")==0 )       {          irep  = 1;          isyn  = 1;       }       if( strcmp(argv[i],"-n")==0 )       {          nrep = atoi( argv[++i] );          isyn  = 1;       }       if( strcmp(argv[i],"-c")==0 )       {          repchar = argv[++i][0];          isyn  = 1;       }       if( strcmp(argv[i],"-ch")==0 )       {          BINARY * px = converthexbytes( argv[++i] );          repchar = px->b[0];          isyn  = 1;       }       if( strcmp(argv[i],"-t")==0 )       {          i++;          text = argv[i];          textlen=strlen(text);       }       if( strcmp(argv[i],"-th")==0 )       {           i++;           p = converthexbytes( argv[i] );           text = p->b;           textlen = p->blen;       }   }   /* generate synthetic text */   if( isyn )   {               p = syndata( nrep, irand, repchar );       text = p->b;       textlen = p->blen;   }   /* --- Preprocess Patterns */   mwmPrepPatterns( ps );     /* ---- Do a multi-pattern search in the Text */   stat = mwmSearch( (void*)ps, (unsigned char*)text, textlen, match, 0 );    if( stat == 0 )   {       printf("no pattern matches\n");   }else{       printf("%d pattern matches in list\n",stat);   }   return 0;}#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -