📄 acsmx2.c
字号:
if(s_verbose)printf(" Begin AddPatternStates: acsmNumStates=%d\n",acsm->acsmNumStates); if(s_verbose)printf(" adding '%.*s', nocase=%d\n", n,p->patrn, p->nocase ); /* * Match up pattern with existing states */ for (; n > 0; pattern++, n--) { if(s_verbose)printf(" find char='%c'\n", *pattern ); next = List_GetNextState(acsm,state,*pattern); if (next == ACSM_FAIL_STATE2 || next == 0) { break; } state = next; } /* * Add new states for the rest of the pattern bytes, 1 state per byte */ for (; n > 0; pattern++, n--) { if(s_verbose)printf(" add char='%c' state=%d NumStates=%d\n", *pattern, state, acsm->acsmNumStates ); acsm->acsmNumStates++; List_PutNextState(acsm,state,*pattern,acsm->acsmNumStates); state = acsm->acsmNumStates; } AddMatchListEntry (acsm, state, p ); if(s_verbose)printf(" End AddPatternStates: acsmNumStates=%d\n",acsm->acsmNumStates);}/** Build A Non-Deterministic Finite Automata* The keyword state table must already be built, via AddPatternStates().*/ static voidBuild_NFA (ACSM_STRUCT2 * acsm) { int r, s, i; QUEUE q, *queue = &q; acstate_t * FailState = acsm->acsmFailState; ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList; ACSM_PATTERN2 * mlist,* px; /* Init a Queue */ queue_init (queue); /* Add the state 0 transitions 1st, the states at depth 1, fail to state 0 */ for (i = 0; i < acsm->acsmAlphabetSize; i++) { s = List_GetNextState2(acsm,0,i); if( s ) { queue_add (queue, s); FailState[s] = 0; } } /* Build the fail state successive layer of transitions */ while (queue_count (queue) > 0) { r = queue_remove (queue); /* Find Final States for any Failure */ for (i = 0; i < acsm->acsmAlphabetSize; i++) { int fs, next; s = List_GetNextState(acsm,r,i); if( s != ACSM_FAIL_STATE2 ) { queue_add (queue, s); fs = FailState[r]; /* * Locate the next valid state for 'i' starting at fs */ while( (next=List_GetNextState(acsm,fs,i)) == ACSM_FAIL_STATE2 ) { fs = FailState[fs]; } /* * Update 's' state failure state to point to the next valid state */ FailState[s] = next; /* * Copy 'next'states MatchList to 's' states MatchList, * we copy them so each list can be AC_FREE'd later, * else we could just manipulate pointers to fake the copy. */ for( mlist = MatchList[next]; mlist; mlist = mlist->next) { px = CopyMatchListEntry (mlist); /* Insert at front of MatchList */ px->next = MatchList[s]; MatchList[s] = px; } } } } /* Clean up the queue */ queue_free (queue); if( s_verbose)printf("End Build_NFA: NumStates=%d\n",acsm->acsmNumStates);}/** Build Deterministic Finite Automata from the NFA*/ static voidConvert_NFA_To_DFA (ACSM_STRUCT2 * acsm) { int i, r, s, cFailState; QUEUE q, *queue = &q; acstate_t * FailState = acsm->acsmFailState; /* Init a Queue */ queue_init (queue); /* Add the state 0 transitions 1st */ for(i=0; i<acsm->acsmAlphabetSize; i++) { s = List_GetNextState(acsm,0,i); if ( s != 0 ) { queue_add (queue, s); } } /* Start building the next layer of transitions */ while( queue_count(queue) > 0 ) { r = queue_remove(queue); /* Process this states layer */ for (i = 0; i < acsm->acsmAlphabetSize; i++) { s = List_GetNextState(acsm,r,i); if( s != ACSM_FAIL_STATE2 && s!= 0) { queue_add (queue, s); } else { cFailState = List_GetNextState(acsm,FailState[r],i); if( cFailState != 0 && cFailState != ACSM_FAIL_STATE2 ) { List_PutNextState(acsm,r,i,cFailState); } } } } /* Clean up the queue */ queue_free (queue); if(s_verbose)printf("End Convert_NFA_To_DFA: NumStates=%d\n",acsm->acsmNumStates);}/*** Convert a row lists for the state table to a full vector format**/static int Conv_List_To_Full(ACSM_STRUCT2 * acsm) { int tcnt, k; acstate_t * p; acstate_t ** NextState = acsm->acsmNextState; for(k=0;k<acsm->acsmMaxStates;k++) { p = AC_MALLOC( sizeof(acstate_t) * (acsm->acsmAlphabetSize+2) ); if(!p) return -1; tcnt = List_ConvToFull( acsm, (acstate_t)k, p+2 ); p[0] = ACF_FULL; p[1] = 0; /* no matches yet */ NextState[k] = p; /* now we have a full format row vector */ } return 0;}/** Convert DFA memory usage from list based storage to a sparse-row storage. ** The Sparse format allows each row to be either full or sparse formatted. If the sparse row has* too many transitions, performance or space may dictate that we use the standard full formatting * for the row. More than 5 or 10 transitions per state ought to really whack performance. So the * user can specify the max state transitions per state allowed in the sparse format. ** Standard Full Matrix Format* ---------------------------* acstate_t ** NextState ( 1st index is row/state, 2nd index is column=event/input)** example: * * events -> a b c d e f g h i j k l m n o p* states * N 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0* * Sparse Format, each row : Words Value* 1-1 fmt(0-full,1-sparse,2-banded,3-sparsebands)* 2-2 bool match flag (indicates this state has pattern matches)* 3-3 sparse state count ( # of input/next-state pairs )* 4-3+2*cnt 'input,next-state' pairs... each sizof(acstate_t)* * above example case yields:* Full Format: 0, 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0 ...* Sparse format: 1, 3, 'a',1,'b',7,'f',3 - uses 2+2*ntransitions (non-default transitions)*/ static int Conv_Full_DFA_To_Sparse(ACSM_STRUCT2 * acsm) { int cnt, m, k, i; acstate_t * p, state, maxstates=0; acstate_t ** NextState = acsm->acsmNextState; acstate_t full[MAX_ALPHABET_SIZE]; for(k=0;k<acsm->acsmMaxStates;k++) { cnt=0; List_ConvToFull(acsm, (acstate_t)k, full ); for (i = 0; i < acsm->acsmAlphabetSize; i++) { state = full[i]; if( state != 0 && state != ACSM_FAIL_STATE2 ) cnt++; } if( cnt > 0 ) maxstates++; if( k== 0 || cnt > acsm->acsmSparseMaxRowNodes ) { p = AC_MALLOC(sizeof(acstate_t)*(acsm->acsmAlphabetSize+2) ); if(!p) return -1; p[0] = ACF_FULL; p[1] = 0; memcpy(&p[2],full,acsm->acsmAlphabetSize*sizeof(acstate_t)); } else { p = AC_MALLOC(sizeof(acstate_t)*(3+2*cnt)); if(!p) return -1; m = 0; p[m++] = ACF_SPARSE; p[m++] = 0; /* no matches */ p[m++] = cnt; for(i = 0; i < acsm->acsmAlphabetSize ; i++) { state = full[i]; if( state != 0 && state != ACSM_FAIL_STATE2 ) { p[m++] = i; p[m++] = state; } } } NextState[k] = p; /* now we are a sparse formatted state transition array */ } return 0;}/* Convert Full matrix to Banded row format. Word values 1 2 -> banded 2 n number of values 3 i index of 1st value (0-256) 4 - 3+n next-state values at each index*/static int Conv_Full_DFA_To_Banded(ACSM_STRUCT2 * acsm) { int first = -1, last; acstate_t * p, state, full[MAX_ALPHABET_SIZE]; acstate_t ** NextState = acsm->acsmNextState; int cnt,m,k,i; for(k=0;k<acsm->acsmMaxStates;k++) { cnt=0; List_ConvToFull(acsm, (acstate_t)k, full ); first=-1; last =-2; for (i = 0; i < acsm->acsmAlphabetSize; i++) { state = full[i]; if( state !=0 && state != ACSM_FAIL_STATE2 ) { if( first < 0 ) first = i; last = i; } } /* calc band width */ cnt= last - first + 1; p = AC_MALLOC(sizeof(acstate_t)*(4+cnt)); if(!p) return -1; m = 0; p[m++] = ACF_BANDED; p[m++] = 0; /* no matches */ p[m++] = cnt; p[m++] = first; for(i = first; i <= last; i++) { p[m++] = full[i]; } NextState[k] = p; /* now we are a banded formatted state transition array */ } return 0;}/** Convert full matrix to Sparse Band row format.** next - Full formatted row of next states* asize - size of alphabet* zcnt - max number of zeros in a run of zeros in any given band.** Word Values* 1 ACF_SPARSEBANDS* 2 number of bands* repeat 3 - 5+ ....once for each band in this row.* 3 number of items in this band* 4 start index of this band* 5- next-state values in this band...*/static int calcSparseBands( acstate_t * next, int * begin, int * end, int asize, int zmax ){ int i, nbands,zcnt,last=0; acstate_t state; nbands=0; for( i=0; i<asize; i++ ) { state = next[i]; if( state !=0 && state != ACSM_FAIL_STATE2 ) { begin[nbands] = i; zcnt=0; for( ; i< asize; i++ ) { state = next[i]; if( state ==0 || state == ACSM_FAIL_STATE2 ) { zcnt++; if( zcnt > zmax ) break; } else { zcnt=0; last = i; } } end[nbands++] = last; } } return nbands;}/** Sparse Bands** Row Format:* Word* 1 SPARSEBANDS format indicator* 2 bool indicates a pattern match in this state* 3 number of sparse bands* 4 number of elements in this band* 5 start index of this band* 6- list of next states* * m number of elements in this band* m+1 start index of this band* m+2- list of next states*/static int Conv_Full_DFA_To_SparseBands(ACSM_STRUCT2 * acsm) { acstate_t * p; acstate_t ** NextState = acsm->acsmNextState; int cnt,m,k,i,zcnt=acsm->acsmSparseMaxZcnt; int band_begin[MAX_ALPHABET_SIZE]; int band_end[MAX_ALPHABET_SIZE]; int nbands,j; acstate_t full[MAX_ALPHABET_SIZE]; for(k=0;k<acsm->acsmMaxStates;k++) { cnt=0; List_ConvToFull(acsm, (acstate_t)k, full ); nbands = calcSparseBands( full, band_begin, band_end, acsm->acsmAlphabetSize, zcnt ); /* calc band width space*/ cnt = 3; for(i=0;i<nbands;i++) { cnt += 2; cnt += band_end[i] - band_begin[i] + 1; /*printf("state %d: sparseband %d, first=%d, last=%d, cnt=%d\n",k,i,band_begin[i],band_end[i],band_end[i]-band_begin[i]+1); */ } p = AC_MALLOC(sizeof(acstate_t)*(cnt)); if(!p) return -1; m = 0; p[m++] = ACF_SPARSEBANDS; p[m++] = 0; /* no matches */ p[m++] = nbands; for( i=0;i<nbands;i++ ) { p[m++] = band_end[i] - band_begin[i] + 1; /* # states in this band */ p[m++] = band_begin[i]; /* start index */ for( j=band_begin[i]; j<=band_end[i]; j++ ) { p[m++] = full[j]; /* some states may be state zero */ } } NextState[k] = p; /* now we are a sparse-banded formatted state transition array */ } return 0;}void Print_DFA_MatchList( ACSM_STRUCT2 * acsm, int state ){ ACSM_PATTERN2 * mlist; for (mlist = acsm->acsmMatchList[state]; mlist; mlist = mlist->next) { printf("%.*s ", mlist->n, mlist->patrn); }}/***/static voidPrint_DFA(ACSM_STRUCT2 * acsm) { int k,i; acstate_t * p, state, n, fmt, index, nb, bmatch; acstate_t ** NextState = acsm->acsmNextState; printf("Print DFA - %d active states\n",acsm->acsmNumStates); for(k=0;k<acsm->acsmNumStates;k++) { p = NextState[k]; if( !p ) continue; fmt = *p++; bmatch = *p++; printf("state %3d, fmt=%d: ",k,fmt); if( fmt ==ACF_SPARSE ) { n = *p++; for( ; n>0; n--, p+=2 ) { if( isprint(p[0]) ) printf("%3c->%-5d\t",p[0],p[1]); else printf("%3d->%-5d\t",p[0],p[1]); } } else if( fmt ==ACF_BANDED ) { n = *p++; index = *p++; for( ; n>0; n--, p++ ) { if( isprint(p[0]) ) printf("%3c->%-5d\t",index++,p[0]); else printf("%3d->%-5d\t",index++,p[0]); } } else if( fmt ==ACF_SPARSEBANDS ) { nb = *p++; for(i=0;i<nb;i++) { n = *p++; index = *p++; for( ; n>0; n--, p++ ) { if( isprint(index) ) printf("%3c->%-5d\t",index++,p[0]); else printf("%3d->%-5d\t",index++,p[0]); } } } else if( fmt == ACF_FULL ) { for( i=0; i<acsm->acsmAlphabetSize; i++ ) { state = p[i]; if( state != 0 && state != ACSM_FAIL_STATE2 ) { if( isprint(i) ) printf("%3c->%-5d\t",i,state); else printf("%3d->%-5d\t",i,state); } } } Print_DFA_MatchList( acsm, k); printf("\n"); }}/** Write a state table to disk*//*static voidWrite_DFA(ACSM_STRUCT2 * acsm, char * f) { int k,i; acstate_t * p, n, fmt, index, nb, bmatch; acstate_t ** NextState = acsm->acsmNextState; FILE * fp; printf("Dump DFA - %d active states\n",acsm->acsmNumStates); fp = fopen(f,"wb"); if(!fp) { printf("*** WARNING: could not write dfa to file - %s\n",f); return; } fwrite( &acsm->acsmNumStates, 4, 1, fp); for(k=0;k<acsm->acsmNumStates;k++) { p = NextState[k]; if( !p ) continue; fmt = *p++; bmatch = *p++; fwrite( &fmt, sizeof(acstate_t), 1, fp); fwrite( &bmatch, sizeof(acstate_t), 1, fp); if( fmt ==ACF_SPARSE ) { n = *p++; fwrite( &n, sizeof(acstate_t), 1, fp); fwrite( p, n*2*sizeof(acstate_t), 1, fp); } else if( fmt ==ACF_BANDED ) { n = *p++; fwrite( &n, sizeof(acstate_t), 1, fp); index = *p++; fwrite( &index, sizeof(acstate_t), 1, fp);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -