📄 acsmx2.c
字号:
/*** $Id$** ** acsmx2.c**** Multi-Pattern Search Engine**** Aho-Corasick State Machine - version 2.0**** Supports both Non-Deterministic and Deterministic Finite Automata ****** Reference - Efficient String matching: An Aid to Bibliographic Search** Alfred V Aho and Margaret J Corasick** Bell Labratories ** Copyright(C) 1975 Association for Computing Machinery,Inc**** +++** +++ Version 1.0 notes - Marc Norton:** +++**** Original implementation based on the 4 algorithms in the paper by Aho & Corasick,** some implementation ideas from 'Practical Algorithms in C', and some** of my own. **** 1) Finds all occurrences of all patterns within a text.**** +++** +++ Version 2.0 Notes - Marc Norton/Dan Roelker:** +++** ** New implementation modifies the state table storage and access model to use** compacted sparse vector storage. Dan Roelker and I hammered this strategy out** amongst many others in order to reduce memory usage and improve caching performance.** The memory usage is greatly reduced, we only use 1/4 of what we use to. The caching** performance is better in pure benchmarking tests, but does not show overall improvement ** in Snort. Unfortunately, once a pattern match test has been performed Snort moves on to doing** many other things before we get back to a patteren match test, so the cache is voided.** ** This versions has better caching performance characteristics, reduced memory, ** more state table storage options, and requires no a priori case conversions. ** It does maintain the same public interface. (Snort only used banded storage).**** 1) Supports NFA and DFA state machines, and basic keyword state machines** 2) Initial transition table uses Linked Lists** 3) Improved state table memory options. NFA and DFA state ** transition tables are converted to one of 4 formats during compilation.** a) Full matrix ** b) Sparse matrix** c) Banded matrix (Default-this is the only one used in snort)** d) Sparse-Banded matrix** 4) Added support for acstate_t in .h file so we can compile states as ** 16, or 32 bit state values for another reduction in memory consumption,** smaller states allows more of the state table to be cached, and improves** performance on x86-P4. Your mileage may vary, especially on risc systems.** 5) Added a bool to each state transition list to indicate if there is a matching** pattern in the state. This prevents us from accessing another data array** and can improve caching/performance.** 6) The search functions are very sensitive, don't change them without extensive testing,** or you'll just spoil the caching and prefetching opportunities.** ** Extras for fellow pattern matchers:** The table below explains the storage format used at each step.** You can use an NFA or DFA to match with, the NFA is slower but tiny - set the structure directly.** You can use any of the 4 storage modes above -full,sparse,banded,sparse-bands, set the structure directly.** For applications where you have lots of data and a pattern set to search, this version was up to 3x faster** than the previous verion, due to caching performance. This cannot be fully realized in Snort yet,** but other applications may have better caching opportunities.** Snort only needs to use the banded or full storage.**** Transition table format at each processing stage.** -------------------------------------------------** Patterns -> Keyword State Table (List)** Keyword State Table -> NFA (List)** NFA -> DFA (List)** DFA (List)-> Sparse Rows O(m-avg # transitions per state)** -> Banded Rows O(1) ** -> Sparse-Banded Rows O(nb-# bands)** -> Full Matrix O(1)**** Copyright(C) 2002,2003,2004 Marc Norton** Copyright(C) 2003,2004 Daniel Roelker ** Copyright(C) 2002,2003,2004 Sourcefire,Inc.**** This program is free software; you can redistribute it and/or modify** it under the terms of the GNU General Public License as published by** the Free Software Foundation; either version 2 of the License, or** (at your option) any later version.**** This program is distributed in the hope that it will be useful,** but WITHOUT ANY WARRANTY; without even the implied warranty of** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the** GNU General Public License for more details.**** You should have received a copy of the GNU General Public License** along with this program; if not, write to the Free Software** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.**/ #include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h> #include "acsmx2.h" /***/ #define MEMASSERT(p,s) if(!p){printf("ACSM-No Memory: %s!\n",s);exit(0);}/***/ static int max_memory = 0;/***/ static int s_verbose=0;/***/ typedef struct acsm_summary_s{ unsigned num_states; unsigned num_transitions; ACSM_STRUCT2 acsm;}acsm_summary_t;/***/ static acsm_summary_t summary={0,0}; /*** Case Translation Table */ static unsigned char xlatcase[256];/***/ staticvoidinit_xlatcase() { int i; for (i = 0; i < 256; i++) { xlatcase[i] = toupper(i); }}/** Case Conversion*/ static inline voidConvertCaseEx (unsigned char *d, unsigned char *s, int m) { int i;#ifdef XXXX int n; n = m & 3; m >>= 2; for (i = 0; i < m; i++ ) { d[0] = xlatcase[ s[0] ]; d[2] = xlatcase[ s[2] ]; d[1] = xlatcase[ s[1] ]; d[3] = xlatcase[ s[3] ]; d+=4; s+=4; } for (i=0; i < n; i++) { d[i] = xlatcase[ s[i] ]; }#else for (i=0; i < m; i++) { d[i] = xlatcase[ s[i] ]; }#endif}/***/ void acsmSetVerbose2(int n){ s_verbose = 1;}/***/ static void *AC_MALLOC (int n) { void *p; p = malloc (n); if (p) max_memory += n; return p;}/***/ static voidAC_FREE (void *p) { if (p) free (p);}/** Simple QUEUE NODE*/ typedef struct _qnode{ int state; struct _qnode *next;}QNODE;/** Simple QUEUE Structure*/ typedef struct _queue{ QNODE * head, *tail; int count;}QUEUE;/** Initialize the queue*/ static voidqueue_init (QUEUE * s) { s->head = s->tail = 0; s->count= 0;}/** Find a State in the queue*/ static intqueue_find (QUEUE * s, int state) { QNODE * q; q = s->head; while( q ) { if( q->state == state ) return 1; q = q->next; } return 0;}/** Add Tail Item to queue (FiFo/LiLo)*/ static voidqueue_add (QUEUE * s, int state) { QNODE * q; if( queue_find( s, state ) ) return; if (!s->head) { q = s->tail = s->head = (QNODE *) AC_MALLOC (sizeof (QNODE)); MEMASSERT (q, "queue_add"); q->state = state; q->next = 0; } else { q = (QNODE *) AC_MALLOC (sizeof (QNODE)); q->state = state; q->next = 0; s->tail->next = q; s->tail = q; } s->count++;}/** Remove Head Item from queue*/ static intqueue_remove (QUEUE * s) { int state = 0; QNODE * q; if (s->head) { q = s->head; state = q->state; s->head = s->head->next; s->count--; if( !s->head ) { s->tail = 0; s->count = 0; } AC_FREE (q); } return state;}/** Return items in the queue*/ static intqueue_count (QUEUE * s) { return s->count;}/** Free the queue*/ static voidqueue_free (QUEUE * s) { while (queue_count (s)) { queue_remove (s); }}/** Get Next State-NFA*/static int List_GetNextState( ACSM_STRUCT2 * acsm, int state, int input ){ trans_node_t * t = acsm->acsmTransTable[state]; while( t ) { if( t->key == input ) { return t->next_state; } t=t->next; } if( state == 0 ) return 0; return ACSM_FAIL_STATE2; /* Fail state ??? */}/** Get Next State-DFA*/static int List_GetNextState2( ACSM_STRUCT2 * acsm, int state, int input ){ trans_node_t * t = acsm->acsmTransTable[state]; while( t ) { if( t->key == input ) { return t->next_state; } t = t->next; } return 0; /* default state */}/** Put Next State - Head insertion, and transition updates*/static int List_PutNextState( ACSM_STRUCT2 * acsm, int state, int input, int next_state ){ trans_node_t * p; trans_node_t * tnew; // printf(" List_PutNextState: state=%d, input='%c', next_state=%d\n",state,input,next_state); /* Check if the transition already exists, if so just update the next_state */ p = acsm->acsmTransTable[state]; while( p ) { if( p->key == input ) /* transition already exists- reset the next state */ { p->next_state = next_state; return 0; } p=p->next; } /* Definitely not an existing transition - add it */ tnew = (trans_node_t*)AC_MALLOC(sizeof(trans_node_t)); if( !tnew ) return -1; tnew->key = input; tnew->next_state = next_state; tnew->next = 0; tnew->next = acsm->acsmTransTable[state]; acsm->acsmTransTable[state] = tnew; acsm->acsmNumTrans++; return 0; }/** Free the entire transition table */static int List_FreeTransTable( ACSM_STRUCT2 * acsm ){ int i; trans_node_t * t, *p; if( !acsm->acsmTransTable ) return 0; for(i=0;i< acsm->acsmMaxStates;i++) { t = acsm->acsmTransTable[i]; while( t ) { p = t->next; free(t); t = p; max_memory -= sizeof(trans_node_t); } } free(acsm->acsmTransTable); max_memory -= sizeof(void*) * acsm->acsmMaxStates; acsm->acsmTransTable = 0; return 0;}/** *//*static int List_FreeList( trans_node_t * t ){ int tcnt=0; trans_node_t *p; while( t ) { p = t->next; free(t); t = p; max_memory -= sizeof(trans_node_t); tcnt++; } return tcnt;}*//** Print the trans table to stdout*/static int List_PrintTransTable( ACSM_STRUCT2 * acsm ){ int i; trans_node_t * t; ACSM_PATTERN2 * patrn; if( !acsm->acsmTransTable ) return 0; printf("Print Transition Table- %d active states\n",acsm->acsmNumStates); for(i=0;i< acsm->acsmNumStates;i++) { t = acsm->acsmTransTable[i]; printf("state %3d: ",i); while( t ) { if( isprint(t->key) ) printf("%3c->%-5d\t",t->key,t->next_state); else printf("%3d->%-5d\t",t->key,t->next_state); t = t->next; } patrn =acsm->acsmMatchList[i]; while( patrn ) { printf("%.*s ",patrn->n,patrn->patrn); patrn = patrn->next; } printf("\n"); } return 0;}/** Converts row of states from list to a full vector format*/ static int List_ConvToFull(ACSM_STRUCT2 * acsm, acstate_t state, acstate_t * full ){ int tcnt = 0; trans_node_t * t = acsm->acsmTransTable[ state ]; memset(full,0,sizeof(acstate_t)*acsm->acsmAlphabetSize); if( !t ) return 0; while(t) { full[ t->key ] = t->next_state; tcnt++; t = t->next; } return tcnt;}/** Copy a Match List Entry - don't dup the pattern data*/ static ACSM_PATTERN2*CopyMatchListEntry (ACSM_PATTERN2 * px) { ACSM_PATTERN2 * p; p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2)); MEMASSERT (p, "CopyMatchListEntry"); memcpy (p, px, sizeof (ACSM_PATTERN2)); p->next = 0; return p;}/** Check if a pattern is in the list already,* validate it using the 'id' field. This must be unique* for every pattern.*//*staticint FindMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px) { ACSM_PATTERN2 * p; p = acsm->acsmMatchList[state]; while( p ) { if( p->id == px->id ) return 1; p = p->next; } return 0;}*//** Add a pattern to the list of patterns terminated at this state.* Insert at front of list.*/ static voidAddMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px) { ACSM_PATTERN2 * p; p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2)); MEMASSERT (p, "AddMatchListEntry"); memcpy (p, px, sizeof (ACSM_PATTERN2)); p->next = acsm->acsmMatchList[state]; acsm->acsmMatchList[state] = p;}static voidAddPatternStates (ACSM_STRUCT2 * acsm, ACSM_PATTERN2 * p) { int state, next, n; unsigned char *pattern; n = p->n; pattern = p->patrn; state = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -