⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 stop.c

📁 信息检索中常用的技术
💻 C
📖 第 1 页 / 共 2 页
字号:
            Part 2: If not found, add the label to the tree            Part 3: Return the state number   Notes:   This machine always returns a state with the given label            because if the machine does not have a state with the given            label, then one is created.**/static intGetState( machine, label, signature )   DFA machine;        /* in: DFA whose state labels are searched */   StrList label;      /* in: state label searched for */   unsigned signature; /* in: signature of the label requested */   {   SearchTree *ptr;       /* pointer to a search tree link field */   SearchTree new_node;   /* for a newly added search tree node */             /* Part 1: Search the tree for the requested state */   ptr = &(machine->tree);   while ( (NULL != *ptr) && (   (signature != (*ptr)->signature)                              || !StrListEqual((StrList)label,(*ptr)->label)) )      ptr = (signature <= (*ptr)->signature) ? &(*ptr)->left : &(*ptr)->right;             /* Part 2: If not found, add the label to the tree */   if ( NULL == *ptr )      {         /* create a new node and fill in its fields */      new_node = (SearchTree)GetMemory( NULL, sizeof(SearchTreeNode) );      new_node->signature = signature;      new_node->label = (StrList)label;      new_node->state = machine->num_states;      new_node->left = new_node->right = NULL;         /* allocate more states if needed, set up the new state */      if ( machine->num_states == machine->max_states )         {         machine->max_states += TABLE_INCREMENT;         machine->state_table =           (StateTable)GetMemory( (char *)machine->state_table,                                  machine->max_states*sizeof(StateTableEntry));         }      machine->state_table[machine->num_states].label = (StrList)label;      machine->num_states++;         /* hook the new node into the binary search tree */      *ptr = new_node;      }   else      StrListDestroy( (StrList)label );                   /* Part 3: Return the state number */   return( (*ptr)->state );   } /* GetState *//*FN***************************************************************************        AddArc( machine, state, arc_label, state_label, state_signature )   Returns: void   Purpose: Add an arc between two states in a DFA   Plan:    Part 1: Search for the target state among existing states            Part 2: Make sure the arc table is big enough            Part 3: Add the new arc   Notes:   None.**/static voidAddArc( machine, state, arc_label, state_label, state_signature )   DFA machine;              /* in/out: machine with an arc added */   int state;                /* in: with an out arc added */   char arc_label;           /* in: label on the new arc */   StrList state_label;      /* in: label on the target state */   unsigned state_signature; /* in: label hash signature to speed searching */   {   register int target;   /* destination state for the new arc */        /* Part 1: Search for the target state among existing states */   StrListSort( state_label );   target = GetState( machine, state_label, state_signature );               /* Part 2: Make sure the arc table is big enough */   if ( machine->num_arcs == machine->max_arcs )      {      machine->max_arcs += TABLE_INCREMENT;      machine->arc_table =         (ArcTable)GetMemory( (char *)machine->arc_table,                              machine->max_arcs * sizeof(ArcTableEntry) );      }                        /* Part 3: Add the new arc */   machine->arc_table[machine->num_arcs].label = arc_label;   machine->arc_table[machine->num_arcs].target = target;   machine->num_arcs++;   machine->state_table[state].num_arcs++;   } /* AddArc *//*FN***************************************************************************        BuildDFA( words )   Returns: DFA -- newly created finite state machine   Purpose: Build a DFA to recognize a list of words   Plan:    Part 1: Allocate space and initialize variables            Part 2: Make and label the DFA start state            Part 3: Main loop - build the state and arc tables            Part 4: Deallocate the binary search tree and the state labels            Part 5: Reallocate the tables to squish them down            Part 6: Return the newly constructed DFA   Notes:   None.**/DFABuildDFA( words )   StrList words;  /* in: that the machine is built to recognize */   {   DFA machine;               /* local for easier access to machine */   register int state;        /* current state's state number */   char arc_label;            /* for the current arc when adding arcs */   char *string;              /* element in a set of state labels */   char ch;                   /* the first character in a new string */   StrList current_label;     /* set of strings labeling a state */   StrList target_label;      /* labeling the arc target state */   unsigned target_signature; /* hashed label for binary search tree */   register int i;            /* for looping through strings */              /* Part 1: Allocate space and initialize variables */   machine = (DFA)GetMemory( NULL, sizeof(DFAStruct) );   machine->max_states = TABLE_INCREMENT;   machine->state_table =      (StateTable)GetMemory(NULL, machine->max_states*sizeof(StateTableEntry));   machine->num_states = 0;   machine->max_arcs = TABLE_INCREMENT;   machine->arc_table =      (ArcTable)GetMemory( NULL, machine->max_arcs * sizeof(ArcTableEntry) );   machine->num_arcs = 0;   machine->tree = NULL;                /* Part 2: Make and label the DFA start state */   StrListUnique( words );               /* sort and unique the list */   machine->state_table[0].label = words;   machine->num_states = 1;            /* Part 3: Main loop - build the state and arc tables */   for ( state = 0; state < machine->num_states; state++ )      {              /* The current state has nothing but a label, so */              /* the first order of business is to set up some */              /* of its other major fields                     */      machine->state_table[state].is_final = FALSE;      machine->state_table[state].arc_offset = machine->num_arcs;      machine->state_table[state].num_arcs = 0;             /* Add arcs to the arc table for the current state */             /* based on the state's derived set.  Also set the */             /* state's final flag if the empty string is found */             /* in the suffix list                              */      current_label = machine->state_table[state].label;      target_label = StrListCreate();      target_signature = HASH_START;      arc_label = EOS;      for ( i = 0; i < StrListSize(current_label); i++ )         {            /* get the next string in the label and lop it */         string = StrListPeek( current_label, i );         ch = *string++;            /* the empty string means mark this state as final */         if ( EOS == ch )            { machine->state_table[state].is_final = TRUE; continue; }            /* make sure we have a legitimate arc_label */         if ( EOS == arc_label ) arc_label = ch;            /* if the first character is new, then we must */            /* add an arc for the previous first character */         if ( ch != arc_label )            {            AddArc(machine, state, arc_label, target_label, target_signature);            target_label = StrListCreate();            target_signature = HASH_START;            arc_label = ch;            }            /* add the current suffix to the target state label */         StrListAppend( target_label, string );         target_signature += (*string + 1) * HASH_INCREMENT;         while ( *string ) target_signature += *string++;         }              /* On loop exit we have not added an arc for the */              /* last bunch of suffixes, so we must do so, as  */              /* long as the last set of suffixes is not empty */              /* (which happens when the current state label   */              /* is the singleton set of the empty string).    */      if ( 0 < StrListSize(target_label) )         AddArc( machine, state, arc_label, target_label, target_signature );      }      /* Part 4: Deallocate the binary search tree and the state labels */   DestroyTree( machine->tree );  machine->tree = NULL;   for ( i = 0; i < machine->num_states; i++ )      {      StrListDestroy( machine->state_table[i].label );      machine->state_table[i].label = NULL;      }            /* Part 5: Reallocate the tables to squish them down */   machine->state_table = (StateTable)GetMemory( (char *)machine->state_table,                               machine->num_states * sizeof(StateTableEntry) );   machine->arc_table = (ArcTable)GetMemory( (char *)machine->arc_table,                               machine->num_arcs * sizeof(ArcTableEntry) );                /* Part 6: Return the newly constructed DFA */   return( machine );   } /* BuildDFA *//*FN***************************************************************************        GetTerm( stream, machine, size, output )   Returns: char * -- NULL if stream is exhausted, otherwise output buffer   Purpose: Get the next token from an input stream, filtering stop words   Plan:    Part 1: Return NULL immediately if there is no input            Part 2: Initialize the local variables            Part 3: Main Loop: Put an unfiltered word into the output buffer            Part 4: Return the output buffer   Notes:   This routine runs the DFA provided as the machine parameter,            and collects the text of any term in the output buffer.  If            a stop word is recognized in this process, it is skipped.            Care is also taken to be sure not to overrun the output buffer.**/char *GetTerm( stream, machine, size, output )   FILE *stream;    /* in: source of input characters */   DFA machine;     /* in: finite state machine driving process */   int size;        /* in: bytes in the output buffer */   char *output;    /* in/out: where the next token in placed */   {   char *outptr;        /* for scanning through the output buffer */   int ch;              /* current character during input scan */   register int state;  /* current state during DFA execution */           /* Part 1: Return NULL immediately if there is no input */   if ( EOF == (ch = getc(stream)) ) return( NULL );                  /* Part 2: Initialize the local variables */   outptr = output;     /* Part 3: Main Loop: Put an unfiltered word into the output buffer */   do      {         /* scan past any leading delimiters */      while ( (EOF != ch ) &&              ((DELIM_CH == char_class[ch]) ||               (DIGIT_CH == char_class[ch])) ) ch = getc( stream );         /* start the machine in its start state */      state = 0;         /* copy input to output until reaching a delimiter, and also */         /* run the DFA on the input to watch for filtered words      */      while ( (EOF != ch) && (DELIM_CH != char_class[ch]) )         {         if ( outptr == (output+size-1) ) { outptr = output; state = 0; }         *outptr++ = convert_case[ch];         if ( DEAD_STATE != state )            {            register int i;   /* for scanning through arc labels */            int arc_start;    /* where the arc label list starts */            int arc_end;      /* where the arc label list ends */            arc_start = machine->state_table[state].arc_offset;            arc_end = arc_start + machine->state_table[state].num_arcs;            for ( i = arc_start; i < arc_end; i++ )               if ( convert_case[ch] == machine->arc_table[i].label )                  { state = machine->arc_table[i].target; break; }            if ( i == arc_end ) state = DEAD_STATE;            }         ch = getc( stream );         }         /* start from scratch if a stop word is recognized */      if ( (DEAD_STATE != state) && machine->state_table[state].is_final )         outptr = output;         /* terminate the output buffer */      *outptr = EOS;      }   while ( (EOF != ch) && !*output );                    /* Part 4: Return the output buffer */   return( output );   } /* GetTerm */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -