⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regx.c

📁 包括文件操作、图形系统、并口串口编程、鼠标编程、小型cad系统、编译器、病毒防火墙、海底大战等多个实例。
💻 C
📖 第 1 页 / 共 2 页
字号:
                     if (!myiswhitespc( search_string[j-1] )) {
                        rc = put_dq( n1 );
                        if (deque[dq_tail] == SCAN)
                           --j;
                     }
                  }
               }
               break;
            default             :
               assert( FALSE );
               break;
         }
         if (rc == ERROR)
            return( 0 );
      } else {
         assert( nfa_pointer->node_type[state] == CNODE );

         n1 = nfa_pointer->next1[state];
         if (push_dq( n1 ) == ERROR)
            return( 0 );

         if (n1 != nfa_pointer->next2[state])
            if (push_dq( nfa_pointer->next2[state] ) == ERROR)
               return( 0 );
      }
      if (dequeempty( )  ||  j > search_len)
         return( ERROR );
      state = pop_dq( );
   }
   return( j - search_col );
}



int  put_dq( int v )
{
   if (queued_states[v+1] == 0) {
      ++queued_states[v+1];
      deque[dq_head] = v;
      dq_head--;
      if (dq_head < 0)
         dq_head = REGX_SIZE - 1;
      if (dq_tail == dq_head) {
         nfa_status = ERROR;
         show_search_message( NFA_GAVE_UP, g_display.diag_color );
         return( ERROR );
      }
   }
   return( OK );
}



int  push_dq( int v )
{
   ++dq_tail;
   if (dq_tail == dq_head) {
      nfa_status = ERROR;
      show_search_message( NFA_GAVE_UP, g_display.diag_color );
      return( ERROR );
   } else {
      if (dq_tail > REGX_SIZE - 1)
         dq_tail = 0;
      deque[dq_tail] = v;
      return( OK );
   }
}



int  pop_dq( void )
{
register int v;

   v = deque[dq_tail];
   dq_tail--;
   if (dq_tail < 0)
      dq_tail = REGX_SIZE - 1;
   return( v );
}



int  dequeempty( void )
{
   if (dq_tail > dq_head)
      return( dq_tail - dq_head == 1 );
   else
      return( dq_tail + REGX_SIZE - dq_head == 1 );
}






int  build_nfa( void )
{
   regx_rc = OK;

   init_nfa( );
   lookahead = 0;
   reg_len   = strlen( (char *)regx.pattern );
   parser_state = 1;

   nfa.next1[0] = expression( );
   if (regx_rc == OK) {
      emit_cnode( 0, START, nfa.next1[0], nfa.next1[0] );
      emit_cnode( parser_state, ACCEPT, 0, 0 );
      regx.node_count = parser_state + 2;
   }

   if (g_status.command == DefineRegXGrep) {
      memcpy( &sas_nfa, &nfa, sizeof(NFA_TYPE) );
      memcpy( &sas_regx, &regx, sizeof(REGX_INFO) );
   }
   return( regx_rc );
}



int  expression( void )
{
int t1;
int t2;
int r;

   t1 = term( );
   r = t1;
   if (regx.pattern[lookahead] == '|') {
      lookahead++;
      parser_state++;
      r = t2 = parser_state;
      parser_state++;
      emit_cnode( t2, OR_NODE, expression( ), t1 );
      emit_cnode( t2-1, JUXTA, parser_state, parser_state );

     
      if (nfa.node_type[t1] == CNODE)
         t1 = min( nfa.next1[t1], nfa.next2[t1] );
      nfa.next1[t1-1] = t2;
      if (nfa.node_type[t1-1] == NNODE)
         nfa.next2[t1-1] = t2;
   }
   return( r );
}



int  term( void )
{
int r;

   r = factor( );
   if (regx.pattern[lookahead] == '('  ||  letter( regx.pattern[lookahead] ))
      term( );
   else if (Kleene_star( regx.pattern[lookahead] ))
      regx_error( reg11 );
   return( r );
}



int  factor( void )
{
int t1;
int t2;
int r;
int c;
int paren;

   t1 = parser_state;
   c = regx.pattern[lookahead];
   if (c == '(') {
      lookahead++;
      t2 = expression( );
      if (regx.pattern[lookahead] == ')') {
         lookahead++;
         paren = TRUE;
      } else

         /*
          * unmatched open parens
          */
         regx_error( reg2 );
   } else if (letter( c )) {
      paren = FALSE;
      switch (c) {
         case ']' :

          
            regx_error( reg9 );
            break;
         case '.' :
            ttype = WILD;
            break;
         case ',' :
            ttype = WHITESPACE;
            break;
         case '^' :
            ttype = BOL;
            break;
         case '$' :
            ttype = EOL;
            break;
         case '<' :
            ttype = BOW;
            break;
         case '>' :
            ttype = EOW;
            break;
         case '\\' :
            ++lookahead;
            ttype =  mode.search_case == IGNORE ? IGNORE_ASCII : STRAIGHT_ASCII;
            if (lookahead != '\0') {
               if (regx.pattern[lookahead] != ':')
                  c = escape_char( regx.pattern[lookahead] );

              
               else {
                  c = regx.pattern[lookahead+1];
                  if (c != '\0') {
                     switch (c) {
                        case 'a' :
                           ++lookahead;
                           ttype = ALPHANUM;
                           break;
                        case 'b' :
                           ++lookahead;
                           ttype = WHITESPACE;
                           break;
                        case 'c' :
                           ++lookahead;
                           ttype = ALPHA;
                           break;
                        case 'd' :
                           ++lookahead;
                           ttype = DECIMAL;
                           break;
                        case 'h' :
                           ++lookahead;
                           ttype = HEX;
                           break;
                        case 'l' :
                           ++lookahead;
                           ttype = LOWER;
                           break;
                        case 'u' :
                           ++lookahead;
                           ttype = UPPER;
                           break;
                        default :
                           c = escape_char( regx.pattern[lookahead] );
                           break;
                     }
                  } else
                     c = escape_char( regx.pattern[lookahead] );
               }
            } else
               regx_error( reg4 );
            break;
         case '[' :
            memset( class_bits, 0, sizeof(char) * 32 );
            ++lookahead;
            if (lookahead != '\0') {
               c = regx.pattern[lookahead];
               if (c == '^') {
                  ++lookahead;
                  ttype = NOTCLASS;
               } else
                  ttype = CLASS;

               c1 = regx.pattern[lookahead];
               do {
                  class_bits[c1/8]  |=  bit[c1%8];
                  if (c1 != '\0')
                     ++lookahead;
                  if (regx.pattern[lookahead] == '-') {
                     ++lookahead;
                     c2 = regx.pattern[lookahead];
                     if (c2 != '\0') {

                       
                        if (c2 < c1) {
                           c  = c2;
                           c2 = c1;
                           c1 = c;
                        }

                        for (c=c1; c <= c2; c++)
                           class_bits[c/8] |= bit[c%8];

                        if (regx.pattern[lookahead] != '\0')
                           ++lookahead;
                     } else
                        regx_error( reg10 );
                  }
                  c1 = regx.pattern[lookahead];
               } while (c1  != '\0'  &&  c1 != ']');

               if (c1 == '\0')
                  regx_error( reg5 );
            } else
               regx_error( reg6 );
            break;
         default :
            if (mode.search_case == IGNORE) {
               c = tolower( c );
               ttype = IGNORE_ASCII;
            } else
               ttype = STRAIGHT_ASCII;
      }
      emit_nnode( parser_state, ttype, c, parser_state+1, parser_state+1 );
      if (ttype == CLASS  ||  ttype == NOTCLASS) {
         nfa.class[parser_state] = calloc( 32, sizeof(char) );
         if (nfa.class[parser_state] != NULL)
            memcpy( nfa.class[parser_state], class_bits, sizeof( char )*32 );
         else
            regx_error( reg7 );
      }
      t2 = parser_state;
      lookahead++;
      parser_state++;
   } else if (c == '\0')
      return( 0 );
   else {
      if (c == '*'  ||  c == '+'  ||  c == '?')
         regx_error( reg8 );
      else if (c  ==  ')')
         regx_error( reg3 );
      else
         regx_error( reg2 );
   }

   c = regx.pattern[lookahead];
   switch (c) {
      case '*' :
         emit_cnode( parser_state, CLOSURE, parser_state+1, t2 );
         r = parser_state;
         if (nfa.node_type[t1] == CNODE)
            t1 = min( nfa.next1[t1], nfa.next2[t1] );
         nfa.next1[t1-1] = parser_state;
         if (nfa.node_type[t1-1] == NNODE)
            nfa.next2[t1-1] = parser_state;
         lookahead++;
         parser_state++;
         paren = FALSE;
         break;
      case '+' :
         if (paren == TRUE) {
            emit_cnode( parser_state, JUXTA, parser_state+2, parser_state+2 );
            parser_state++;
         }

         emit_cnode( parser_state, JUXTA, t2, t2 );
         r = parser_state;
         parser_state++;

         if (paren == FALSE) {
            nfa.next1[t2] = parser_state;
            if (nfa.node_type[t2] == NNODE)
               nfa.next2[t2] = parser_state;
         }

         emit_cnode( parser_state, CLOSURE, parser_state+1, t2 );
         if (nfa.node_type[t1] == CNODE)
            t1 = min( nfa.next1[t1], nfa.next2[t1] );
         nfa.next1[t1-1] = r;
         if (nfa.node_type[t1-1] == NNODE)
            nfa.next2[t1-1] = r;
         parser_state++;
         lookahead++;
         paren = FALSE;
         break;
      case '?' :
         emit_cnode( parser_state, JUXTA, parser_state+2, parser_state+2 );
         parser_state++;
         r = parser_state;
         emit_cnode( parser_state, ZERO_OR_ONE, parser_state+1, t2 );
         if (nfa.node_type[t1] == CNODE)
            t1 = min( nfa.next1[t1], nfa.next2[t1] );
         nfa.next1[t1-1] = parser_state;
         if (nfa.node_type[t1-1] == NNODE)
            nfa.next2[t1-1] = parser_state;
         parser_state++;
         lookahead++;
         paren = FALSE;
         break;
      default  :
         r = t2;
         break;
   }

  
   if (paren) {
      emit_cnode( parser_state, JUXTA, parser_state+1, parser_state+1 );
      parser_state++;
   }
   return( r );
}



int  escape_char( int let )
{
   switch (let) {
      case '0' :
         let = 0x00;
         break;
      case 'a' :
         let = 0x07;
         break;
      case 'b' :
         let = 0x08;
         break;
      case 'n' :
         let = 0x0a;
         break;
      case 'r' :
         let = 0x0d;
         break;
      case 't' :
         let = 0x09;
         break;
      default  :
         break;
   }
   return( let );
}



void emit_cnode( int index, int ttype, int n1, int n2 )
{
   assert( index >= 0);
   assert( index < REGX_SIZE );

   nfa.node_type[index] = CNODE;
   nfa.term_type[index] = ttype;
   nfa.c[index] = 0;
   nfa.next1[index] = n1;
   nfa.next2[index] = n2;
}



void emit_nnode( int index, int ttype, int c, int n1, int n2 )
{
   assert( index >= 0);
   assert( index < REGX_SIZE );

   nfa.node_type[index] = NNODE;
   nfa.term_type[index] = ttype;
   nfa.c[index] = c;
   nfa.next1[index] = n1;
   nfa.next2[index] = n2;
}



void init_nfa( void )
{
int i;

   for (i=0; i < REGX_SIZE; i++) {
      nfa.node_type[i] = NNODE;
      nfa.term_type[i] = 0;
      nfa.c[i] = 0;
      nfa.next1[i] = 0;
      nfa.next2[i] = 0;
      if (nfa.class[i] != NULL)
         free( nfa.class[i] );
      nfa.class[i] = NULL;
   }
}



void regx_error( char *line )
{
   error( WARNING, regx_error_line, line );
   regx_rc = ERROR;
}


int  separator( int let )
{
   return( let == 0  ||  let == ')'  ||  let == '|' );
}



int  Kleene_star( int let )
{
   return( let == '*'  ||  let == '+'  ||  let == '?' );
}



int  letter( int let )
{
   return( !separator( let )  &&  !Kleene_star( let ) );
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -