⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regx.c

📁 C语言实战105例 随书光盘 王为青 张圣亮编著 中国邮电出版社出版
💻 C
📖 第 1 页 / 共 3 页
字号:
{
int t1;
int t2;
int r;
int c;
int paren;

   t1 = parser_state;
   c = regx.pattern[lookahead];
   if (c == '(') {
      lookahead++;
      t2 = expression( );
      if (regx.pattern[lookahead] == ')') {
         lookahead++;
         paren = TRUE;
      } else

         /*
          * unmatched open parens
          */
         regx_error( reg2 );
   } else if (letter( c )) {
      paren = FALSE;
      switch (c) {
         case ']' :

            /*
             * unmatched close bracket
             */
            regx_error( reg9 );
            break;
         case '.' :
            ttype = WILD;
            break;
         case ',' :
            ttype = WHITESPACE;
            break;
         case '^' :
            ttype = BOL;
            break;
         case '$' :
            ttype = EOL;
            break;
         case '<' :
            ttype = BOW;
            break;
         case '>' :
            ttype = EOW;
            break;
         case '\\' :
            ++lookahead;
            ttype =  mode.search_case == IGNORE ? IGNORE_ASCII : STRAIGHT_ASCII;
            if (lookahead != '\0') {
               if (regx.pattern[lookahead] != ':')
                  c = escape_char( regx.pattern[lookahead] );

               /*
                * predefined unix-like macros.
                */
               else {
                  c = regx.pattern[lookahead+1];
                  if (c != '\0') {
                     switch (c) {
                        case 'a' :
                           ++lookahead;
                           ttype = ALPHANUM;
                           break;
                        case 'b' :
                           ++lookahead;
                           ttype = WHITESPACE;
                           break;
                        case 'c' :
                           ++lookahead;
                           ttype = ALPHA;
                           break;
                        case 'd' :
                           ++lookahead;
                           ttype = DECIMAL;
                           break;
                        case 'h' :
                           ++lookahead;
                           ttype = HEX;
                           break;
                        case 'l' :
                           ++lookahead;
                           ttype = LOWER;
                           break;
                        case 'u' :
                           ++lookahead;
                           ttype = UPPER;
                           break;
                        default :
                           c = escape_char( regx.pattern[lookahead] );
                           break;
                     }
                  } else
                     c = escape_char( regx.pattern[lookahead] );
               }
            } else
               regx_error( reg4 );
            break;
         case '[' :
            memset( class_bits, 0, sizeof(char) * 32 );
            ++lookahead;
            if (lookahead != '\0') {
               c = regx.pattern[lookahead];
               if (c == '^') {
                  ++lookahead;
                  ttype = NOTCLASS;
               } else
                  ttype = CLASS;

               c1 = regx.pattern[lookahead];
               do {
                  class_bits[c1/8]  |=  bit[c1%8];
                  if (c1 != '\0')
                     ++lookahead;
                  if (regx.pattern[lookahead] == '-') {
                     ++lookahead;
                     c2 = regx.pattern[lookahead];
                     if (c2 != '\0') {

                        /*
                         * just in case the hi for the range is given first,
                         *  switch c1 and c2,  e.g. [9-0].
                         */
                        if (c2 < c1) {
                           c  = c2;
                           c2 = c1;
                           c1 = c;
                        }

                        for (c=c1; c <= c2; c++)
                           class_bits[c/8] |= bit[c%8];

                        if (regx.pattern[lookahead] != '\0')
                           ++lookahead;
                     } else
                        regx_error( reg10 );
                  }
                  c1 = regx.pattern[lookahead];
               } while (c1  != '\0'  &&  c1 != ']');

               if (c1 == '\0')
                  regx_error( reg5 );
            } else
               regx_error( reg6 );
            break;
         default :
            if (mode.search_case == IGNORE) {
               c = tolower( c );
               ttype = IGNORE_ASCII;
            } else
               ttype = STRAIGHT_ASCII;
      }
      emit_nnode( parser_state, ttype, c, parser_state+1, parser_state+1 );
      if (ttype == CLASS  ||  ttype == NOTCLASS) {
         nfa.class[parser_state] = calloc( 32, sizeof(char) );
         if (nfa.class[parser_state] != NULL)
            memcpy( nfa.class[parser_state], class_bits, sizeof( char )*32 );
         else
            regx_error( reg7 );
      }
      t2 = parser_state;
      lookahead++;
      parser_state++;
   } else if (c == '\0')
      return( 0 );
   else {
      if (c == '*'  ||  c == '+'  ||  c == '?')
         regx_error( reg8 );
      else if (c  ==  ')')
         regx_error( reg3 );
      else
         regx_error( reg2 );
   }

   c = regx.pattern[lookahead];
   switch (c) {
      case '*' :
         emit_cnode( parser_state, CLOSURE, parser_state+1, t2 );
         r = parser_state;
         if (nfa.node_type[t1] == CNODE)
            t1 = min( nfa.next1[t1], nfa.next2[t1] );
         nfa.next1[t1-1] = parser_state;
         if (nfa.node_type[t1-1] == NNODE)
            nfa.next2[t1-1] = parser_state;
         lookahead++;
         parser_state++;
         paren = FALSE;
         break;
      case '+' :
         if (paren == TRUE) {
            emit_cnode( parser_state, JUXTA, parser_state+2, parser_state+2 );
            parser_state++;
         }

         emit_cnode( parser_state, JUXTA, t2, t2 );
         r = parser_state;
         parser_state++;

         if (paren == FALSE) {
            nfa.next1[t2] = parser_state;
            if (nfa.node_type[t2] == NNODE)
               nfa.next2[t2] = parser_state;
         }

         emit_cnode( parser_state, CLOSURE, parser_state+1, t2 );
         if (nfa.node_type[t1] == CNODE)
            t1 = min( nfa.next1[t1], nfa.next2[t1] );
         nfa.next1[t1-1] = r;
         if (nfa.node_type[t1-1] == NNODE)
            nfa.next2[t1-1] = r;
         parser_state++;
         lookahead++;
         paren = FALSE;
         break;
      case '?' :
         emit_cnode( parser_state, JUXTA, parser_state+2, parser_state+2 );
         parser_state++;
         r = parser_state;
         emit_cnode( parser_state, ZERO_OR_ONE, parser_state+1, t2 );
         if (nfa.node_type[t1] == CNODE)
            t1 = min( nfa.next1[t1], nfa.next2[t1] );
         nfa.next1[t1-1] = parser_state;
         if (nfa.node_type[t1-1] == NNODE)
            nfa.next2[t1-1] = parser_state;
         parser_state++;
         lookahead++;
         paren = FALSE;
         break;
      default  :
         r = t2;
         break;
   }

   /*
    * close parens seem to need a JUXTA node to gather all reg ex's
    *  to a common point.
    */
   if (paren) {
      emit_cnode( parser_state, JUXTA, parser_state+1, parser_state+1 );
      parser_state++;
   }
   return( r );
}


/*
 * Name:    escape_char
 * Purpose: translate the letter that follows a backslash into the
 *           character it stands for
 * Date:    June 5, 1993
 * Passed:  let:  letter to escape
 * Returns: '0' -> 0x00,  'a' -> 0x07,  'b' -> 0x08,  'n' -> 0x0a,
 *           'r' -> 0x0d,  't' -> 0x09.  any other letter is handed
 *           back unchanged.
 */
int  escape_char( int let )
{
   switch (let) {
      case '0' :
         return( 0x00 );
      case 'a' :
         return( 0x07 );
      case 'b' :
         return( 0x08 );
      case 't' :
         return( 0x09 );
      case 'n' :
         return( 0x0a );
      case 'r' :
         return( 0x0d );
      default  :
         break;
   }

   /*
    * not a recognized escape, the letter stands for itself.
    */
   return( let );
}


/*
 * Name:    emit_cnode
 * Purpose: fill in one slot of our pattern matching machine as a null
 *           node (CNODE)
 * Date:    June 5, 1993
 * Passed:  index:  slot in the nfa to fill in, 0 <= index < REGX_SIZE
 *          ttype:  terminal type - CLOSURE, OR, JUXTA, etc...
 *          n1:     next state, path for lambda transitions
 *          n2:     other next state, usually a NNODE
 * Returns: none, but modifies local global nfa.
 * Notes:   the range of index is checked with assert( ) only.  a null
 *           node recognizes no letter, so its c field is set to 0.
 */
void emit_cnode( int index, int ttype, int n1, int n2 )
{
   assert( index >= 0 );
   assert( index < REGX_SIZE );

   /*
    * each field lives in its own array, the order of the stores
    *  does not matter.
    */
   nfa.next1[index]     = n1;
   nfa.next2[index]     = n2;
   nfa.c[index]         = 0;
   nfa.term_type[index] = ttype;
   nfa.node_type[index] = CNODE;
}


/*
 * Name:    emit_nnode
 * Purpose: fill in one slot of our pattern matching machine as a node
 *           that recognizes a letter (NNODE)
 * Date:    June 5, 1993
 * Passed:  index:  slot in the nfa to fill in, 0 <= index < REGX_SIZE
 *          ttype:  terminal type - EOL, ASCII, etc...
 *          c:      letter this node recognizes
 *          n1:     next state
 *          n2:     other next state, which can be same as n1
 * Returns: none, but modifies local global nfa.
 * Notes:   the range of index is checked with assert( ) only.
 */
void emit_nnode( int index, int ttype, int c, int n1, int n2 )
{
   assert( index >= 0 );
   assert( index < REGX_SIZE );

   /*
    * each field lives in its own array, the order of the stores
    *  does not matter.
    */
   nfa.next1[index]     = n1;
   nfa.next2[index]     = n2;
   nfa.c[index]         = c;
   nfa.term_type[index] = ttype;
   nfa.node_type[index] = NNODE;
}


/*
 * Name:    init_nfa
 * Purpose: set local global nfa to NULL state
 * Date:    June 5, 1993
 * Passed:  none
 * Returns: none, but every one of the REGX_SIZE slots in nfa is reset:
 *           node type becomes NNODE, term type, letter and both next
 *           states become 0, and the slot's class buffer is released.
 * Notes:   the class pointers are passed to free( ), so each of them
 *           must be either NULL or a live heap pointer on entry.
 */
void init_nfa( void )
{
int i;

   for (i=0; i < REGX_SIZE; i++) {
      nfa.node_type[i] = NNODE;
      nfa.term_type[i] = 0;
      nfa.c[i] = 0;
      nfa.next1[i] = 0;
      nfa.next2[i] = 0;

      /*
       * free( NULL ) is defined to do nothing, so there is no need to
       *  test the pointer first.  clear it afterwards so the slot can
       *  never be freed twice.
       */
      free( nfa.class[i] );
      nfa.class[i] = NULL;
   }
}


/*
 * Name:    regx_error
 * Purpose: report a reg ex error and mark the reg ex as bad
 * Date:    June 5, 1993
 * Passed:  line:  passed through to error( ) as its last argument.
 *                  callers hand in the reg* constants, presumably the
 *                  message text - confirm against their definitions.
 * Returns: none, but sets reg ex return code to error.
 */
void regx_error( char *line )
{
   /*
    * report as a WARNING first, then record the failure in regx_rc.
    */
   error( WARNING, regx_error_line, line );

   regx_rc = ERROR;
}


/*
 * Name:    separator
 * Purpose: determine if character is a reg ex separator
 * Date:    June 5, 1993
 * Passed:  let:  letter to look at
 * Returns: 1 when 'let' is the terminating 0, a close paren or the
 *           '|' bar, 0 for every other letter.
 */
int  separator( int let )
{
int is_sep;

   switch (let) {
      case 0   :
      case ')' :
      case '|' :
         is_sep = 1;
         break;
      default  :
         is_sep = 0;
         break;
   }
   return( is_sep );
}


/*
 * Name:    Kleene_star
 * Purpose: determine if character is a reg ex operator
 * Date:    June 5, 1993
 * Passed:  let:  letter to look at
 * Returns: 1 when 'let' is one of the operators '*', '+' or '?',
 *           0 for every other letter.
 */
int  Kleene_star( int let )
{
int is_op;

   switch (let) {
      case '*' :
      case '+' :
      case '?' :
         is_op = 1;
         break;
      default  :
         is_op = 0;
         break;
   }
   return( is_op );
}


/*
 * Name:    letter
 * Purpose: determine if character is a recognized reg ex character
 * Date:    June 5, 1993
 * Passed:  let:  letter to look at
 * Returns: 1 when 'let' is neither a separator nor a reg ex operator,
 *           0 otherwise.
 */
int  letter( int let )
{
   /*
    * separators are ruled out first, the operator test is only made
    *  for letters that are not separators.
    */
   if (separator( let )) {
      return( 0 );
   }
   if (Kleene_star( let )) {
      return( 0 );
   }
   return( 1 );
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -