📄 regx.c
字号:
* Date: June 5, 1993 * Passed: none * Returns: none, but modifies local global nfa. * Notes: this function does most of the compiling. it recognizes all * NNODEs, predefined macros, escape characters, and operators. */int factor( void ){int t1;int t2;int r;int c;int sub = 0;int bq = ERROR; t2 = t1 = parser_state; c = regx.pattern[lookahead]; if (c == '(') { if (regx.pattern[++lookahead] == '?') { c = regx.pattern[++lookahead]; if (c != ':' && c != '=' && c != '!') { /* * unrecognized character after (? */ regx_error( reg13 ); return( 0 ); } ++lookahead; bq = TRUE; if (c != ':') { emit_cnode( parser_state, (c == '!') ? ASSERTNOT : ASSERT, parser_state+1, parser_state+1 ); parser_state++; bq++; } } else { sub = subs_found++; emit_nnode( parser_state, BOB, sub, parser_state+1, parser_state+1 ); parser_state++; t1 = parser_state; bq = FALSE; } t2 = expression( ); if (regx.pattern[lookahead] == ')') { if (bq) { if (!Kleene_star( regx.pattern[lookahead+1] )) { /* * close parens seem to need a JUXTA node to gather all reg ex's * to a common point. */ emit_cnode(parser_state, JUXTA, parser_state+1, parser_state+1); parser_state++; } if (bq > TRUE && regx.pattern[lookahead+1] != '\0') /* * assertion must be last */ regx_error( reg14 ); } lookahead++; } else /* * unmatched open parens */ regx_error( reg2 ); } else if (letter( c )) { switch (c) { case ']' : /* * unmatched close bracket */ regx_error( reg9 ); break; case '.' : ttype = WILD; break; case ',' : ttype = BLANK; break; case '^' : ttype = BOL; break; case '$' : ttype = EOL; break; case '<' : ttype = BOW; if (regx.pattern[lookahead+1] == '<') { ++lookahead; ttype = BOS; } break; case '>' : ttype = EOW; if (regx.pattern[lookahead+1] == '>') { ++lookahead; ttype = EOS; } break; case '\\' : c = regx.pattern[++lookahead]; ttype = mode.search_case == IGNORE ? IGNORE_ASCII : STRAIGHT_ASCII; if (c != '\0') { if (c >= '1' && c <= '9') { c -= '0'; if (c >= subs_found) regx_error( reg12 ); ttype = BACKREF; } else if (c != ':') c = escape_char( c ); /* * predefined unix-like macros. */ else { c = regx.pattern[++lookahead]; switch (bj_tolower( c )) { case REG_ALPHANUM : ttype = ALPHANUM; break; case REG_WHITESPACE : ttype = WHITESPACE; break; case REG_ALPHA : ttype = ALPHA; break; case REG_DECIMAL : ttype = DECIMAL; break; case REG_HEX : ttype = HEX; break; case REG_LOWER : ttype = LOWER; break; case REG_UPPER : ttype = UPPER; break; default : c = ':'; break; } if (ttype != IGNORE_ASCII && ttype != STRAIGHT_ASCII) c = !bj_islower( c ); } } else regx_error( reg4 ); break; case '[' : memset( class_bits, 0, 32 ); c = regx.pattern[++lookahead]; if (c != '\0') { if (c == '^') { ++lookahead; ttype = NOTCLASS; } else ttype = CLASS; c1 = regx.pattern[lookahead]; if (c1 != '\0') do { c2 = 0; if (c1 == '\\' && regx.pattern[lookahead+1] != '\0') { c1 = regx.pattern[++lookahead]; if (c1 != ':') c1 = escape_char( c1 ); else { c1 = regx.pattern[++lookahead]; switch (bj_tolower( c1 )) { case REG_ALPHANUM : c2 = BJ_alpha|BJ_digit; break; case REG_WHITESPACE : c2 = BJ_space; break; case REG_ALPHA : c2 = BJ_alpha; break; case REG_DECIMAL : c2 = BJ_digit; break; case REG_HEX : c2 = BJ_xdigit; break; case REG_LOWER : c2 = BJ_lower; break; case REG_UPPER : c2 = BJ_upper; break; default : --lookahead; break; } if (c2) c1 = !bj_islower( c1 ); } } if (c2) { for (c = 0; c <= 255; c++) if (!(bj_ctype[c] & c2) == c1) class_bits[c/8] |= bit[c%8]; } else { c2 = c1; if (regx.pattern[++lookahead] == '-') { c2 = regx.pattern[++lookahead]; if (c2 != '\0') { if (c2 == '\\' && regx.pattern[lookahead+1] != '\0') c2 = escape_char( regx.pattern[++lookahead] ); ++lookahead; /* * just in case the hi for the range is given first, * switch c1 and c2, e.g. [9-0]. */ if (c2 < c1) { c = c2; c2 = c1; c1 = c; } } else regx_error( reg10 ); } if (mode.search_case == IGNORE) { for (; c1 <= c2; c1++) { c = bj_tolower( c1 ); class_bits[c/8] |= bit[c%8]; c = bj_toupper( c1 ); class_bits[c/8] |= bit[c%8]; } } else for (c = c1; c <= c2; c++) class_bits[c/8] |= bit[c%8]; } c1 = regx.pattern[lookahead]; } while (c1 != '\0' && c1 != ']'); if (c1 == '\0') regx_error( reg5 ); } else regx_error( reg6 ); break; default : if (mode.search_case == IGNORE) { c = bj_tolower( c ); ttype = IGNORE_ASCII; } else ttype = STRAIGHT_ASCII; } emit_nnode( parser_state, ttype, c, parser_state+1, parser_state+1 ); if (ttype == CLASS || ttype == NOTCLASS) { nfa.class[parser_state] = malloc( 32 ); if (nfa.class[parser_state] != NULL) memcpy( nfa.class[parser_state], class_bits, 32 ); else regx_error( reg7 ); } t2 = parser_state; lookahead++; parser_state++; if (ttype >= BOL && ttype <= EOS && Kleene_star(regx.pattern[lookahead])) regx_error( reg8 ); } else if (c == '\0') return( 0 ); else { if (Kleene_star( c )) regx_error( reg8 ); else if (c == ')') regx_error( reg3 ); else regx_error( reg2 ); } c = regx.pattern[lookahead]; switch (c) { case '*' : emit_cnode( parser_state, CLOSURE, parser_state+1, t2 ); r = parser_state; if (nfa.node_type[t1] == CNODE) t1 = min( nfa.next1[t1], nfa.next2[t1] ); nfa.next1[t1-1] = parser_state; if (nfa.node_type[t1-1] == NNODE) nfa.next2[t1-1] = parser_state; parser_state++; lookahead++; break; case '+' : emit_cnode( parser_state, CLOSURE, parser_state+1, t2 ); r = t2; parser_state++; lookahead++; break; case '?' : emit_cnode( parser_state, JUXTA, parser_state+2, parser_state+2 ); parser_state++; r = parser_state; emit_cnode( parser_state, ZERO_OR_ONE, parser_state+1, t2 ); if (nfa.node_type[t1] == CNODE) t1 = min( nfa.next1[t1], nfa.next2[t1] ); nfa.next1[t1-1] = parser_state; if (nfa.node_type[t1-1] == NNODE) nfa.next2[t1-1] = parser_state; parser_state++; lookahead++; break; default : r = t2; break; } if (regx.pattern[lookahead] == '?') { /* * swap the previous node's next1 and next2 to implement non-greedy */ nfa.next2[parser_state-1] = nfa.next1[parser_state-1]; nfa.next1[parser_state-1] = t2; lookahead++; } if (!bq) { emit_nnode( parser_state, EOB, sub, parser_state+1, parser_state+1 ); parser_state++; } return( r );}/* * Name: escape_char * Purpose: recognize escape and C escape sequences * Date: June 5, 1993 * Passed: let: letter to escape * Returns: escaped letter * * jmh 991006: added \e for ESC, \f for form-feed and \v for vertical tab */int escape_char( int let ){ switch (let) { case '0' : let = 0x00; break; case 'a' : let = 0x07; break; case 'b' : let = 0x08; break; case 'e' : let = 0x1b; break; case 'f' : let = 0x0c; break; case 'n' : let = 0x0a; break; case 'r' : let = 0x0d; break; case 't' : let = 0x09; break; case 'v' : let = 0x0b; break; } return( let );}/* * Name: emit_cnode * Purpose: add a null node to our pattern matching machine * Date: June 5, 1993 * Passed: index: current node in nfa * ttype: terminal type - CLOSURE, OR, JUXTA, etc... * n1: pointer to next state, path for lambda transitions * n2: pointer to other next state, usually a NNODE * Returns: none, but modifies local global nfa. */void emit_cnode( int index, int ttype, int n1, int n2 ){ assert( index >= 0); assert( index < REGX_SIZE ); nfa.node_type[index] = CNODE; nfa.term_type[index] = ttype; nfa.c[index] = 0; nfa.next1[index] = n1; nfa.next2[index] = n2;}/* * Name: emit_nnode * Purpose: add a to our pattern matching machine * Date: June 5, 1993 * Passed: index: current node in nfa * ttype: terminal type - EOL, ASCII, etc... * c: letter this node recognizes * n1: pointer to next state * n2: pointer to other next state, which can be same as n1 * Returns: none, but modifies local global nfa. */void emit_nnode( int index, int ttype, int c, int n1, int n2 ){ assert( index >= 0); assert( index < REGX_SIZE ); nfa.node_type[index] = NNODE; nfa.term_type[index] = ttype; nfa.c[index] = c; nfa.next1[index] = n1; nfa.next2[index] = n2;}/* * Name: init_nfa * Purpose: set local global nfa to NULL state * Date: June 5, 1993 * Passed: none */void init_nfa( void ){int i; for (i = 0; i < REGX_SIZE; i++) { nfa.node_type[i] = NNODE; nfa.term_type[i] = 0; nfa.c[i] = 0; nfa.next1[i] = 0; nfa.next2[i] = 0; if (nfa.class[i] != NULL) free( nfa.class[i] ); nfa.class[i] = NULL; }}/* * Name: regx_error * Purpose: display reg ex error message and set reg ex error code * Date: June 5, 1993 * Passed: line: line to display error * Returns: none, but sets reg ex return code to error. * * jmh 010528: If regx_error_line is zero, assume this is called due to the * the command line grep and set g_status.errmsg, instead. * jmh 031130: remove regx_error_line, use g_status.current_window instead. */void regx_error( const char *line ){ if (g_status.current_window == NULL) g_status.errmsg = line; else error( WARNING, (g_status.command == RegXForward || g_status.command == RegXBackward) ? g_status.current_window->bottom_line : g_display.end_line, line ); regx_rc = ERROR;}/* * Name: separator * Purpose: determine if character is a reg ex separator * Date: June 5, 1993 * Passed: let: letter to look at * Returns: whether or not 'let' is a separator */int separator( int let ){ return( let == '\0' || let == ')' || let == '|' );}/* * Name: Kleene_star * Purpose: determine if character is a reg ex operator * Date: June 5, 1993 * Passed: let: letter to look at * Returns: whether or not 'let' is a letter */int Kleene_star( int let ){ return( let == '*' || let == '+' || let == '?' );}/* * Name: letter * Purpose: determine if character is a recognized reg ex character * Date: June 5, 1993 * Passed: let: letter to look at * Returns: whether or not 'let' is a letter. */int letter( int let ){ return( !separator( let ) && !Kleene_star( let ) );}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -