📄 regx.c
字号:
if (*rline == file->block_br) { if (search_col < bc) return( NULL ); } } else { search_len = ec + 1; if (search_col > search_len) search_col = search_len; } search_string += bc; search_len -= bc; search_col -= bc; } else bc = 0; anchor = nfa.next1[0]; anchor = (nfa.node_type[anchor] == NNODE && nfa.term_type[anchor] == BOL); while (!g_status.control_break) { if (anchor) { if (search_col >= 0) { search_col = 0; if (nfa_match( ) != ERROR) { if (g_status.search & SEARCH_RESULTS) { if (!add_search_line( ll, *rline, 0 )) return( NULL ); } else { *col = bc; return( ll ); } } } } else { for (; search_col >= 0; search_col--) { if (nfa_match( ) != ERROR) { if (g_status.search & SEARCH_RESULTS) { if (!add_search_line( ll, *rline, search_col )) return( NULL ); break; } else { *col = search_col + bc; return( ll ); } } } } ll = ll->prev; if (ll->len == EOF) return( NULL ); --*rline; search_string = ll->line; search_col = search_len = ll->len; if ((g_status.search & SEARCH_BLOCK) && (file->block_type == BOX || (file->block_type == STREAM && *rline == file->block_br))) { bc = file->block_bc; ec = (file->block_ec == -1) ? 0 : file->block_ec; if (file->inflate_tabs) { bc = entab_adjust_rcol( search_string, search_len, bc, file->ptab_size ); ec = entab_adjust_rcol( search_string, search_len, ec, file->ptab_size ); } if (bc > search_len) bc = search_len; if (ec >= search_len) ec = search_len - 1; if (file->block_type == BOX) search_len = ec + 1; search_string += bc; search_len -= bc; search_col = search_len; } } return( NULL );}/****************************** NFA Recognizer *********************///#define SHOW_NFA//#define SHOW_SUBS//#define SHOW_STATE/* * Name: nfa_match * Purpose: try to recognize a pattern * Date: June 5, 1993 * Passed: none, but modifies local global nfa recognizer. * Returns: length of recognized pattern if found or ERROR if not found. */int nfa_match1( int state, int j );int nfa_match( void ){int len; memset( subs_pos, 0, sizeof(subs_pos) ); memset( subs_len, 0, sizeof(subs_len) ); sort.order_array = (mode.search_case == IGNORE) ? sort_order.ignore : sort_order.match; len = nfa_match1( nfa_pointer->next1[0], search_col ); if (len != ERROR) subs_len[0] = found_len = len;#if defined( SHOW_SUBS ) if (subs_found > 1) {# if defined( SHOW_STATE ) putchar( '\n' );# endif fputs( "Subs:", stdout ); for (int i = 1; i < subs_found; ++i) printf( " %d:%d", subs_pos[i], subs_len[i] ); putchar( '\n' );# if defined( SHOW_STATE ) || defined( SHOW_NFA ) putchar( '\n' );# endif }#endif return( len );}int nfa_match1( int state, int j ){register int c;int i;int n1, n2;int rc; c = j < search_len ? search_string[j] : EOF; while (state) {#if defined( SHOW_STATE ) printf( "%d: ", state );#endif n1 = nfa_pointer->next1[state]; if (nfa_pointer->node_type[state] == NNODE) { rc = OK; switch (nfa_pointer->term_type[state]) { case STRAIGHT_ASCII : if (c != EOF && nfa_pointer->c[state] == c) break; return( ERROR ); case IGNORE_ASCII : if (c != EOF && nfa_pointer->c[state] == bj_tolower( c )) break; return( ERROR ); case WILD : if (c != EOF) break; return( ERROR ); case CLASS : if (c != EOF && nfa_pointer->class[state][c/8] & bit[c%8]) break; return( ERROR ); case NOTCLASS : if (c != EOF && !(nfa_pointer->class[state][c/8] & bit[c%8])) break; return( ERROR ); case BLANK : if (c == ' ' || c == '\t') break; return( ERROR ); case WHITESPACE : if (c != EOF && !bj_isspace( c ) == nfa_pointer->c[state]) break; return( ERROR ); case ALPHA : if (c != EOF && !bj_isalpha( c ) == nfa_pointer->c[state]) break; return( ERROR ); case UPPER : if (c != EOF && !bj_isupper( c ) == nfa_pointer->c[state]) break; return( ERROR ); case LOWER : if (c != EOF && !bj_islower( c ) == nfa_pointer->c[state]) break; return( ERROR ); case ALPHANUM : if (c != EOF && !bj_isalnum( c ) == nfa_pointer->c[state]) break; return( ERROR ); case DECIMAL : if (c != EOF && !bj_isdigit( c ) == nfa_pointer->c[state]) break; return( ERROR ); case HEX : if (c != EOF && !bj_isxdigit( c ) == nfa_pointer->c[state]) break; return( ERROR ); case BOL : if (j == 0) { rc = TRUE; break; } return( ERROR ); case EOL : if (j == search_len) { rc = TRUE; break; } return( ERROR ); case BOW : if (c != EOF && !myiswhitespc( c )) { if (j == 0 || myiswhitespc( search_string[j-1] )) { rc = TRUE; break; } } return( ERROR ); case EOW : if (c == EOF) { if (search_len > 0) { if (!myiswhitespc( search_string[search_len-1] )) { rc = TRUE; break; } } } else { if (myiswhitespc( c )) { if (j > 0 && !myiswhitespc( search_string[j-1] )) { rc = TRUE; break; } } } return( ERROR ); case BOS : if (c != EOF && !bj_isspace( c )) { if (j == 0 || bj_isspace( search_string[j-1] )) { rc = TRUE; break; } } return( ERROR ); case EOS : if (c == EOF) { if (search_len > 0) { if (!bj_isspace( search_string[search_len-1] )) { rc = TRUE; break; } } } else { if (bj_isspace( c )) { if (j > 0 && !bj_isspace( search_string[j-1] )) { rc = TRUE; break; } } } return( ERROR ); case BOB : i = nfa_pointer->c[state]; if (i < 10) subs_pos[i] = j - search_col; rc = TRUE; break; case EOB : i = nfa_pointer->c[state]; if (i < 10) subs_len[i] = j - search_col - subs_pos[i]; rc = TRUE; break; case BACKREF : i = nfa_pointer->c[state]; if (subs_len[i] > 0 && subs_len[i] <= search_len - j) { if (my_memcmp( search_string + j, search_string + search_col + subs_pos[i], subs_len[i] ) == 0) { j += subs_len[i] - 1; break; } } return( ERROR ); default : assert( FALSE ); return( ERROR ); } if (rc == OK) { ++j; c = j < search_len ? search_string[j] : EOF; } state = n1; } else { assert( nfa_pointer->node_type[state] == CNODE ); if (nfa_pointer->term_type[state] == ASSERT) { if (nfa_match1( n1, j ) != ERROR) break; return( ERROR ); } if (nfa_pointer->term_type[state] == ASSERTNOT) { if (nfa_match1( n1, j ) == ERROR) break; return( ERROR ); } n2 = nfa_pointer->next2[state]; if (n1 == n2) { state = n1; continue; } /* * special case for greedy .* - it will always match everything, * so go straight to the end and work backwards. */ if (n2 == state - 1 && nfa_pointer->node_type[n2] == NNODE && nfa_pointer->term_type[n2] == WILD) { rc = ERROR; for (i = search_len; i >= j; --i) if ((rc = nfa_match1( n1, i )) != ERROR) break; } else { rc = nfa_match1( n2, j ); if (rc == ERROR) { state = n1; continue; } } return( rc ); } } return( j - search_col );}/*************************** NFA Compiler **************************//* * Name: build_nfa * Purpose: initialize nfa and call the compiler * Date: June 5, 1993 * Passed: none, but looks at local global pattern. * Returns: whether or not an ERROR occured * Notes: sets up the initial variables for the compiler. builds * initial and acceptor states for the nfa after compiler finishes. */int build_nfa( void ){ regx_rc = OK; init_nfa( ); subs_found = 1; /* 0 is always found */ lookahead = 0; parser_state = 1; nfa.next1[0] = expression( ); if (regx_rc == OK) { emit_cnode( 0, START, nfa.next1[0], nfa.next1[0] ); emit_cnode( parser_state, ACCEPT, 0, 0 ); regx.node_count = parser_state + 2; }#if defined( SHOW_NFA ) { int i, n; static char* ntype[] = { "CNODE", "NNODE" }; static char* ttype[2][22] = { { "START ", "ACCEPT", "OR ", "JUXTA ", "CLOSE ", "OPTION", "ASSERT", "ASERT!" }, { "ASCII ", "IASCII", "WILD ", "CLASS ", "!CLASS", "BLANK ", "SPACE ", "ALPHA ", "UPPER ", "LOWER ", "ALNUM ", "DECI ", "HEX ", "BOL ", "EOL ", "BOW ", "EOW ", "BOS ", "EOS ", "BOB ", "EOB ", "BACKRF" } }; for (i = 0; nfa.term_type[i] != 0 || i == 0; i++) { n = nfa.node_type[i]; printf( "%2d: %s, %s, n1 = %2d, n2 = %2d", i, ntype[n], ttype[n][nfa.term_type[i]], nfa.next1[i], nfa.next2[i] ); if (n == NNODE) { printf( ", c = " ); if (nfa.c[i] < 32) printf( "\\%c", nfa.c[i] + '0' ); else putchar( nfa.c[i] ); } putchar( '\n' ); } putchar( '\n' ); }#endif if (g_status.command == DefineGrep) { memcpy( &sas_nfa, &nfa, sizeof(NFA_TYPE) ); memcpy( &sas_regx, ®x, sizeof(REGX_INFO) ); } return( regx_rc );}/* * Name: expression * Purpose: break reg ex into terms and expressions * Date: June 5, 1993 * Passed: none, but looks at local global pattern. * Returns: none * Notes: because recursion can go fairly deep, almost all variables * were made local global. no need to save most of this stuff * on the stack. * * jmh 050707: fixed a bug with '?' followed by bracketed alternatives. */int expression( void ){int t1;int t2;int r; t1 = term( ); r = t1; if (regx.pattern[lookahead] == '|') { lookahead++; parser_state++; r = t2 = parser_state; parser_state++; emit_cnode( t2, OR_NODE, expression( ), t1 ); emit_cnode( t2-1, JUXTA, parser_state, parser_state ); /* * the OR_NODE seems to need a path from the previous node */ if (nfa.node_type[t1] == CNODE) t1 = min( nfa.next1[t1], nfa.next2[t1] ); nfa.next1[t1-1] = t2; if (nfa.node_type[t1-1] == NNODE) nfa.next2[t1-1] = t2; else if (nfa.term_type[t1-1] == ZERO_OR_ONE) nfa.next1[t1-2] = nfa.next2[t1-2] = t2; } return( r );}/* * Name: term * Purpose: break reg ex into factors and terms * Date: June 5, 1993 * Passed: none, but looks at local global pattern. * Returns: none */int term( void ){int r; r = factor( ); if (regx.pattern[lookahead] == '(' || letter( regx.pattern[lookahead] )) term( ); else if (Kleene_star( regx.pattern[lookahead] )) regx_error( reg11 ); return( r );}/* * Name: factor * Purpose: add NNODEs and CNODEs to local global nfa
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -