📄 regx.c
字号:
if (!myiswhitespc( search_string[j-1] )) {
rc = put_dq( n1 );
if (deque[dq_tail] == SCAN)
--j;
}
}
}
break;
default :
assert( FALSE );
break;
}
if (rc == ERROR)
return( 0 );
} else {
assert( nfa_pointer->node_type[state] == CNODE );
n1 = nfa_pointer->next1[state];
if (push_dq( n1 ) == ERROR)
return( 0 );
if (n1 != nfa_pointer->next2[state])
if (push_dq( nfa_pointer->next2[state] ) == ERROR)
return( 0 );
}
if (dequeempty( ) || j > search_len)
return( ERROR );
state = pop_dq( );
}
return( j - search_col );
}
int put_dq( int v )
{
if (queued_states[v+1] == 0) {
++queued_states[v+1];
deque[dq_head] = v;
dq_head--;
if (dq_head < 0)
dq_head = REGX_SIZE - 1;
if (dq_tail == dq_head) {
nfa_status = ERROR;
show_search_message( NFA_GAVE_UP, g_display.diag_color );
return( ERROR );
}
}
return( OK );
}
int push_dq( int v )
{
++dq_tail;
if (dq_tail == dq_head) {
nfa_status = ERROR;
show_search_message( NFA_GAVE_UP, g_display.diag_color );
return( ERROR );
} else {
if (dq_tail > REGX_SIZE - 1)
dq_tail = 0;
deque[dq_tail] = v;
return( OK );
}
}
int pop_dq( void )
{
register int v;
v = deque[dq_tail];
dq_tail--;
if (dq_tail < 0)
dq_tail = REGX_SIZE - 1;
return( v );
}
int dequeempty( void )
{
if (dq_tail > dq_head)
return( dq_tail - dq_head == 1 );
else
return( dq_tail + REGX_SIZE - dq_head == 1 );
}
int build_nfa( void )
{
regx_rc = OK;
init_nfa( );
lookahead = 0;
reg_len = strlen( (char *)regx.pattern );
parser_state = 1;
nfa.next1[0] = expression( );
if (regx_rc == OK) {
emit_cnode( 0, START, nfa.next1[0], nfa.next1[0] );
emit_cnode( parser_state, ACCEPT, 0, 0 );
regx.node_count = parser_state + 2;
}
if (g_status.command == DefineRegXGrep) {
memcpy( &sas_nfa, &nfa, sizeof(NFA_TYPE) );
memcpy( &sas_regx, ®x, sizeof(REGX_INFO) );
}
return( regx_rc );
}
int expression( void )
{
int t1;
int t2;
int r;
t1 = term( );
r = t1;
if (regx.pattern[lookahead] == '|') {
lookahead++;
parser_state++;
r = t2 = parser_state;
parser_state++;
emit_cnode( t2, OR_NODE, expression( ), t1 );
emit_cnode( t2-1, JUXTA, parser_state, parser_state );
if (nfa.node_type[t1] == CNODE)
t1 = min( nfa.next1[t1], nfa.next2[t1] );
nfa.next1[t1-1] = t2;
if (nfa.node_type[t1-1] == NNODE)
nfa.next2[t1-1] = t2;
}
return( r );
}
int term( void )
{
int r;
r = factor( );
if (regx.pattern[lookahead] == '(' || letter( regx.pattern[lookahead] ))
term( );
else if (Kleene_star( regx.pattern[lookahead] ))
regx_error( reg11 );
return( r );
}
int factor( void )
{
int t1;
int t2;
int r;
int c;
int paren;
t1 = parser_state;
c = regx.pattern[lookahead];
if (c == '(') {
lookahead++;
t2 = expression( );
if (regx.pattern[lookahead] == ')') {
lookahead++;
paren = TRUE;
} else
/*
* unmatched open parens
*/
regx_error( reg2 );
} else if (letter( c )) {
paren = FALSE;
switch (c) {
case ']' :
regx_error( reg9 );
break;
case '.' :
ttype = WILD;
break;
case ',' :
ttype = WHITESPACE;
break;
case '^' :
ttype = BOL;
break;
case '$' :
ttype = EOL;
break;
case '<' :
ttype = BOW;
break;
case '>' :
ttype = EOW;
break;
case '\\' :
++lookahead;
ttype = mode.search_case == IGNORE ? IGNORE_ASCII : STRAIGHT_ASCII;
if (lookahead != '\0') {
if (regx.pattern[lookahead] != ':')
c = escape_char( regx.pattern[lookahead] );
else {
c = regx.pattern[lookahead+1];
if (c != '\0') {
switch (c) {
case 'a' :
++lookahead;
ttype = ALPHANUM;
break;
case 'b' :
++lookahead;
ttype = WHITESPACE;
break;
case 'c' :
++lookahead;
ttype = ALPHA;
break;
case 'd' :
++lookahead;
ttype = DECIMAL;
break;
case 'h' :
++lookahead;
ttype = HEX;
break;
case 'l' :
++lookahead;
ttype = LOWER;
break;
case 'u' :
++lookahead;
ttype = UPPER;
break;
default :
c = escape_char( regx.pattern[lookahead] );
break;
}
} else
c = escape_char( regx.pattern[lookahead] );
}
} else
regx_error( reg4 );
break;
case '[' :
memset( class_bits, 0, sizeof(char) * 32 );
++lookahead;
if (lookahead != '\0') {
c = regx.pattern[lookahead];
if (c == '^') {
++lookahead;
ttype = NOTCLASS;
} else
ttype = CLASS;
c1 = regx.pattern[lookahead];
do {
class_bits[c1/8] |= bit[c1%8];
if (c1 != '\0')
++lookahead;
if (regx.pattern[lookahead] == '-') {
++lookahead;
c2 = regx.pattern[lookahead];
if (c2 != '\0') {
if (c2 < c1) {
c = c2;
c2 = c1;
c1 = c;
}
for (c=c1; c <= c2; c++)
class_bits[c/8] |= bit[c%8];
if (regx.pattern[lookahead] != '\0')
++lookahead;
} else
regx_error( reg10 );
}
c1 = regx.pattern[lookahead];
} while (c1 != '\0' && c1 != ']');
if (c1 == '\0')
regx_error( reg5 );
} else
regx_error( reg6 );
break;
default :
if (mode.search_case == IGNORE) {
c = tolower( c );
ttype = IGNORE_ASCII;
} else
ttype = STRAIGHT_ASCII;
}
emit_nnode( parser_state, ttype, c, parser_state+1, parser_state+1 );
if (ttype == CLASS || ttype == NOTCLASS) {
nfa.class[parser_state] = calloc( 32, sizeof(char) );
if (nfa.class[parser_state] != NULL)
memcpy( nfa.class[parser_state], class_bits, sizeof( char )*32 );
else
regx_error( reg7 );
}
t2 = parser_state;
lookahead++;
parser_state++;
} else if (c == '\0')
return( 0 );
else {
if (c == '*' || c == '+' || c == '?')
regx_error( reg8 );
else if (c == ')')
regx_error( reg3 );
else
regx_error( reg2 );
}
c = regx.pattern[lookahead];
switch (c) {
case '*' :
emit_cnode( parser_state, CLOSURE, parser_state+1, t2 );
r = parser_state;
if (nfa.node_type[t1] == CNODE)
t1 = min( nfa.next1[t1], nfa.next2[t1] );
nfa.next1[t1-1] = parser_state;
if (nfa.node_type[t1-1] == NNODE)
nfa.next2[t1-1] = parser_state;
lookahead++;
parser_state++;
paren = FALSE;
break;
case '+' :
if (paren == TRUE) {
emit_cnode( parser_state, JUXTA, parser_state+2, parser_state+2 );
parser_state++;
}
emit_cnode( parser_state, JUXTA, t2, t2 );
r = parser_state;
parser_state++;
if (paren == FALSE) {
nfa.next1[t2] = parser_state;
if (nfa.node_type[t2] == NNODE)
nfa.next2[t2] = parser_state;
}
emit_cnode( parser_state, CLOSURE, parser_state+1, t2 );
if (nfa.node_type[t1] == CNODE)
t1 = min( nfa.next1[t1], nfa.next2[t1] );
nfa.next1[t1-1] = r;
if (nfa.node_type[t1-1] == NNODE)
nfa.next2[t1-1] = r;
parser_state++;
lookahead++;
paren = FALSE;
break;
case '?' :
emit_cnode( parser_state, JUXTA, parser_state+2, parser_state+2 );
parser_state++;
r = parser_state;
emit_cnode( parser_state, ZERO_OR_ONE, parser_state+1, t2 );
if (nfa.node_type[t1] == CNODE)
t1 = min( nfa.next1[t1], nfa.next2[t1] );
nfa.next1[t1-1] = parser_state;
if (nfa.node_type[t1-1] == NNODE)
nfa.next2[t1-1] = parser_state;
parser_state++;
lookahead++;
paren = FALSE;
break;
default :
r = t2;
break;
}
if (paren) {
emit_cnode( parser_state, JUXTA, parser_state+1, parser_state+1 );
parser_state++;
}
return( r );
}
int escape_char( int let )
{
switch (let) {
case '0' :
let = 0x00;
break;
case 'a' :
let = 0x07;
break;
case 'b' :
let = 0x08;
break;
case 'n' :
let = 0x0a;
break;
case 'r' :
let = 0x0d;
break;
case 't' :
let = 0x09;
break;
default :
break;
}
return( let );
}
void emit_cnode( int index, int ttype, int n1, int n2 )
{
assert( index >= 0);
assert( index < REGX_SIZE );
nfa.node_type[index] = CNODE;
nfa.term_type[index] = ttype;
nfa.c[index] = 0;
nfa.next1[index] = n1;
nfa.next2[index] = n2;
}
void emit_nnode( int index, int ttype, int c, int n1, int n2 )
{
assert( index >= 0);
assert( index < REGX_SIZE );
nfa.node_type[index] = NNODE;
nfa.term_type[index] = ttype;
nfa.c[index] = c;
nfa.next1[index] = n1;
nfa.next2[index] = n2;
}
void init_nfa( void )
{
int i;
for (i=0; i < REGX_SIZE; i++) {
nfa.node_type[i] = NNODE;
nfa.term_type[i] = 0;
nfa.c[i] = 0;
nfa.next1[i] = 0;
nfa.next2[i] = 0;
if (nfa.class[i] != NULL)
free( nfa.class[i] );
nfa.class[i] = NULL;
}
}
void regx_error( char *line )
{
error( WARNING, regx_error_line, line );
regx_rc = ERROR;
}
int separator( int let )
{
return( let == 0 || let == ')' || let == '|' );
}
int Kleene_star( int let )
{
return( let == '*' || let == '+' || let == '?' );
}
int letter( int let )
{
return( !separator( let ) && !Kleene_star( let ) );
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -