📄 regx.c
字号:
{
int t1;
int t2;
int r;
int c;
int paren;
t1 = parser_state;
c = regx.pattern[lookahead];
if (c == '(') {
lookahead++;
t2 = expression( );
if (regx.pattern[lookahead] == ')') {
lookahead++;
paren = TRUE;
} else
/*
* unmatched open parens
*/
regx_error( reg2 );
} else if (letter( c )) {
paren = FALSE;
switch (c) {
case ']' :
/*
* unmatched close bracket
*/
regx_error( reg9 );
break;
case '.' :
ttype = WILD;
break;
case ',' :
ttype = WHITESPACE;
break;
case '^' :
ttype = BOL;
break;
case '$' :
ttype = EOL;
break;
case '<' :
ttype = BOW;
break;
case '>' :
ttype = EOW;
break;
case '\\' :
++lookahead;
ttype = mode.search_case == IGNORE ? IGNORE_ASCII : STRAIGHT_ASCII;
if (lookahead != '\0') {
if (regx.pattern[lookahead] != ':')
c = escape_char( regx.pattern[lookahead] );
/*
* predefined unix-like macros.
*/
else {
c = regx.pattern[lookahead+1];
if (c != '\0') {
switch (c) {
case 'a' :
++lookahead;
ttype = ALPHANUM;
break;
case 'b' :
++lookahead;
ttype = WHITESPACE;
break;
case 'c' :
++lookahead;
ttype = ALPHA;
break;
case 'd' :
++lookahead;
ttype = DECIMAL;
break;
case 'h' :
++lookahead;
ttype = HEX;
break;
case 'l' :
++lookahead;
ttype = LOWER;
break;
case 'u' :
++lookahead;
ttype = UPPER;
break;
default :
c = escape_char( regx.pattern[lookahead] );
break;
}
} else
c = escape_char( regx.pattern[lookahead] );
}
} else
regx_error( reg4 );
break;
case '[' :
memset( class_bits, 0, sizeof(char) * 32 );
++lookahead;
if (lookahead != '\0') {
c = regx.pattern[lookahead];
if (c == '^') {
++lookahead;
ttype = NOTCLASS;
} else
ttype = CLASS;
c1 = regx.pattern[lookahead];
do {
class_bits[c1/8] |= bit[c1%8];
if (c1 != '\0')
++lookahead;
if (regx.pattern[lookahead] == '-') {
++lookahead;
c2 = regx.pattern[lookahead];
if (c2 != '\0') {
/*
* just in case the hi for the range is given first,
* switch c1 and c2, e.g. [9-0].
*/
if (c2 < c1) {
c = c2;
c2 = c1;
c1 = c;
}
for (c=c1; c <= c2; c++)
class_bits[c/8] |= bit[c%8];
if (regx.pattern[lookahead] != '\0')
++lookahead;
} else
regx_error( reg10 );
}
c1 = regx.pattern[lookahead];
} while (c1 != '\0' && c1 != ']');
if (c1 == '\0')
regx_error( reg5 );
} else
regx_error( reg6 );
break;
default :
if (mode.search_case == IGNORE) {
c = tolower( c );
ttype = IGNORE_ASCII;
} else
ttype = STRAIGHT_ASCII;
}
emit_nnode( parser_state, ttype, c, parser_state+1, parser_state+1 );
if (ttype == CLASS || ttype == NOTCLASS) {
nfa.class[parser_state] = calloc( 32, sizeof(char) );
if (nfa.class[parser_state] != NULL)
memcpy( nfa.class[parser_state], class_bits, sizeof( char )*32 );
else
regx_error( reg7 );
}
t2 = parser_state;
lookahead++;
parser_state++;
} else if (c == '\0')
return( 0 );
else {
if (c == '*' || c == '+' || c == '?')
regx_error( reg8 );
else if (c == ')')
regx_error( reg3 );
else
regx_error( reg2 );
}
c = regx.pattern[lookahead];
switch (c) {
case '*' :
emit_cnode( parser_state, CLOSURE, parser_state+1, t2 );
r = parser_state;
if (nfa.node_type[t1] == CNODE)
t1 = min( nfa.next1[t1], nfa.next2[t1] );
nfa.next1[t1-1] = parser_state;
if (nfa.node_type[t1-1] == NNODE)
nfa.next2[t1-1] = parser_state;
lookahead++;
parser_state++;
paren = FALSE;
break;
case '+' :
if (paren == TRUE) {
emit_cnode( parser_state, JUXTA, parser_state+2, parser_state+2 );
parser_state++;
}
emit_cnode( parser_state, JUXTA, t2, t2 );
r = parser_state;
parser_state++;
if (paren == FALSE) {
nfa.next1[t2] = parser_state;
if (nfa.node_type[t2] == NNODE)
nfa.next2[t2] = parser_state;
}
emit_cnode( parser_state, CLOSURE, parser_state+1, t2 );
if (nfa.node_type[t1] == CNODE)
t1 = min( nfa.next1[t1], nfa.next2[t1] );
nfa.next1[t1-1] = r;
if (nfa.node_type[t1-1] == NNODE)
nfa.next2[t1-1] = r;
parser_state++;
lookahead++;
paren = FALSE;
break;
case '?' :
emit_cnode( parser_state, JUXTA, parser_state+2, parser_state+2 );
parser_state++;
r = parser_state;
emit_cnode( parser_state, ZERO_OR_ONE, parser_state+1, t2 );
if (nfa.node_type[t1] == CNODE)
t1 = min( nfa.next1[t1], nfa.next2[t1] );
nfa.next1[t1-1] = parser_state;
if (nfa.node_type[t1-1] == NNODE)
nfa.next2[t1-1] = parser_state;
parser_state++;
lookahead++;
paren = FALSE;
break;
default :
r = t2;
break;
}
/*
* close parens seem to need a JUXTA node to gather all reg ex's
* to a common point.
*/
if (paren) {
emit_cnode( parser_state, JUXTA, parser_state+1, parser_state+1 );
parser_state++;
}
return( r );
}
/*
 * Name:    escape_char
 * Purpose: map the letter of a C-style escape sequence to the control
 *           character it stands for
 * Date:    June 5, 1993
 * Passed:  let:  letter to escape
 * Returns: 0x00, 0x07, 0x08, 0x0a, 0x0d or 0x09 for '0', 'a', 'b', 'n',
 *           'r' or 't' respectively; any other value comes back unchanged
 */
int escape_char( int let )
{
   /* each recognized escape letter paired with its replacement code */
   static const struct {
      int letter;
      int code;
   } escapes[] = {
      { '0', 0x00 },
      { 'a', 0x07 },
      { 'b', 0x08 },
      { 'n', 0x0a },
      { 'r', 0x0d },
      { 't', 0x09 }
   };
   unsigned int k;

   for (k = 0; k < sizeof( escapes ) / sizeof( escapes[0] ); k++) {
      if (escapes[k].letter == let) {
         return( escapes[k].code );
      }
   }

   /*
    * not one of the recognized letters - it stands for itself.
    */
   return( let );
}
/*
 * Name:    emit_cnode
 * Purpose: fill in one slot of the nfa as a control (CNODE) node, i.e. a
 *           node that carries no character of its own
 * Date:    June 5, 1993
 * Passed:  index:  slot in the nfa arrays to write
 *          ttype:  terminal type - CLOSURE, OR, JUXTA, etc...
 *          n1:     next state, path for lambda transitions
 *          n2:     other next state, usually a NNODE
 * Returns: none, but modifies local global nfa.
 * Notes:   index is range checked against REGX_SIZE by assert( ) only.
 */
void emit_cnode( int index, int ttype, int n1, int n2 )
{
   assert( index >= 0);
   assert( index < REGX_SIZE );

   /*
    * outgoing links
    */
   nfa.next1[index] = n1;
   nfa.next2[index] = n2;

   /*
    * node identity.  a control node recognizes no character, so c is 0.
    */
   nfa.c[index]         = 0;
   nfa.term_type[index] = ttype;
   nfa.node_type[index] = CNODE;
}
/*
 * Name:    emit_nnode
 * Purpose: fill in one slot of the nfa as a NNODE, a node that carries
 *           the letter it recognizes
 * Date:    June 5, 1993
 * Passed:  index:  slot in the nfa arrays to write
 *          ttype:  terminal type - EOL, ASCII, etc...
 *          c:      letter this node recognizes
 *          n1:     next state
 *          n2:     other next state, which can be same as n1
 * Returns: none, but modifies local global nfa.
 * Notes:   index is range checked against REGX_SIZE by assert( ) only.
 */
void emit_nnode( int index, int ttype, int c, int n1, int n2 )
{
   assert( index >= 0);
   assert( index < REGX_SIZE );

   /*
    * outgoing links
    */
   nfa.next1[index] = n1;
   nfa.next2[index] = n2;

   /*
    * node identity and the letter it matches
    */
   nfa.c[index]         = c;
   nfa.term_type[index] = ttype;
   nfa.node_type[index] = NNODE;
}
/*
 * Name:    init_nfa
 * Purpose: put every slot of the local global nfa back to its empty state
 * Date:    June 5, 1993
 * Passed:  none
 * Notes:   each of the REGX_SIZE slots becomes a NNODE with zero terminal
 *           type, zero letter and both next states 0.  a class table still
 *           attached to a slot is released with free( ) and the pointer
 *           is set to NULL.
 */
void init_nfa( void )
{
   int slot = 0;

   while (slot < REGX_SIZE) {
      /*
       * let go of any class table left in this slot.
       */
      if (nfa.class[slot] != NULL) {
         free( nfa.class[slot] );
      }
      nfa.class[slot] = NULL;

      /*
       * blank out the node itself.
       */
      nfa.node_type[slot] = NNODE;
      nfa.term_type[slot] = 0;
      nfa.c[slot]         = 0;
      nfa.next1[slot]     = 0;
      nfa.next2[slot]     = 0;

      ++slot;
   }
}
/*
 * Name:    regx_error
 * Purpose: display reg ex error message and set reg ex error code
 * Date:    June 5, 1993
 * Passed:  line:  text of the error message to display
 * Returns: none, but sets reg ex return code to error.
 * Notes:   the message is handed to error( ) at WARNING level together
 *           with regx_error_line; afterwards the global regx_rc is set to
 *           ERROR.  this function itself does not stop the parse - it
 *           returns to its caller normally.
 */
void regx_error( char *line )
{
/*
 * report first, then flag the failure.
 */
error( WARNING, regx_error_line, line );
regx_rc = ERROR;
}
/*
 * Name:    separator
 * Purpose: determine if character is a reg ex separator
 * Date:    June 5, 1993
 * Passed:  let:  letter to look at
 * Returns: 1 if 'let' is the terminating 0, a ')' or a '|'; 0 otherwise
 */
int separator( int let )
{
   switch (let) {
      case 0   :
      case ')' :
      case '|' :
         return( 1 );
      default  :
         return( 0 );
   }
}
/*
 * Name:    Kleene_star
 * Purpose: determine if character is a reg ex repetition operator
 * Date:    June 5, 1993
 * Passed:  let:  letter to look at
 * Returns: 1 if 'let' is one of '*', '+' or '?'; 0 otherwise
 */
int Kleene_star( int let )
{
   if (let == '*') {
      return( 1 );
   }
   if (let == '+') {
      return( 1 );
   }
   return( let == '?' );
}
/*
 * Name:    letter
 * Purpose: determine if character is a recognized reg ex character
 * Date:    June 5, 1993
 * Passed:  let:  letter to look at
 * Returns: 1 when 'let' is neither a separator nor a repetition operator
 *           (as judged by separator( ) and Kleene_star( )); 0 otherwise
 */
int letter( int let )
{
   /*
    * separator( ) is asked first; Kleene_star( ) is consulted only when
    * 'let' is not a separator.
    */
   return( !(separator( let ) || Kleene_star( let )) );
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -