📄 myreg.cpp
字号:
*(r+1) = end;
}
return;
}
// insert point?
if ( *r > start ) { break; }
// goto next
r+=2;
}
if ( *r ) {
// move the rest to the tail
while( tail >= r ) {
*(tail+2) = *tail;
*(tail+3) = *(tail+1);
tail -= 2;
}
}
}
// write the range to the insert point
len += 2;
*r = start;
*(r+1) = end;
*(range_list + len ) = 0;
}
void merge_range_list( char* range_list ) {
assert( strlen( range_list ) > 0 );
char tmp_range_list[MAX_RANGE_LIST_SIZE];
char* r = range_list;
char* w = tmp_range_list;
char s1 = 0, s2 = 0, e1 = 0, e2 = 0;
s1 = *r++;
e1 = *r++;
while( *r ) {
s2 = *r++;
e2 = *r++;
// according to the insert operation, s2 is always less than s1, and there is no same s
// there is three situation ( in x axis )
// s1,e1,s2,e2 -- goto [s2,e2]
// s1,s2,e1,e2 -- replace e1 with e2
// s1,s2,e2,e1 -- ignore [s2,e2]
if ( s2 > e1 ) {
// write current range
*w++ = s1;
*w++ = e1;
// goto [s2,e2]
s1 = s2;
e1 = e2;
continue;
}
if ( s2 <= e1 && e1 <= e2 ) {
e1 = e2;
continue;
}
}
// write the last range
*w++ = s1;
*w++ = e1;
*w = 0;
printf( "Src ranges %s\n", range_list );
printf( "Merged ranges %s\n", tmp_range_list );
strcpy( range_list, tmp_range_list );
}
/*
<range> ::= <symbol>
| <symbol> '-' <symbol>
*/
int range( REG_INTERP* inter, char& start_out, char& end_out, REG_CHAR_TYPE& t_out ) {
REG_CHAR_TYPE t = 0;
if( LOOKAHEAD(inter) == ']' ) return 0;
get_symbol_char ( inter, start_out, t );
if ( LOOKAHEAD(inter) == '-' ) {
READ(inter);
// range text
get_symbol_char( inter, end_out, t );
t_out = REG_TEXT;
if( end_out < start_out ) {
swap( start_out, end_out );
}
} else {
// this could be wildchar
t_out = t;
end_out = start_out;
}
printf( "Range %c %c\n", start_out, end_out );
return 1;
}
inline REG_NODE* range_node( REG_INTERP* inter, char* list ) {
ENLARGE_POOL_WHEN_NECESSARY(inter)
REG_NODE* n = GET_FREE_NODE(inter);
n->type = REG_TEXT | REG_RANGE_INPUT;
n->nullable = 0;
char* r = list;
char c1 = 0, c2 = 0;
while ( *r ) {
c1 = *r++;
c2 = *r++;
if ( c1 == c2 ) {
append_reg_node_input( n, c1 );
} else {
append_reg_node_input( n, c1, c2 );
}
}
n->first_set = new_set( inter->factory, n );
n->last_set = n->first_set;
// empty set
n->follow_set = NULL;
return n;
}
inline REG_NODE* char_node( REG_INTERP* inter, char c ) {
//printf ("char_node %c\n", c );
ENLARGE_POOL_WHEN_NECESSARY(inter)
REG_NODE* n = GET_FREE_NODE(inter);
n->type = REG_TEXT;
set_reg_node_input( n, c );
n->nullable = 0;
n->first_set = new_set( inter->factory, n );
n->last_set = n->first_set;
// empty set
n->follow_set = NULL;
return n;
}
inline REG_NODE* wildchar_node( REG_INTERP* inter, char c ) {
char range[] = { 1, '\n'-1, '\n'+1, REG_MAX_CHAR, 0 };
// REG_NODE* n = char_node( inter, c );
// n->type = REG_WILDCHAR;
// set_reg_node_input( n, c );
// n->nullable = 0;
// n->first_set = new_set( n );
// n->last_set = n->first_set;
// empty set
// n->follow_set = NULL;
// return n;
return range_node( inter, range );
}
inline REG_NODE* oper( REG_INTERP* inter, char op_char, REG_NODE* l, REG_NODE* r ) {
ENLARGE_POOL_WHEN_NECESSARY(inter)
REG_NODE* op = GET_FREE_NODE(inter);
// assign binary operator
set_reg_node_input( op, op_char );
op->left = l;
op->right = r;
op->type = REG_OPER;
if ( l ) { l->parent = op; }
if ( r ) { r->parent = op; }
// empty follow set by default
op->follow_set = NULL;
return op;
}
inline REG_NODE* oper_join( REG_INTERP* inter, REG_NODE* l, REG_NODE* r ) {
printf ( "join %x %x\n", l, r );
assert( l != NULL && r != NULL );
assert( l != r );
// get operator node
REG_NODE* op = oper( inter, '.', l , r );
// nullable is ture if both side is nullable
op->nullable = l->nullable && r->nullable;
// operator will always have a set
// calculate the first set
if ( l->nullable ) {
// union the first set
op->first_set = union_set( inter->factory, l->first_set, r->first_set );
} else {
// use the first set of l only
op->first_set = l->first_set;
}
// calculate the last set
if ( r->nullable ) {
// union the last set
op->last_set = union_set( inter->factory, l->last_set, r->last_set );
} else {
// use the last set of r only
op->last_set = r->last_set;
}
// calculate followset
// the followset of every node in the last set of l contains the first set of r
// if the node is char node, the first set and the last set is itself
REG_NODE** n = (REG_NODE**)l->last_set->set;
while( *n ) {
(*n)->follow_set = union_set( inter->factory, (*n)->follow_set, r->first_set );
n++;
}
return op;
}
inline REG_NODE* oper_or( REG_INTERP* inter, REG_NODE* l, REG_NODE* r ) {
printf ( "or %x %x\n", l, r );
assert( l != NULL && r != NULL );
assert( l != r );
// get operator node
REG_NODE* op = oper( inter, '|', l , r );
// nullable is true if any side is nullable
op->nullable = l->nullable || r->nullable;
// calculate first set and last set
// union the first set
op->first_set = union_set( inter->factory, l->first_set, r->first_set );
// union the last set
op->last_set = union_set( inter->factory, l->last_set, r->last_set );
return op;
}
inline REG_NODE* oper_aster( REG_INTERP* inter, REG_NODE* l ) {
printf ( "aster %x\n", l );
REG_NODE* op = oper( inter, '*', l, NULL );
op->nullable = 1;
op->first_set = l->first_set;
op->last_set = l->last_set;
// calculate followset
// the followset of every node in the last set of l contains the first set of l
// if the node is char node, the first set and the last set is itself
REG_NODE** n = (REG_NODE**)l->last_set->set;
// REG_NODE** fl = NULL;
while( *n ) {
(*n)->follow_set = union_set( inter->factory, (*n)->follow_set, l->first_set );
n++;
}
return op;
}
inline REG_NODE* oper_plus( REG_INTERP* inter, REG_NODE* l ) {
printf ( "plus %x\n", l );
REG_NODE* op = oper( inter, '*', l, NULL );
op->nullable = 0; // different from the aster node, it not nullable
op->first_set = l->first_set;
op->last_set = l->last_set;
// calculate followset
// the followset of every node in the last set of l contains the first set of l
// if the node is char node, the first set and the last set is itself
REG_NODE** n = (REG_NODE**)l->last_set->set;
// REG_NODE** fl = NULL;
while( *n ) {
(*n)->follow_set = union_set( inter->factory, (*n)->follow_set, l->first_set );
n++;
}
return op;
}
// print node to stdout
void PrintNode( REG_NODE* n ) {
printf( "Node: %x\n", n );
printf( "TYPE: %x %s %s\n", n->type,
( IS_TEXT( n->type ) ? "REG_TEXT" : "" ),
( IS_OPER( n->type ) ? "REG_OPER" : "" )
);
printf( "PARENT: %x\n", n->parent );
printf( "LEFT: %x\nRIGHT: %x\n", n->left, n->right );
printf( "INPUT: %s\n", n->input );
printf( "NULLABLE: %s\n", ( n->nullable ? "true" : "false" ) );
if ( n->first_set ) {
printf( "FIRST SET\n" );
PrintSet( n->first_set );
} else {
printf( "No first set\n" );
}
if ( n->last_set ) {
printf ( "LAST SET\n" );
PrintSet( n->last_set );
} else {
printf( "No last set\n" );
}
if ( n->follow_set ) {
printf( "FOLLOWER SET\n" );
PrintSet( n->follow_set );
} else {
printf( "No follower set\n" );
}
}
void PrintREXP( REG_NODE* root ) {
if ( IS_OPER(root->type) && root->input[0] == '*' ) {
printf ( "(" );
}
if ( root->left ) {
PrintREXP( root->left );
}
if ( IS_TEXT(root->type) ) {
printf( "%s", root->input );
}
if ( IS_OPER( root->type ) ) {
switch ( root->input[0] ) {
case '*':
printf( ")*\n" );
break;
case '|':
printf( "\t|\n" );
break;
case '.':
break;
}
}
if ( root->right ) {
PrintREXP( root->right );
}
}
void traverse( REG_NODE* root, int level ) {
if ( root->left ) {
traverse( root->left, level + 1 );
}
if ( root->right ) {
traverse( root->right, level + 1 );
}
printf( "LEVEL %x\n", level );
PrintNode( root );
printf( "============================================\n" );
}
// compile a regular expression
REG_INTERP* CompileReg( const char* text ) {
char* p = NULL;
if ( text[0] != '^' ) {
// no force start mark
p = (char*) malloc( strlen( text ) + 3 );
p[0] = '.';
p[1] = '*';
strcpy( p+2, text );
} else {
p = (char*)text;
}
REG_INTERP* inter = new_reg_interp( p );
PrintREXP( get_syntax_tree( inter ) );
printf("\n");
traverse( inter->root, 0 );
get_dfa( inter );
if ( p != text ) { free( p ); }
return inter;
}
// match the regular expression
REG_EXPORT int MatchReg( REG_INTERP* inter, char* text ) {
REG_DFA* dfa = inter->dfa;
DSTATE* ds = dfa->dstates[0];
DTRAN* dt = NULL;
// printf( "Begin match %s\n", text );
// printf( "Start at %x\n", ds );
char* p = text;
char c = 0;
while( c = *p++ ) {
// printf( "Input %c\n", c );
dt = find_dtran( ds, c );
if ( dt ) {
// there is target
ds = dt->tar;
// printf( "Goto %x according to dtran %x\n", ds, dt );
} else {
// the string match iff there is no dtran and the state is end state
// printf( "There is no dtran for input %c.\n", c );
// printf( "The end state is %x(%s)\n", ds, (ds->end ? "END" : "NONE_END" ) );
return ds->end;
}
}
// printf( "No more input\n" );
// printf( "The end state is %x(%s)\n", ds, (ds->end ? "END" : "NONE_END" ) );
return ( c == 0 && ds->end );
}
// release the complied regular expression instance
REG_EXPORT ReleaseReg( REG_INTERP* inter ) {
release_dfa( inter->dfa );
for( size_t i = 0; i< inter->node_count; i++ ) {
release_reg_node( &inter->nodes[i] );
}
free( inter->nodes );
release_set_factory( inter->factory );
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -