// reimpl2.h
//+---------------------------------------------------------------------------
//
// Copyright ( C ) Microsoft Corporation, 1994 - 2002.
//
// File: reimpl2.h
//
// Functions: helpers for matching and substituting regular expressions
//
// Notes: implementation details that really belong in a cpp file,
// but can't because of template weirdness
//
// Author: Eric Niebler ( ericne@microsoft.com )
//
// History: 8/15/2001 ericne Created
//
//----------------------------------------------------------------------------
#ifndef REIMPL_H
#define REIMPL_H
//
// Helper functions for match and substitute
//
// Fallback used when _MSC_VER is not defined: provide a macro named
// __assume. The argument x is ignored; the expansion is the two statements
// `assert( false ); break;`, so the macro can only be used where a `break`
// is legal (inside a switch or loop body).
#ifndef _MSC_VER
#define __assume( x ) assert( false ); break;
#endif
namespace detail
{
// For use while doing uppercase/lowercase conversions:
// Upper-cases a single narrow character with toupper().
// The character is converted to unsigned char before the call: toupper()
// has undefined behavior for negative arguments other than EOF, which is
// exactly what a plain char above 0x7F becomes when char is signed.
inline char regex_toupper( char ch )
{
    return static_cast<char>( toupper( static_cast<unsigned char>( ch ) ) );
}
// Lower-cases a single narrow character with tolower().
// The character is converted to unsigned char before the call: tolower()
// has undefined behavior for negative arguments other than EOF, which is
// exactly what a plain char above 0x7F becomes when char is signed.
inline char regex_tolower( char ch )
{
    return static_cast<char>( tolower( static_cast<unsigned char>( ch ) ) );
}
// Wide-character overload: upper-cases one wchar_t with towupper().
inline wchar_t regex_toupper( wchar_t ch )
{
    wchar_t const upper = static_cast<wchar_t>( towupper( ch ) );
    return upper;
}
// Wide-character overload: lower-cases one wchar_t with towlower().
inline wchar_t regex_tolower( wchar_t ch )
{
    wchar_t const lower = static_cast<wchar_t>( towlower( ch ) );
    return lower;
}
// Range overload: upper-cases every element of [ibegin, iend) in place.
// Each element is converted with the single-character regex_toupper
// overload and stored back through char_traits<>::assign, where the
// character type is taken from iterator_traits<CI>.
template< typename II, typename CI >
inline void regex_toupper( II ibegin, CI iend )
{
    typedef std::char_traits< typename std::iterator_traits<CI>::value_type > traits;

    while( iend != ibegin )
    {
        traits::assign( *ibegin, regex_toupper( *ibegin ) );
        ++ibegin;
    }
}
// Range overload: lower-cases every element of [ibegin, iend) in place.
// Each element is converted with the single-character regex_tolower
// overload and stored back through char_traits<>::assign, where the
// character type is taken from iterator_traits<CI>.
template< typename II, typename CI >
inline void regex_tolower( II ibegin, CI iend )
{
    typedef std::char_traits< typename std::iterator_traits<CI>::value_type > traits;

    while( iend != ibegin )
    {
        traits::assign( *ibegin, regex_tolower( *ibegin ) );
        ++ibegin;
    }
}
// Work-around for a template-template parameter problem on VC7.0
// Empty tag struct that exposes its template argument as the nested
// typedef `type`.
template< typename T >
struct type2type
{
    typedef T type;
};
// Turns a compile-time bool into a distinct type; the flag itself is
// available as the enumerator `value`.
template< bool F >
struct bool2type
{
    enum { value = F };
};

// The two tag types, one per flag value.
typedef bool2type< true >  true_t;
typedef bool2type< false > false_t;
//
// Helper fn for swapping two auto_ptr's
//
template< typename T >
inline void swap_auto_ptr( std::auto_ptr<T> & left, std::auto_ptr<T> & right )
{
    // Detach left's pointee first so nothing is deleted while the two
    // pointers change hands.
    std::auto_ptr<T> held( left.release() );

    // left is empty at this point, so reset() has nothing to destroy.
    left.reset( right.release() );

    // Hand the original left pointee over to right.
    right.reset( held.release() );
}
// --------------------------------------------------------------------------
//
// Class: match_param
//
// Description: Struct that contains the state of the matching operation.
// Passed by reference to all recursive_match_all_ and recursive_match_this routines.
//
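// Methods: match_param - ctor taking ( istart, istop, prgbackrefs ); ibegin is set to istart
// match_param - ctor taking ( ibegin, istart, istop, prgbackrefs )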
//
// Members: ibegin - start of the string
// istart - start of this iteration
// istop - end of the string
// prgbackrefs - pointer to backref array
//
// History: 8/14/2000 - ericne - Created
//
// --------------------------------------------------------------------------
template< typename CI >
struct match_param
{
typedef backref_tag< CI > backref_type;
typedef std::vector<backref_type> backref_vector;
// Used by the recursive_match routines
backref_vector * prgbackrefs;
CI ibegin;
CI istart;
CI istop;
// Used by the iterative_match routines
CI icur;
unsafe_stack * pstack;
sub_expr_base<CI> const * next;
bool no0len;
sub_expr_base<CI> const * first;
match_param(
CI _istart,
CI _istop,
std::vector< backref_tag< CI > > * _prgbackrefs )
: prgbackrefs( _prgbackrefs ),
ibegin( _istart ),
istart( _istart ),
istop( _istop ),
icur( _istart ),
pstack( NULL ),
next( NULL ),
no0len( false )
{
}
match_param(
CI _ibegin,
CI _istart,
CI _istop,
std::vector< backref_tag< CI > > * _prgbackrefs )
: prgbackrefs( _prgbackrefs ),
ibegin( _ibegin ),
istart( _istart ),
istop( _istop ),
icur( _istart ),
pstack( NULL ),
next( NULL ),
no0len( false )
{
}
};
// --------------------------------------------------------------------------
//
// Class: regex_arena
//
// Description: A small, fast allocator for speeding up pattern compilation.
// Every basic_rpattern object has an arena as a member.
// sub_expr objects can only be allocated from this arena.
// Memory is alloc'ed in chunks using ::operator new(). Chunks
// are freed en-masse when the arena gets destroyed, or when
// deallocate is explicitly called.
//
// Methods: _new_block - create a new block & put it in m_pfirst
// regex_arena - c'tor
// ~regex_arena - free all memory blocks
// allocate - Grab some preallocated memory.
// deallocate - free all memory blocks.
// max_size - the largest chunk of memory the arena is
// capable of allocating.
//
// Members: m_pfirst - ptr to first block in list
//
// History: 8/17/2001 - ericne - Created
//
// --------------------------------------------------------------------------
// Arena allocator interface; the member functions are only declared here.
class regex_arena
{
    struct block;
    friend struct block;

    block * m_pfirst;           // ptr to first block in list
    size_t  m_default_size;

    // Create a new block & put it in m_pfirst
    void _new_block( size_t size ); //throw( std::bad_alloc );

public:
    explicit regex_arena( size_t default_size );
    ~regex_arena();

    void * allocate( size_t size ); //throw( std::bad_alloc );
    void   deallocate(); //throw();
    size_t max_size() const;
    void   swap( regex_arena & that ); // throw()

private:
    // Copy construction and copy assignment are private, so client code
    // cannot copy an arena.
    regex_arena( regex_arena const & );
    regex_arena & operator=( regex_arena const & );
};
// --------------------------------------------------------------------------
//
// Class: sub_expr_base
//
// Description: patterns are "compiled" into a directed graph of sub_expr_base
// structs. Matching is accomplished by traversing this graph.
//
// Methods: ~sub_expr_base - virt dtor so cleanup happens correctly
// recursive_match_all_ - match this sub-expression and all following
// sub-expression
//
// History: 8/14/2000 - ericne - Created
//
// --------------------------------------------------------------------------
template< typename CI >
struct sub_expr_base
{
virtual ~sub_expr_base() = 0;
virtual bool recursive_match_all_( match_param<CI> &, CI ) const = 0; //throw() = 0;
virtual bool recursive_match_all_c( match_param<CI> &, CI ) const = 0; //throw() = 0;
virtual bool iterative_match_this_( match_param<CI> & ) const = 0; //throw() = 0;
virtual bool iterative_match_this_c( match_param<CI> & ) const = 0; //throw() = 0;
virtual bool iterative_rematch_this_( match_param<CI> & ) const = 0; //throw() = 0;
virtual bool iterative_rematch_this_c( match_param<CI> & ) const = 0; //throw() = 0;
// Use the regex_arena for memory management
static void * operator new( size_t size, regex_arena & arena )
{
return arena.allocate( size );
}
static void operator delete( void *, regex_arena & )
{
}
// Invoke the d'tor, but don't bother freeing memory. That will
// happen automatically when the arena object gets destroyed.
static void operator delete( void * )
{
}
// For choosing an appropriate virtual function based on a compile time constant
bool recursive_match_all_( match_param<CI> & param, CI icur, false_t ) const //throw()
{
return recursive_match_all_( param, icur );
}
bool recursive_match_all_( match_param<CI> & param, CI icur, true_t ) const //throw()
{
return recursive_match_all_c( param, icur );
}
bool iterative_match_this_( match_param<CI> & param, false_t ) const //throw()
{
return iterative_match_this_( param );
}
bool iterative_match_this_( match_param<CI> & param, true_t ) const //throw()
{
return iterative_match_this_c( param );
}
bool iterative_rematch_this_( match_param<CI> & param, false_t ) const //throw()
{
return iterative_rematch_this_( param );
}
bool iterative_rematch_this_( match_param<CI> & param, true_t ) const //throw()
{
return iterative_rematch_this_c( param );
}
};
template< typename CI >
inline sub_expr_base<CI>::~sub_expr_base()
{
}
// --------------------------------------------------------------------------
//
// Class: subst_node
//
// Description: Substitution strings are parsed into an array of these
// structures in order to speed up subst operations.
//
// Members: stype - type of this struct
// subst_string - do a string substitution
// subst_backref - do a backref substitution
// op - execute an operation
//
// History: 8/14/2000 - ericne - Created
//
// --------------------------------------------------------------------------
// One node of a parsed substitution string. stype records which kind of
// node this is; the payload lives in the anonymous union below.
struct subst_node
{
// Kinds of node; stored in stype.
enum subst_type
{
SUBST_STRING,
SUBST_BACKREF,
SUBST_OP
};
// NOTE(review): presumably special subst_backref values standing for the
// text before / after the match -- confirm against the code that reads them.
enum { PREMATCH = -1, POSTMATCH = -2 };
// Operation codes. Each enumerator takes the value of the SUBST_* constant
// with the same suffix; those constants are declared outside this struct.
enum op_type
{
UPPER_ON = SUBST_UPPER_ON,
UPPER_NEXT = SUBST_UPPER_NEXT,
LOWER_ON = SUBST_LOWER_ON,
LOWER_NEXT = SUBST_LOWER_NEXT,
ALL_OFF = SUBST_ALL_OFF
};
subst_type stype; // type of this node
// Payload: one member per node kind, sharing storage.
union
{
// String substitution.
// NOTE(review): rstart / rlength look like an offset and a length into
// the substitution string -- confirm against the parser.
struct
{
size_t rstart;
size_t rlength;
} subst_string;
size_t subst_backref; // backref substitution
op_type op; // operation to execute
};
};
// List of substitution nodes.
typedef std::list<subst_node> subst_list_type;
// Declared here, defined elsewhere. Its result is passed to the regex_arena
// constructor as the arena's default size by basic_rpattern_base_impl.
size_t DEFAULT_BLOCK_SIZE();
// --------------------------------------------------------------------------
//
// Class: basic_rpattern_base_impl
//
// Description:
//
// Methods: basic_rpattern_base_impl - ctor
// flags - get the state of the flags
// uses_backrefs - true if the backrefs are referenced
// get_first_subexpression - return ptr to first sub_expr struct
// get_width - get min/max nbr chars this pattern can match
// loops - if false, we only need to try to match at 1st position
// cgroups - number of visible groups
// _cgroups_total - total number of groups, including hidden ( ?: ) groups
// get_pat - get string representing the pattern
// get_subst - get string representing the substitution string
// get_subst_list - get the list of subst nodes
// _normalize_string - perform character escaping
//
// Members: m_fuses_backrefs - true if subst string refers to backrefs
// m_floop - false if pat only needs to be matched in one place
// m_cgroups - total count of groups
// m_cgroups_visible - count of visible groups
// m_flags - the flags
// m_nwidth - width of this pattern
// m_pat - pattern string
// m_subst - substitution string
// m_subst_list - list of substitution nodes
// m_pfirst - ptr to first subexpression to match
//
// Typedefs: char_type -
// string_type -
// size_type -
//
// History: 8/14/2000 - ericne - Created
//
// --------------------------------------------------------------------------
template< typename CI >
class basic_rpattern_base_impl
{
basic_rpattern_base_impl( basic_rpattern_base_impl<CI> const & );
basic_rpattern_base_impl & operator=( basic_rpattern_base_impl<CI> const & );
protected:
typedef typename std::iterator_traits<CI>::value_type char_type;
typedef std::char_traits<char_type> traits_type;
typedef std::basic_string<char_type> string_type;
typedef size_t size_type;
typedef backref_tag<CI> backref_type;
typedef std::vector<backref_type> backref_vector;
explicit basic_rpattern_base_impl(
REGEX_FLAGS flags = NOFLAGS,
REGEX_MODE mode = MODE_DEFAULT,
string_type const & pat = string_type(),
string_type const & subst = string_type() ) //throw()
: m_arena( DEFAULT_BLOCK_SIZE() ),
m_fuses_backrefs( false ),
m_floop( true ),
m_fok_to_recurse( true ),
m_cgroups( 0 ),
m_cgroups_visible( 0 ),
m_flags( flags ),
m_mode( mode ),
m_nwidth( uninit_width() ),
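// [The listing is cut off here. What followed was code-viewer page chrome,
// not part of reimpl2.h -- a keyboard shortcut list: copy code Ctrl + C,
// search code Ctrl + F, full screen F11, toggle theme Ctrl + Shift + D,
// show shortcuts ?, increase font size Ctrl + =, decrease font size Ctrl + -]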