📄 regexpr2.cpp
字号:
//+---------------------------------------------------------------------------
//
// Copyright ( C ) Microsoft Corporation, 1994 - 2002.
//
// File: regexpr2.cpp
//
// Contents: implementation for rpattern methods, definitions for all the
// subexpression types used to perform the matching, the
// charset class definition .
//
// Classes: too many to list here
//
// Functions:
//
// Author: Eric Niebler ( ericne@microsoft.com )
//
// History: 12-11-1998 ericne Created
// 01-05-2001 ericne Removed dependency on VC's choice
// of STL iterator types.
// 08-15-2001 ericne Removed regexpr class, moved match
// state to match_results container.
// 09-17-2001 nathann Add DEBUG_HEAP_SUPPORT
// 11-16-2001 ericne Add stack-conservative algorithm
//
//----------------------------------------------------------------------------
#ifdef _MSC_VER
// unlimited inline expansion ( compile with /Ob1 or /Ob2 )
# pragma inline_recursion( on )
# pragma inline_depth( 255 )
// warning C4127: conditional expression is constant
// warning C4355: 'this' : used in base member initializer list
// warning C4702: unreachable code
// warning C4710: function 'blah' not inlined
// warning C4786: identifier was truncated to '255' characters in the debug information
# pragma warning( disable : 4127 4355 4702 4710 4786 )
#endif
#include <cctype>
#include <cwctype>
#include <cassert>
#include <malloc.h>
#include <algorithm>
#include <functional>
#if defined( _MSC_VER ) & defined( _MT )
# include <windows.h>
#endif
// If the implementation file has been included in the header, then we
// need to mark some functions as inline to prevent them from being multiply
// defined. But if the implementation file is not included in the header,
// we can't mark them as inline, otherwise the linker won't find them.
// REGEXPR_H is presumably the include guard of regexpr2.h -- confirm there.
#ifdef REGEXPR_H
// Header already seen: this file is being compiled as part of the header.
# define REGEXPR_H_INLINE inline
#else
// Stand-alone translation unit: plain external definitions, and the
// declarations have to be pulled in here.
# define REGEXPR_H_INLINE
# include "regexpr2.h"
#endif
// Fall back to _alloca when no alloca macro is defined. Note that #ifndef
// only sees macros, so a platform that provides alloca as a real function
// (not a macro) also takes this path.
#ifndef alloca
# define alloca _alloca
#endif
// Rather non-portable code below. The flags to the _isctype
// CRT routine, and even the _isctype routine itself, are
// not standard. This works for me on VC and on my linux box,
// but it probably won't work for everyone. :-(
#ifndef _MSC_VER
// Non-MSVC build: supply stand-ins for the VC-specific names used in this file.
// NOTE: __assume expands to TWO statements ( assert( false ) followed by
// "return NULL;" ), so it is only usable where a statement sequence is legal
// and the enclosing function is able to return NULL.
# define __assume( x ) assert( false ); return NULL;
// Map the VC-style character-class flag names onto the _IS* names
// ( presumably the <ctype.h> masks of the author's linux C library --
// confirm before porting to another platform ).
# define _UPPER _ISupper
# define _LOWER _ISlower
# define _ALPHA _ISalpha
# define _DIGIT _ISdigit
# define _HEX _ISxdigit
# define _SPACE _ISspace
# define _PRINT _ISprint
# define _GRAPH _ISgraph
# define _BLANK _ISblank
# define _CONTROL _IScntrl
# define _PUNCT _ISpunct
# define _ALNUM _ISalnum
#else
// MSVC build: define three composite flags as unions of other flags
// ( presumably because the CRT headers do not define these three -- confirm ).
# define _ALNUM ( _UPPER|_LOWER|_DIGIT )
# define _PRINT ( _BLANK|_PUNCT|_UPPER|_LOWER|_DIGIT )
# define _GRAPH ( _PUNCT|_UPPER|_LOWER|_DIGIT )
#endif
namespace regex
{
namespace detail
{
// Hand ownership of the bare pointer rhs to the auto_ptr lhs.
// reset() is deliberately avoided because some STL implementations
// don't provide it; a named temporary auto_ptr is assigned instead.
// Whatever lhs owned before is released by the assignment.
template< typename T, typename U >
inline void assign_auto_ptr( std::auto_ptr<T> & lhs, U * rhs )
{
    T * const praw = rhs;               // U* must convert implicitly to T*
    std::auto_ptr<T> new_owner( praw );
    lhs = new_owner;                    // new_owner gives up the pointer here
}
// Function object that wraps a pointer to a const member function taking
// one argument, so it can be invoked as f( object_ptr, arg ).
// Needed because VC's STL member function adapters don't handle const
// member functions.
template<class R, class Ty, class A>
class const_mem_fun1_t : public std::binary_function<Ty const *, A, R>
{
    typedef R ( Ty::*member_ptr )( A ) const;
    member_ptr m_pmf;   // the wrapped member function
public:
    explicit const_mem_fun1_t( member_ptr pmf )
        : m_pmf( pmf )
    {
    }
    // Call the wrapped member function on *obj, passing arg through.
    R operator()( Ty const * obj, A arg ) const
    {
        return ( obj->*m_pmf )( arg );
    }
};
// Helper that deduces R, Ty and A from a pointer to a const one-argument
// member function and returns the matching const_mem_fun1_t adapter.
template<class R, class Ty, class A>
inline const_mem_fun1_t<R, Ty, A> mem_fun( R ( Ty::*p )( A ) const )
{
    const_mem_fun1_t<R, Ty, A> adapter( p );
    return adapter;
}
// On some systems, isctype is implemented as a macro. I need
// a function so that I can bind args and use it in algorithms.
// Refuse to build if _isctype itself is a macro, since it could then not
// be passed around as a function.
#ifdef _isctype
# error _isctype is a macro. It needs to be a function.
#endif
// If a __isctype macro exists, wrap it in a real function named _isctype
// that forwards both arguments unchanged.
#ifdef __isctype
inline int _isctype( int c, int type ) { return __isctype( c, type ); }
#endif
#if defined( _MSC_VER ) & defined( _MT )
// Critical section used to synchronize the creation of static const patterns.
// A single shared instance is created on the first call to Instance().
// Everything except the destructor is private, so only the friend
// CRegExLock can reach the instance and enter / leave it.
class CRegExCritSect : private CRITICAL_SECTION
{
    friend struct CRegExLock;

    // Copying is blocked: private declaration, no definition visible here.
    CRegExCritSect( CRegExCritSect const & );

    CRegExCritSect()
    {
        InitializeCriticalSection( this );
    }

    // Accessor for the one shared instance ( function-local static ).
    static CRegExCritSect & Instance()
    {
        static CRegExCritSect s_instance;
        return s_instance;
    }

    void Enter()
    {
        EnterCriticalSection( this );
    }

    void Leave()
    {
        LeaveCriticalSection( this );
    }

public:
    ~CRegExCritSect()
    {
        DeleteCriticalSection( this );
    }
};
// Constructing a CRegExLock enters the shared regex critical section.
REGEXPR_H_INLINE CRegExLock::CRegExLock()
{
    CRegExCritSect & shared_cs = CRegExCritSect::Instance();
    shared_cs.Enter();
}
// Destroying a CRegExLock leaves the shared regex critical section.
REGEXPR_H_INLINE CRegExLock::~CRegExLock()
{
    CRegExCritSect & shared_cs = CRegExCritSect::Instance();
    shared_cs.Leave();
}
#endif
// Parse an unsigned decimal number from the range [ istr, iend ).
//
//   istr  in/out; advanced past every digit that was consumed. Must support
//         operator-- because one digit may be given back ( see below ).
//   iend  end of the input range.
//   max_  largest value the caller accepts.
//
// Returns the parsed value, or 0 when the input does not start with a digit.
// Digits are consumed while the running value is still below max_. If a
// digit pushes the value past max_, that digit is given back ( istr is
// stepped back once ) and the value from before it is returned.
// A digit that would make the value overflow size_t is treated the same way:
// it is left unconsumed, instead of letting the arithmetic wrap around and
// silently produce a small bogus number.
template< typename II, typename CI >
inline size_t parse_int( II & istr, CI iend, size_t const max_ = unsigned( -1 ) )
{
    typedef typename std::iterator_traits<II>::value_type CH;
    size_t const size_max = static_cast<size_t>( -1 );
    size_t retval = 0;
    while( iend != istr && REGEX_CHAR(CH,'0') <= *istr && REGEX_CHAR(CH,'9') >= *istr && max_ > retval )
    {
        size_t const digit = ( size_t )( *istr - REGEX_CHAR(CH,'0') );
        // retval * 10 + digit would exceed size_t: necessarily above max_ as
        // well, so stop here without consuming the digit.
        if( retval > ( size_max - digit ) / 10 )
            break;
        retval = retval * 10 + digit;
        ++istr;
    }
    if( max_ < retval )
    {
        // The last digit overshot max_: drop it and un-consume it.
        retval /= 10;
        --istr;
    }
    return retval;
}
// Here is the implementation for the regex_arena class.
// It takes advantage of the fact that all subexpression objects
// allocated during pattern compilation will be freed all at once.
// The sub_expr, custom_charset and basic_rpattern classes all must
// cooperate with this degenerate allocation scheme. But it is fast
// and effective. My patterns compile 40% faster with it. YMMV.
// NathanN:
// By defining the symbol REGEX_DEBUG_HEAP the allocator object
// no longer sub allocates memory. This enables heap checking tools like
// AppVerifier & PageHeap to find errors like buffer overruns
// Unless the build already chose a value, REGEX_DEBUG_HEAP follows
// REGEX_DEBUG: 1 when REGEX_DEBUG is non-zero, otherwise 0 ( an undefined
// REGEX_DEBUG evaluates to 0 inside #if ).
#ifndef REGEX_DEBUG_HEAP
# if REGEX_DEBUG
# define REGEX_DEBUG_HEAP 1
# else
# define REGEX_DEBUG_HEAP 0
# endif
#endif
// Default payload size, in bytes, of one arena block.
REGEXPR_H_INLINE size_t DEFAULT_BLOCK_SIZE()
{
# if REGEX_DEBUG_HEAP
    // debug heap: a size of 1 puts each allocation in its own block
    size_t const block_size = 1;
# else
    // normal build: several allocations share each block
    size_t const block_size = 352;
# endif
    return block_size;
}
// Header placed at the start of every chunk of memory owned by regex_arena.
// The chunk is allocated larger than sizeof( block ) ( see _new_block ), and
// the payload handed out by allocate() starts at m_data.
struct regex_arena::block
{
// Next block in the arena's singly linked list ( the block that was first
// before this one was pushed ).
block * m_pnext;
// Offset into m_data of the next free byte.
size_t m_offset;
// Combined size of the two header fields above; _new_block adds it to the
// payload size when sizing the raw allocation.
enum { HEADER_SIZE = sizeof( block* ) + sizeof( size_t ) };
// First byte of the payload; the rest of the payload follows it in the
// same raw allocation.
unsigned char m_data[ 1 ];
};
inline void regex_arena::_new_block( size_t size )
{
size_t blocksize = (std::max)( m_default_size, size ) + block::HEADER_SIZE;
block * pnew = static_cast<block*>( ::operator new( blocksize ) );
pnew->m_offset = 0;
pnew->m_pnext = m_pfirst;
m_pfirst = pnew;
}
// Create an empty arena: no blocks yet, and default_size remembered as the
// payload size used for blocks created later.
REGEXPR_H_INLINE regex_arena::regex_arena( size_t default_size )
    : m_pfirst( NULL )
    , m_default_size( default_size )
{
}
// Release every block still owned by the arena.
REGEXPR_H_INLINE regex_arena::~regex_arena()
{
    this->deallocate();
}
// Free all blocks in the list, leaving the arena empty ( m_pfirst == NULL ).
// No destructors are run here; only the raw memory is returned.
REGEXPR_H_INLINE void regex_arena::deallocate()
{
    while( m_pfirst )
    {
        block * const pdead = m_pfirst;
        m_pfirst = pdead->m_pnext;      // unlink before freeing
        ::operator delete( static_cast<void*>( pdead ) );
    }
}
// Minimal polymorphic type. regex_arena::allocate rounds sizes up to this
// type's alignment, and regex_allocator checks element types against it.
struct not_pod
{
    virtual ~not_pod()
    {
    }
};
// Hand out `size` bytes from the current block, starting a new block first
// when there is none or the request does not fit within the default block
// size. A request for 0 bytes is treated as a request for 1 byte.
// Individual allocations are never freed; see deallocate().
REGEXPR_H_INLINE void * regex_arena::allocate( size_t size )
{
    size_t const request = ( 0 == size ) ? 1 : size;
    bool const need_block = ( NULL == m_pfirst )
        || ( m_pfirst->m_offset + request > m_default_size );
    if( need_block )
        _new_block( request );
    size_t const start = m_pfirst->m_offset;
    // Advance by the request rounded up to a multiple of not_pod's
    // alignment, so the next pointer handed out is suitably aligned too.
    size_t const rounded = ( ( request + alignof<not_pod>::value - 1 )
        & ~( alignof<not_pod>::value - 1 ) );
    m_pfirst->m_offset = start + rounded;
    return m_pfirst->m_data + start;
}
// Upper bound on a single request: the largest value a size_t can hold.
REGEXPR_H_INLINE size_t regex_arena::max_size() const
{
    return static_cast<size_t>( -1 );
}
// Exchange the block list and the default block size with another arena.
REGEXPR_H_INLINE void regex_arena::swap( regex_arena & that )
{
    block * const my_first = m_pfirst;
    m_pfirst = that.m_pfirst;
    that.m_pfirst = my_first;

    size_t const my_default = m_default_size;
    m_default_size = that.m_default_size;
    that.m_default_size = my_default;
}
// Run T's destructor on *pt in place; the storage itself is not freed.
template< typename T >
inline void regex_destroy( T * pt )
{
    // Reference pt explicitly ( presumably to avoid an unreferenced-parameter
    // warning when ~T() is trivial ).
    ( void ) pt;
    pt->~T();
}
// Plain character buffers have nothing to destroy: these overloads do nothing.
inline void regex_destroy( char * )
{
}
inline void regex_destroy( wchar_t * )
{
}
////
// regex_allocator is a proper STL allocator. It is a thin
// wrapper around the regex_arena object. Note that deallocate
// does nothing. Memory isn't freed until the arena object
// gets destroyed.
template< typename T >
struct regex_allocator
{
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef T *pointer;
typedef T const *const_pointer;
typedef T & reference;
typedef T const & const_reference;
typedef T value_type;
// Bind a new allocator to the given arena. Every constructor also runs
// align_check(), the compile-time alignment check for T.
regex_allocator( regex_arena & arena )
    : m_arena( arena )
{
    align_check();
}
#if !defined(_MSC_VER) | _MSC_VER >= 1300
// Copy constructor: the copy shares the source's arena. Compiled out for
// MSVC versions below _MSC_VER 1300.
regex_allocator( regex_allocator const & alloc )
    : m_arena( alloc.m_arena )
{
    align_check();
}
#endif
// Converting constructor from an allocator for another element type; the
// new allocator shares that allocator's arena.
template< typename U >
regex_allocator( regex_allocator<U> const & alloc )
    : m_arena( alloc.m_arena )
{
    align_check();
}
// Address of an element ( mutable and const flavours ).
pointer address( reference x ) const
{
    return &x;
}
const_pointer address( const_reference x ) const
{
    return &x;
}
// Obtain uninitialized storage for `size` objects of type T from the arena.
// The hint argument is ignored.
pointer allocate( size_type size, void const * =0 )
{
    void * const praw = m_arena.allocate( size * sizeof( T ) );
    return static_cast<pointer>( praw );
}
// Obtain `size` raw bytes from the arena, returned as char*.
char *_Charalloc( size_type size )
{
    void * const praw = m_arena.allocate( size );
    return static_cast<char*>( praw );
}
// Intentionally a no-op: both arguments are ignored and nothing is released here.
void deallocate( void *, size_type )
{
}
// Copy-construct a T from t in the storage that p points to.
void construct( pointer p, T const & t )
{
    void * const where = ( void* )p;
    new( where ) T( t );
}
// Destroy the object at p via regex_destroy; the storage is left alone.
void destroy( pointer p )
{
    regex_destroy( p );
}
// Largest number of T objects a single allocate() call may request:
// the arena's byte limit divided by sizeof( T ), but never less than 1.
size_t max_size() const
{
    size_t const count = m_arena.max_size() / sizeof( T );
    if( 0 < count )
        return count;
    return 1;
}
// Names the regex_allocator type for a different element type U.
template< typename U >
struct rebind
{
    typedef regex_allocator<U> other;
};
// BUGBUG after rpattern::swap, all regex_allocator
// objects refer to the wrong arena.
regex_arena & m_arena;
private:
regex_allocator & operator=( regex_allocator const & );
// Compile-time check, called from every constructor, that T's alignment
// requirement does not exceed not_pod's. It declares an unused local of the
// project's static_assert<> template type; nothing is computed at run time.
static void align_check()
{
// The regex_arena uses not_pod to align memory. Use a compile-time
// assertion to make sure that T does not have worse alignment than not_pod.
static_assert<( ( size_t ) alignof<T>::value <= ( size_t ) alignof<not_pod>::value )> const align_check;
// Reference the local so it does not count as unused.
( void ) align_check;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -