📄 regexpr2.cpp

📁 vsstylemanager1.0.4希望对大家有用啊,
💻 CPP
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
//+---------------------------------------------------------------------------
//
//  Copyright ( C ) Microsoft, 1994 - 2002.
//
//  File:       regexpr2.cpp
//
//  Contents:   implementation for rpattern methods, definitions for all the
//              subexpression types used to perform the matching, the
//              charset class definition .
//
//  Classes:    too many to list here
//
//  Functions:
//
//  Author:     Eric Niebler ( ericne@microsoft.com )
//
//  History:    12-11-1998   ericne   Created
//              01-05-2001   ericne   Removed dependency on VC's choice
//                                    of STL iterator types.
//              08-15-2001   ericne   Removed regexpr class, moved match
//                                    state to match_results container.
//              09-17-2001   nathann  Add DEBUG_HEAP_SUPPORT
//              11-16-2001   ericne   Add stack-conservative algorithm
//
//----------------------------------------------------------------------------

#ifdef _MSC_VER
// unlimited inline expansion ( compile with /Ob1 or /Ob2 )
# pragma inline_recursion( on )
# pragma inline_depth( 255 )
// warning C4127: conditional expression is constant
// warning C4355: 'this' : used in base member initializer list
// warning C4702: unreachable code
// warning C4710: function 'blah' not inlined
// warning C4786: identifier was truncated to '255' characters in the debug information
# pragma warning( push )
# pragma warning( disable : 4127 4355 4702 4710 4786 )
#endif

#include <limits>
#include <cctype>
#include <cwchar>
#include <memory>
#include <cwctype>
#include <malloc.h>
#include <algorithm>

#ifdef __MWERKS__
# include <alloca.h>
#endif

// If the implementation file has been included in the header, then we
// need to mark some functions as inline to prevent them from being multiply
// defined.  But if the implementation file is not included in the header,
// we can't mark them as inline, otherwise the linker won't find them.
#ifdef REGEXPR_H
# define REGEXPR_H_INLINE inline
#else
# define REGEXPR_H_INLINE
# include "regexpr2.h"
#endif

#ifdef REGEX_TO_INCLUDE
# include REGEX_TO_INCLUDE
#endif

// $PORT$
// _alloca is not standard
#ifndef alloca
# define alloca _alloca
#endif

namespace regex
{

namespace detail
{

inline wctype_t REGEX_CDECL regex_wctype( char const * sz )
{
    using namespace std;
    return wctype( sz );
}

namespace
{

#ifdef __GLIBC__
struct regex_ctype_t
{
    int      m_ctype;
    wctype_t m_wctype;
};

#define REGEX_DECL_CTYPE(desc)                                                  \
    inline regex_ctype_t const & wct_ ## desc()                                 \
    {                                                                           \
        static regex_ctype_t const s_wct = { _IS ## desc, regex_wctype(#desc) };\
        return s_wct;                                                           \
    }

REGEX_DECL_CTYPE(alnum)
REGEX_DECL_CTYPE(alpha)
REGEX_DECL_CTYPE(blank)
REGEX_DECL_CTYPE(cntrl)
REGEX_DECL_CTYPE(digit)
REGEX_DECL_CTYPE(graph)
REGEX_DECL_CTYPE(lower)
REGEX_DECL_CTYPE(print)
REGEX_DECL_CTYPE(punct)
REGEX_DECL_CTYPE(space)
REGEX_DECL_CTYPE(upper)
REGEX_DECL_CTYPE(xdigit)
regex_ctype_t const wct_zero = { 0,  0 };

inline regex_ctype_t & operator |= ( regex_ctype_t & lhs, regex_ctype_t const & rhs )
{
    lhs.m_ctype  |= rhs.m_ctype;
    lhs.m_wctype |= rhs.m_wctype;
    return lhs;
}
inline regex_ctype_t operator | ( regex_ctype_t lhs, regex_ctype_t const & rhs )
{
    return lhs |= rhs;
}
inline int REGEX_CDECL regex_isctype( int ch, regex_ctype_t const & desc )
{
    return __isctype( ch, desc.m_ctype );
}
inline int REGEX_CDECL regex_iswctype( wint_t wc, regex_ctype_t desc )
{
    using namespace std;
    return iswctype( wc, desc.m_wctype );
}
inline bool operator == ( regex_ctype_t const & lhs, regex_ctype_t const & rhs )
{
    return lhs.m_ctype == rhs.m_ctype && lhs.m_wctype == rhs.m_wctype;
}
inline bool operator != ( regex_ctype_t const & lhs, regex_ctype_t const & rhs )
{
    return lhs.m_ctype != rhs.m_ctype || lhs.m_wctype != rhs.m_wctype;
}
#else
typedef wctype_t regex_ctype_t;

#define REGEX_DECL_CTYPE(desc)                                                   \
    inline regex_ctype_t const wct_ ## desc()                                    \
    {                                                                            \
        static regex_ctype_t const s_wct = regex_wctype(#desc);                  \
        return s_wct;                                                            \
    }

REGEX_DECL_CTYPE(alnum)
REGEX_DECL_CTYPE(alpha)
REGEX_DECL_CTYPE(cntrl)
REGEX_DECL_CTYPE(digit)
REGEX_DECL_CTYPE(graph)
REGEX_DECL_CTYPE(lower)
REGEX_DECL_CTYPE(print)
REGEX_DECL_CTYPE(punct)
REGEX_DECL_CTYPE(space)
REGEX_DECL_CTYPE(upper)
REGEX_DECL_CTYPE(xdigit)
regex_ctype_t const wct_zero    = 0;

#if defined(_MSC_VER) & ( _MSC_VER==1200 | defined(_CPPLIB_VER) )
inline regex_ctype_t const wct_blank() { return _BLANK; } // work around for bug in VC++
inline int REGEX_CDECL regex_isctype( int ch, regex_ctype_t desc )
{
    return _isctype( ch, static_cast<int>( desc ) );
}
#else
REGEX_DECL_CTYPE(blank)
inline int REGEX_CDECL regex_isctype( int ch, regex_ctype_t desc )
{
    using namespace std;
    return iswctype( btowc( ch ), desc );
}
#endif
inline int REGEX_CDECL regex_iswctype( wint_t wc, regex_ctype_t desc )
{
    using namespace std;
    return iswctype( wc, desc );
}
#endif
} // unnamed namespace

template< typename CStringsT, typename IterT >
bool _do_match_iterative( sub_expr_base<IterT> const * expr, match_param<IterT> & param, IterT icur, CStringsT );

// NathanN:
// By defining the symbol REGEX_DEBUG_HEAP the allocator object
// no longer sub allocates memory.  This enables heap checking tools like
// AppVerifier & PageHeap to find errors like buffer overruns
#if !defined( REGEX_DEBUG_HEAP ) & REGEX_DEBUG
# define REGEX_DEBUG_HEAP 1
#else
# define REGEX_DEBUG_HEAP 0
#endif

REGEXPR_H_INLINE size_t DEFAULT_BLOCK_SIZE()
{
#if REGEX_DEBUG_HEAP
    // put each allocation in its own mem_block
    return 1;
#else
    // put multiple allocation in each mem_block
    return 352;
#endif
}

template< typename IBeginT, typename IEndT >
inline size_t parse_int( IBeginT & ibegin, IEndT iend, size_t const max_ = size_t( -1 ) )
{
    typedef typename std::iterator_traits<IEndT>::value_type char_type;
    size_t retval = 0;
    while( iend != ibegin && REGEX_CHAR(char_type,'0') <= *ibegin && REGEX_CHAR(char_type,'9') >= *ibegin && max_ > retval )
    {
        retval *= 10;
        retval += static_cast<size_t>( *ibegin - REGEX_CHAR(char_type,'0') );
        ++ibegin;
    }
    if( max_ < retval )
    {
        retval /= 10;
        --ibegin;
    }
    return retval;
}

// --------------------------------------------------------------------------
//
// Class:       boyer_moore
//
// Description: fast sub-string search algorithm
//
// Members:     m_begin - iter to first char in pattern sequence
//              m_last  - iter to last char in pattern sequence
//              m_len   - length of the pattern sequence
//              m_off   - array of offsets, indexed by ASCII char values
//
// History:     6/8/2003 - ericne - Created
//
// --------------------------------------------------------------------------
template< typename IterT >
class boyer_moore
{
    typedef typename std::iterator_traits<IterT>::value_type    char_type;
    typedef typename std::char_traits<char_type>                traits_type;

    enum { OFFSET_SIZE = UCHAR_MAX + 1 };

    IterT               m_begin;
    IterT               m_last;
    char_type const*    m_low_last;
    unsigned char       m_len;
    unsigned char       m_off[ OFFSET_SIZE ];

    static unsigned char hash_char( char ch )           { return static_cast<unsigned char>( ch ); }
    static unsigned char hash_char( signed char ch )    { return static_cast<unsigned char>( ch ); }
    static unsigned char hash_char( unsigned char ch )  { return ch; }
    static unsigned char hash_char( wchar_t ch )        { return static_cast<unsigned char>( ch % OFFSET_SIZE ); }
    template< typename  CharT >
    static unsigned char REGEX_VC6(REGEX_CDECL) hash_char( CharT ch REGEX_VC6(...) )
    {
        return static_cast<unsigned char>( std::char_traits<CharT>::to_int_type( ch ) % OFFSET_SIZE );
    }

    // case-sensitive Boyer-Moore search
    template< typename OtherT >
    OtherT find_with_case( OtherT begin, OtherT end ) const
    {
        typedef typename std::iterator_traits<OtherT>::difference_type diff_type;
        diff_type const endpos = std::distance( begin, end );
        diff_type offset = m_len;

        for( diff_type curpos = offset; curpos < endpos; curpos += offset )
        {
            std::advance( begin, offset );

            IterT  pat_tmp = m_last;
            OtherT str_tmp = begin;

            for( ; traits_type::eq( *str_tmp, *pat_tmp );
                --pat_tmp, --str_tmp )
            {
                if( pat_tmp == m_begin )
                {
                    return str_tmp;
                }
            }

            offset = m_off[ hash_char( *begin ) ];
        }

        return end;
    }

    // case-insensitive Boyer-Moore search
    template< typename OtherT >
    OtherT find_without_case( OtherT begin, OtherT end ) const
    {
        typedef typename std::iterator_traits<OtherT>::difference_type diff_type;
        diff_type const endpos = std::distance( begin, end );
        diff_type offset = m_len;

        for( diff_type curpos = offset; curpos < endpos; curpos += offset )
        {
            std::advance( begin, offset );

            IterT               pat_tmp  = m_last;
            char_type const*    low_tmp  = m_low_last;
            OtherT              str_tmp  = begin;

            for( ; traits_type::eq( *str_tmp, *pat_tmp ) || traits_type::eq( *str_tmp, *low_tmp );
                --pat_tmp, --str_tmp, --low_tmp )
            {
                if( pat_tmp == m_begin )
                {
                    return str_tmp;
                }
            }

            offset = m_off[ hash_char( *begin ) ];
        }

        return end;
    }

public:
    // initialize the Boyer-Moore search data structure, using the
    // search sub-sequence to prime the pump.
    boyer_moore( IterT begin, IterT end, char_type const* lower = 0 )
        : m_begin( begin )
        , m_last( begin )
        , m_low_last( lower )
    {
        typedef typename std::iterator_traits<IterT>::difference_type diff_type;
        diff_type diff = std::distance( begin, end );
        m_len  = static_cast<unsigned char>( regex_min<diff_type>( diff, UCHAR_MAX ) );
        std::fill_n( m_off, ARRAYSIZE( m_off ), m_len );
        --m_len;

        for( unsigned char offset = m_len; offset; --offset, ++m_last )
        {
            m_off[ hash_char( *m_last ) ] = offset;
        }

        if( m_low_last )
        {
            for( unsigned char offset = m_len; offset; --offset, ++m_low_last )
            {
                unsigned char hash = hash_char( *m_low_last );
                m_off[ hash ] = regex_min( m_off[ hash ], offset );
            }
        }
    }

    template< typename OtherT >
    OtherT find( OtherT begin, OtherT end ) const
    {
        if( m_low_last )
        {
            return find_without_case( begin, end );
        }
        else
        {
            return find_with_case( begin, end );
        }
    }

    static void * operator new( size_t size, regex_arena & arena )
    {
        return arena.allocate( size );
12 3 4 5 下一页
💿 文件大小 98 K
👤 上传用户 billhu
📂 所属分类其他
🏷️ 相关标签

#vsstylemanager #家
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -