📄 regexpr2.cpp

📁 代理服务器原代码
💻 CPP
📖 第 1 页 / 共 5 页
字号:
            return false;\
        }\
        return ( m_posixcharsoff.end() !=\
                 std::find_if( m_posixcharsoff.begin(), m_posixcharsoff.end(),\
                               std::not1( std::bind1st( std::ptr_fun( CTYPE ), ch ) ) ) )\
            || ( m_nestedcharsets.end() !=\
                 std::find_if( m_nestedcharsets.begin(), m_nestedcharsets.end(),\
                               std::bind2nd( regex::detail::mem_fun( PMF ), ch ) ) );\
    }

    // Instantiate the extended-check helper three times: once for narrow
    // characters and once for each wide-character case mode.
    // NOTE(review): the macro's parameter list is outside this view; the
    // arguments appear to be (generated function name, character type,
    // ctype-style classifier, charset membership member function) -- confirm
    // against the macro definition.
    DECLARE_EXTENDED_CHECK(extended_check_narrow,char,_isctype,&charset::in_narrow)
    DECLARE_EXTENDED_CHECK(extended_check_wide_with_case,wchar_t,iswctype,&charset::in_wide_with_case)
    DECLARE_EXTENDED_CHECK(extended_check_wide_no_case,wchar_t,iswctype,&charset::in_wide_no_case)
// The macro is not needed past the three instantiations above.
#undef DECLARE_EXTENDED_CHECK

    // Note overloading based on parameter
    // Membership test for a narrow character.
    //   ch      - the character to look up.
    //   returns - true when the bit vector or the extended check matches ch
    //             and the set is not complemented, or when neither matches
    //             and the set is complemented.
    bool in_narrow( char ch ) const
    {
        // A non-zero m_posixcharson at this point means optimize() was never
        // called on this charset.
        assert( 0 == m_posixcharson );

        // The bit vector is consulted first; the extended check only runs
        // when the bit vector does not already report a hit.
        bool const fmatched = m_ascii_bitvector[ static_cast<unsigned char>( ch ) ]
                           || extended_check_narrow( ch );

        // m_fcompliment inverts the answer.
        return m_fcompliment != fmatched;
    }

    inline bool in_range_vector_with_case( wchar_t ch ) const
    {
        return std::binary_search( m_range_vector.begin(), m_range_vector.end(),
            range_type( ch, ch ), range_less() );
    }

    // Case-insensitive lookup of ch in m_range_vector.
    //   ch      - the wide character to look up.
    //   returns - true when either the upper-case or the lower-case form of
    //             ch is found by std::binary_search.
    inline bool in_range_vector_no_case( wchar_t ch ) const
    {
        // Try the upper-case form first.
        wchar_t const upper = regex_toupper( ch );
        bool ffound = std::binary_search( m_range_vector.begin(), m_range_vector.end(),
                                          range_type( upper, upper ), range_less() );

        if( ! ffound )
        {
            // Only search again when the lower-case form is a different
            // character; otherwise the second search would repeat the first.
            wchar_t const lower = regex_tolower( ch );
            ffound = ( upper != lower )
                  && std::binary_search( m_range_vector.begin(), m_range_vector.end(),
                                         range_type( lower, lower ), range_less() );
        }

        return ffound;
    }

    // Note overloading based on parameter
    // Case-sensitive membership test for a wide character.
    //   ch      - the character to look up.
    //   returns - whether ch matched (see below), inverted when m_fcompliment
    //             is set.
    bool in_wide_with_case( wchar_t ch ) const
    {
        bool fmatched;
        if( UCHAR_MAX >= ch )
        {
            // Characters no greater than UCHAR_MAX are answered by the bit vector.
            fmatched = m_ascii_bitvector[ static_cast<unsigned char>( ch ) ];
        }
        else
        {
            // Larger characters are looked up in the range vector (skipped
            // when it is empty) and then tested with iswctype against
            // m_posixcharson (skipped when that is zero).
            fmatched = ( ! m_range_vector.empty() && in_range_vector_with_case( ch ) )
                    || ( m_posixcharson && iswctype( ch, m_posixcharson ) );
        }

        // The extended check only runs when nothing above matched.
        return m_fcompliment != ( fmatched || extended_check_wide_with_case( ch ) );
    }

    // Note overloading based on parameter
    // Case-insensitive membership test for a wide character.
    //   ch      - the character to look up.
    //   returns - whether ch matched (see below), inverted when m_fcompliment
    //             is set.
    bool in_wide_no_case( wchar_t ch ) const
    {
        bool fmatched;
        if( UCHAR_MAX >= ch )
        {
            // Characters no greater than UCHAR_MAX are answered by the bit vector.
            fmatched = m_ascii_bitvector[ static_cast<unsigned char>( ch ) ];
        }
        else
        {
            // Larger characters are looked up in the range vector in both
            // cases (skipped when it is empty) and then tested with iswctype
            // against m_posixcharson (skipped when that is zero).
            fmatched = ( ! m_range_vector.empty() && in_range_vector_no_case( ch ) )
                    || ( m_posixcharson && iswctype( ch, m_posixcharson ) );
        }

        // The extended check only runs when nothing above matched.
        return m_fcompliment != ( fmatched || extended_check_wide_no_case( ch ) );
    }

    // Tag-dispatched membership tests. The unnamed second argument picks the
    // implementation at compile time: for wide characters true_t forwards to
    // the case-sensitive test and false_t to the case-insensitive one.

    // Narrow character, true_t tag: forwards to in_narrow().
    bool in( char ch, true_t ) const
    {
        return in_narrow( ch );
    }

    // Narrow character, false_t tag: also forwards to in_narrow(), so both
    // tags behave identically for char.
    bool in( char ch, false_t ) const
    {

        return in_narrow( ch );
    }

    // Wide character, true_t tag: forwards to in_wide_with_case().
    bool in( wchar_t ch, true_t ) const
    {
        return in_wide_with_case( ch );
    }

    // Wide character, false_t tag: forwards to in_wide_no_case().
    bool in( wchar_t ch, false_t ) const
    {
        return in_wide_no_case( ch );
    }

private:
    // Copy assignment and copy construction are declared private, so code
    // outside the class cannot copy a charset_t.
    // NOTE(review): no definitions are visible here; presumably they are left
    // unimplemented on purpose -- confirm.
    charset_t & operator=( charset_t const & that );
    charset_t( charset_t const & that );
};

// Intrinsic character sets are allocated on the heap with the standard allocator.
// They are either the built-in character sets, or the user-defined ones.
// charset_t instantiated with std::allocator for all three of its allocator
// parameters.
struct charset
    : public charset_t< std::allocator<range_type>,
                        std::allocator<wctype_t>,
                        std::allocator<charset const *> >
{
    // Has to be written out: the copy constructor declared below suppresses
    // the implicitly generated default constructor.
    charset() {}

private:
    // Private and not defined here, which keeps outside code from copying a
    // charset.
    charset( charset const & );
    charset & operator=( charset const & );
};

// charset is no longer an incomplete type so we now
// know how to destroy one. free_charset() is used in syntax2.h
// Destroys a charset with a plain delete expression.
//   pcharset - the charset to destroy; passing a null pointer is harmless
//              because deleting a null pointer is a no-op in C++.
REGEXPR_H_INLINE void free_charset( charset const * pcharset )
{
    delete pcharset;
}

// Custom character sets are the ones that appear in patterns between
// square brackets.  They are allocated in a regex_arena to speed up
// pattern compilation and to make rpattern clean-up faster.
// charset_t instantiated with REGEX_ALLOCATOR for all three of its allocator
// parameters; the object itself is placed in a regex_arena as well.
struct custom_charset
    : public charset_t< REGEX_ALLOCATOR< range_type >,
                        REGEX_ALLOCATOR< wctype_t >,
                        REGEX_ALLOCATOR< charset const * > >
{
    // Shorthand for the three allocator types handed to the base class.
    typedef REGEX_ALLOCATOR< range_type > A1;
    typedef REGEX_ALLOCATOR< wctype_t > A2;
    typedef REGEX_ALLOCATOR< charset const * > A3;

    // Constructs the base class with one allocator of each kind, all made
    // from the given arena.
    custom_charset( regex_arena & arena )
        : charset_t<A1, A2, A3>(
              MAKE_ALLOCATOR(range_type,arena),
              MAKE_ALLOCATOR(wctype_t,arena),
              MAKE_ALLOCATOR(charset const*,arena) )
    {
    }

    // Placement form of new: storage for the object comes from the arena.
    //   size    - number of bytes requested by the new expression.
    //   arena   - the arena to allocate from.
    //   returns - whatever arena.allocate( size ) returns.
    static void * operator new( size_t size, regex_arena & arena )
    {
        return arena.allocate( size );
    }

    // Placement delete matching the operator new above; the language calls it
    // if the constructor throws during a placement new expression. It does
    // nothing.
    static void operator delete( void *, regex_arena & )
    {
    }

    // Ordinary delete is likewise a no-op: it releases no memory.
    static void operator delete( void * )
    {
    }

private:
    // Private and not defined here, which keeps outside code from copying a
    // custom_charset.
    custom_charset( custom_charset const & );
    custom_charset & operator=( custom_charset const & );
};

// Holds the six built-in character sets (word, digit, space and their
// complements) for the character type CH. Each set is a function-local static
// that is built the first time its accessor is called.
template< typename CH >
class intrinsic_charsets
{
    // A charset filled from a ctype descriptor plus a string of extra
    // characters.
    struct intrinsic_charset : public charset
    {
        // Builds the set right away; see reset() for the arguments.
        intrinsic_charset( bool fcompliment, wctype_t desc, char const * sz )
        {
            reset( fcompliment, desc, sz );
        }

        // Clears the set and fills it again.
        //   fcompliment - stored in m_fcompliment.
        //   desc        - ctype descriptor used to classify characters.
        //   sz          - null-terminated string of extra characters to add.
        void reset( bool fcompliment, wctype_t desc, char const * sz )
        {
            clear();
            m_fcompliment = fcompliment;
            m_fskip_extended_check = true;

            // Pick the _set overload that matches CH.
            _set( desc, type2type<CH>() );

            // Add the extra characters on top of the classified ones.
            while( *sz )
            {
                m_ascii_bitvector.set( static_cast<unsigned char>( *sz ) );
                ++sz;
            }
        }

    protected:
        // Narrow version: zeroes the bit vector, then sets a bit for every
        // value in [0, UCHAR_MAX] that _isctype accepts for desc.
        void _set( wctype_t desc, type2type<char> )
        {
            m_ascii_bitvector.zero();
            for( unsigned int code = 0; code <= UCHAR_MAX; ++code )
            {
                if( _isctype( code, desc ) )
                {
                    m_ascii_bitvector.set( static_cast<unsigned char>( code ) );
                }
            }
        }

        // Wide version: fills the bit vector exactly like the narrow version
        // and additionally records desc in m_posixcharson.
        void _set( wctype_t desc, type2type<wchar_t> )
        {
            _set( desc, type2type<char>() );
            m_posixcharson = desc;
        }

    private:
        // Private and not defined here, which keeps outside code from copying.
        intrinsic_charset( intrinsic_charset const & );
        intrinsic_charset & operator=( intrinsic_charset const & );
    };

    // Mutable accessors. Each owns one static instance; every positive set is
    // followed by its complemented twin.

    static intrinsic_charset & _get_word_charset()
    {
        static intrinsic_charset s_charset( false, _ALPHA|_DIGIT, "_" );
        return s_charset;
    }
    static intrinsic_charset & _get_not_word_charset()
    {
        static intrinsic_charset s_charset( true, _ALPHA|_DIGIT, "_" );
        return s_charset;
    }

    static intrinsic_charset & _get_digit_charset()
    {
        static intrinsic_charset s_charset( false, _DIGIT, "" );
        return s_charset;
    }
    static intrinsic_charset & _get_not_digit_charset()
    {
        static intrinsic_charset s_charset( true, _DIGIT, "" );
        return s_charset;
    }

    static intrinsic_charset & _get_space_charset()
    {
        static intrinsic_charset s_charset( false, _SPACE, "" );
        return s_charset;
    }
    static intrinsic_charset & _get_not_space_charset()
    {
        static intrinsic_charset s_charset( true, _SPACE, "" );
        return s_charset;
    }

public:
    // Read-only accessors for the six built-in sets.

    static charset const & get_word_charset()
    {
        return _get_word_charset();
    }
    static charset const & get_not_word_charset()
    {
        return _get_not_word_charset();
    }

    static charset const & get_digit_charset()
    {
        return _get_digit_charset();
    }
    static charset const & get_not_digit_charset()
    {
        return _get_not_digit_charset();
    }

    static charset const & get_space_charset()
    {
        return _get_space_charset();
    }
    static charset const & get_not_space_charset()
    {
        return _get_not_space_charset();
    }

    // Rebuilds all six sets with the same arguments their accessors construct
    // them with.
    static void reset()
    {
        // The three positive sets come first, then the three complements.
        _get_word_charset().reset( false, _ALPHA|_DIGIT, "_" );
        _get_digit_charset().reset( false, _DIGIT, "" );
        _get_space_charset().reset( false, _SPACE, "" );

        _get_not_word_charset().reset( true, _ALPHA|_DIGIT, "_" );
        _get_not_digit_charset().reset( true, _DIGIT, "" );
        _get_not_space_charset().reset( true, _SPACE, "" );
    }
};

//
// Operator implementations
//

// Evaluates the beginning-of-string condition
// Beginning-of-string predicate. The CSTRINGS parameter is not used inside
// this template.
template< typename CSTRINGS >
struct bos_t
{
    // Returns true when iter is equal to param.ibegin.
    //   param - match parameters; only ibegin is read.
    //   iter  - the position being tested.
    template< typename CI >
    static bool eval( match_param<CI> const & param, CI iter )
    {
        return param.ibegin == iter;
    }
    // Names the same predicate instantiated with a different template argument.
    template< typename U > struct rebind { typedef bos_t<U> other; };
};

// Find the beginning of a line, either beginning of a string, or the character
// immediately following a newline
// Beginning-of-line predicate. The CSTRINGS parameter is not used inside this
// template.
template< typename CSTRINGS >
struct bol_t
{
    // Returns true when iter is equal to param.ibegin, or when the character
    // just before iter is a newline.
    //   param - match parameters; only ibegin is read.
    //   iter  - the position being tested (taken by value, so stepping it
    //           back does not affect the caller).
    template< typename CI >
    static bool eval( match_param<CI> const & param, CI iter )
    {
        typedef typename std::iterator_traits<CI>::value_type CH;
        typedef std::char_traits<CH> traits_type;

        // At param.ibegin the answer is true without looking at any character.
        if( param.ibegin == iter )
            return true;

        // Otherwise step back one position and test that character.
        --iter;
        return traits_type::eq( REGEX_CHAR(CH,'\n'), *iter );
    }
    // Names the same predicate instantiated with a different template argument.
    template< typename T > struct rebind { typedef bol_t<T> other; };
};

// Evaluates the end-of-string condition for strings
// General end-of-string predicate: compares the position with param.istop.
template< typename CSTRINGS >
struct eos_t
{
    // Returns true when iter is equal to param.istop.
    //   param - match parameters; only istop is read.
    //   iter  - the position being tested.
    template< typename CI >
    static bool eval( match_param<CI> const & param, CI iter )
    {
        return param.istop == iter;
    }
    // Names the same predicate instantiated with a different template argument.
    template< typename U > struct rebind { typedef eos_t<U> other; };
};
// End-of-string predicate for the true_t case: the match parameters are
// ignored and the end is recognised by a '\0' character at the position.
template<>
struct eos_t<true_t>
{
    // Returns true when the character at iter is '\0'.
    //   (unnamed) - match parameters, not used.
    //   iter      - the position being tested; it is dereferenced.
    template< typename CI >
    static bool eval( match_param<CI> const &, CI iter )
    {
        typedef typename std::iterator_traits<CI>::value_type CH;
        typedef std::char_traits<CH> traits_type;

        CH const chterm = REGEX_CHAR(CH,'\0');
        return traits_type::eq( chterm, *iter );
    }
    // Names the end-of-string predicate for a different template argument.
    template< typename T > struct rebind { typedef eos_t<T> other; };
};

// Evaluates end-of-line conditions, either the end of the string, or a
// newline character.
// General end-of-line predicate: true at param.istop or on a newline.
template< typename CSTRINGS >
struct eol_t
{
    // Returns true when iter is equal to param.istop, or when the character
    // at iter is a newline.
    //   param - match parameters; only istop is read.
    //   iter  - the position being tested; it is dereferenced only when it is
    //           not equal to param.istop.
    template< typename CI >
    static bool eval( match_param<CI> const & param, CI iter )
    {
        typedef typename std::iterator_traits<CI>::value_type CH;
        typedef std::char_traits<CH> traits_type;

        // At param.istop the answer is true without reading a character.
        if( param.istop == iter )
            return true;

        return traits_type::eq( REGEX_CHAR(CH,'\n'), *iter );
    }
    // Names the same predicate instantiated with a different template argument.
    template< typename T > struct rebind { typedef eol_t<T> other; };
};
template<>
struct eol_t<true_t>
{
    template< typename CI >
    static bool eval( match_param<CI> const &, CI iter )
    {
        typedef typename std::iterator_traits<CI>::value_type CH;
        typedef std::char_traits<CH> traits_type;

        return traits_type::eq( REGEX_CHAR(CH,'\0'), *iter )
            || traits_type::eq( REGEX_CHAR(CH,'\n'), *iter );
    }
💿 文件大小 180 K
👤 上传用户 weizik
📂 所属分类源码/资料
🏷️ 相关标签

#代理服务器 #代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -