📄 regexpr2.cpp

📁 vsstylemanager1.0.4希望对大家有用啊,
💻 CPP
📖 第 1 页 / 共 5 页
字号:
    // Union another character set into this one and return *this.
    //
    // Three cases are handled:
    //   * `that` is not complemented: its ASCII bits, ranges, POSIX class
    //     masks and nested-set pointers are all added to this set.
    //   * `that` is complemented and consists only of ASCII bits and ranges:
    //     the inverse of `that` is folded directly into this set.
    //   * `that` is complemented and also uses POSIX classes or nested sets:
    //     only a pointer to `that` is recorded in m_nestedcharsets.
    //     NOTE(review): `that` presumably has to outlive this set -- confirm.
    basic_charset & operator|=( other_type const & that )
    {
        if( ! that.m_fcompliment )
        {
            // Plain set: every component can simply be accumulated.
            m_ascii_bitvector |= that.m_ascii_bitvector;
            std::copy( that.m_ranges.begin(), that.m_ranges.end(),
                       std::front_inserter( m_ranges ) );

            m_posixcharson |= that.m_posixcharson;
            std::copy( that.m_posixcharsoff.begin(), that.m_posixcharsoff.end(),
                       std::front_inserter( m_posixcharsoff ) );

            std::copy( that.m_nestedcharsets.begin(), that.m_nestedcharsets.end(),
                       std::front_inserter( m_nestedcharsets ) );
            return *this;
        }

        // A complemented set can only be inverted in place when it is made
        // of nothing but ASCII bits and explicit ranges.
        bool const finvertible = wct_zero == that.m_posixcharson
                              && that.m_posixcharsoff.empty()
                              && that.m_nestedcharsets.empty();
        if( ! finvertible )
        {
            // No simple merge exists; remember the nested set by address.
            m_nestedcharsets.push_front( &that );
            return *this;
        }

        m_ascii_bitvector |= ~ that.m_ascii_bitvector;

        // Emit the gaps between that's ranges, i.e. the inverse of
        // that.m_ranges over ( UCHAR_MAX, WCHAR_MAX ].
        // NOTE(review): this walk presumes that.m_ranges is sorted and
        // non-overlapping (i.e. `that` was optimized) -- TODO confirm.
        typedef typename other_ranges_type::const_iterator iter_type;
        wchar_t chlast = UCHAR_MAX;    // upper end of the last excluded span
        for( iter_type it = that.m_ranges.begin(); that.m_ranges.end() != it; ++it )
        {
            // A range starting right after the ASCII block leaves no gap
            // in front of it.
            if( UCHAR_MAX + 1 != it->first )
                m_ranges.push_front( range_type( wchar_t( chlast+1 ), wchar_t( it->first-1 ) ) );
            chlast = it->second;
        }

        // Trailing gap up to the largest wide character, if there is one.
        if( WCHAR_MAX != chlast )
            m_ranges.push_front( range_type( wchar_t( chlast+1 ), WCHAR_MAX ) );

        return *this;
    }

    // Add a single narrow character to the set (char overload; a wchar_t
    // overload with the same name also exists).
    //
    // ch      - character to add
    // fnocase - when true, the bits for regex_tolower( ch ) and
    //           regex_toupper( ch ) are set instead of the bit for ch itself
    void set_bit( char ch, bool const fnocase )
    {
        if( ! fnocase )
        {
            // Case-sensitive: exactly one bit, indexed by the unsigned value.
            m_ascii_bitvector.set( static_cast<unsigned char>( ch ) );
            return;
        }

        // Case-insensitive: record both case variants.
        m_ascii_bitvector.set( static_cast<unsigned char>( regex_tolower( ch ) ) );
        m_ascii_bitvector.set( static_cast<unsigned char>( regex_toupper( ch ) ) );
    }

    // Add a single wide character to the set (wchar_t overload).
    //
    // Characters up to UCHAR_MAX are forwarded to the char overload and end
    // up in the ASCII bitvector.  Anything larger is stored as a
    // one-character range in m_ranges; fnocase is not consulted for those.
    void set_bit( wchar_t ch, bool const fnocase )
    {
        if( UCHAR_MAX < ch )
        {
            m_ranges.push_front( range_type( ch, ch ) );
            return;
        }

        set_bit( static_cast<char>( ch ), fnocase );
    }

    // Add the inclusive narrow-character range [ch1, ch2] to the set
    // (char overload; a wchar_t overload with the same name also exists).
    //
    // The bounds are compared by their unsigned char values.
    // Throws bad_regexpr when ch1 is greater than ch2.
    // When fnocase is true, the upper- and lower-case form of every
    // character in the range is set rather than the character itself.
    void set_bit_range( char ch1, char ch2, bool const fnocase )
    {
        // Held as unsigned int so that ++i cannot wrap when the upper bound
        // is UCHAR_MAX.
        unsigned int const ifirst = static_cast<unsigned char>( ch1 );
        unsigned int const ilast  = static_cast<unsigned char>( ch2 );

        if( ifirst > ilast )
            throw bad_regexpr( "invalid range specified in character set" );

        for( unsigned int i = ifirst; i <= ilast; ++i )
        {
            if( fnocase )
            {
                char const chcur = static_cast<char>( i );
                m_ascii_bitvector.set( static_cast<unsigned char>( regex_toupper( chcur ) ) );
                m_ascii_bitvector.set( static_cast<unsigned char>( regex_tolower( chcur ) ) );
            }
            else
            {
                m_ascii_bitvector.set( static_cast<unsigned char>( i ) );
            }
        }
    }

    // Add the inclusive wide-character range [ch1, ch2] to the set
    // (wchar_t overload).
    //
    // Throws bad_regexpr when ch1 is greater than ch2.
    // The part of the range at or below UCHAR_MAX is delegated to the char
    // overload (and so lands in the ASCII bitvector); the part above
    // UCHAR_MAX is stored as one range in m_ranges.  A range straddling the
    // boundary is therefore split in two.
    void set_bit_range( wchar_t ch1, wchar_t ch2, bool const fnocase )
    {
        if( ch1 > ch2 )
            throw bad_regexpr( "invalid range specified in character set" );

        bool const fhas_narrow_part = UCHAR_MAX >= ch1;
        bool const fhas_wide_part   = UCHAR_MAX < ch2;

        if( fhas_narrow_part )
        {
            // Clamp the upper bound to the last narrow character.
            wchar_t const chtop = regex_min<wchar_t>( UCHAR_MAX, ch2 );
            set_bit_range( static_cast<char>( ch1 ), static_cast<char>( chtop ), fnocase );
        }

        if( fhas_wide_part )
        {
            // Raise the lower bound to the first character past UCHAR_MAX.
            wchar_t const chbottom = regex_max( static_cast<wchar_t>( UCHAR_MAX + 1 ), ch1 );
            m_ranges.push_front( range_type( chbottom, ch2 ) );
        }
    }

    // Prepare the set for matching (wide-character flavour).
    //
    // Steps, in order:
    //   1. Sort m_ranges and coalesce ranges that overlap or are adjacent.
    //   2. If any POSIX class mask is set in m_posixcharson, set the bit of
    //      every value in [0, UCHAR_MAX] that regex_isctype() reports as a
    //      member, so lookups in that span need only the bitvector.
    //   3. Cache in m_fskip_extended_check whether m_posixcharsoff and
    //      m_nestedcharsets are both empty.
    void optimize( type2type<wchar_t> )
    {
        if( m_ranges.begin() != m_ranges.end() )
        {
            // sort() is called without a comparator; per the original note
            // this orders by range start ( operator<() for pair templates ).
            m_ranges.sort();

            // Merge ranges that overlap or touch.  iprev trails icur by one
            // element; erase() is handed both the element and its predecessor.
            typename ranges_type::iterator icur=m_ranges.begin(), iprev=icur++;
            while( icur != m_ranges.end() )
            {
                // Equivalent to "icur->first <= iprev->second + 1", but written
                // so that nothing is added to iprev->second: that member can be
                // WCHAR_MAX (operator|= creates such ranges), and WCHAR_MAX + 1
                // overflows wherever wchar_t is as wide as int.  The
                // subtraction is only evaluated when icur->first is strictly
                // greater than iprev->second, so it cannot underflow.
                if( icur->first <= iprev->second || icur->first - 1 == iprev->second )
                {
                    iprev->second = regex_max( iprev->second, icur->second );
                    icur = m_ranges.erase( icur, iprev );
                }
                else
                {
                    iprev=icur++;
                }
            }
        }

        // For the ASCII range, merge the m_posixcharson info
        // into the ascii_bitvector
        if( wct_zero != m_posixcharson )
        {
            // BUGBUG this is kind of expensive. Think of a better way.
            for( unsigned int i=0; i<=UCHAR_MAX; ++i )
                if( regex_isctype( i, m_posixcharson ) )
                    m_ascii_bitvector.set( static_cast<unsigned char>( i ) );
        }

        // m_fskip_extended_check is a cache which tells us whether we
        // need to check the m_posixcharsoff and m_nestedcharsets vectors,
        // which would only be used in nested user-defined character sets
        m_fskip_extended_check = m_posixcharsoff.empty() && m_nestedcharsets.empty();
    }

    // Prepare the set for matching (narrow-character flavour).
    //
    // Runs the wide-character optimize() first, then clears m_posixcharson.
    // The order matters: the wide-character pass is what folds the POSIX
    // class information into the ASCII bitvector, so the mask may only be
    // dropped afterwards.
    void optimize( type2type<char> )
    {
        optimize( type2type<wchar_t>() );

        // The POSIX class info now lives in the ASCII bitvector, so
        // regex_isctype never needs to be consulted again for this set.
        m_posixcharson = wct_zero;
    }

    // Slow-path membership test against the parts of the set that cannot be
    // folded into the bitvector or the range list.
    //
    // Returns false straight away when the cached flag
    // m_fskip_extended_check is set.  Otherwise returns true if any entry of
    // m_posixcharsoff satisfies posixcharsoff_pred for ch, or any entry of
    // m_nestedcharsets satisfies in_charset_pred for ch.
    template< bool CaseT, typename CharT >
    bool extended_check( CharT ch REGEX_VC6(COMMA bool2type<CaseT>) ) const
    {
        // The cached flag must agree with the actual state of the two lists.
        REGEX_ASSERT( m_fskip_extended_check == ( m_posixcharsoff.empty() && m_nestedcharsets.empty() ) );

        if( m_fskip_extended_check )
            return false;

        // Try the m_posixcharsoff list first ...
        if( m_posixcharsoff.end() !=
            std::find_if( m_posixcharsoff.begin(), m_posixcharsoff.end(),
                          posixcharsoff_pred<CharT>( ch ) ) )
        {
            return true;
        }

        // ... then fall back to the nested character sets.
        return m_nestedcharsets.end() !=
               std::find_if( m_nestedcharsets.begin(), m_nestedcharsets.end(),
                             in_charset_pred<CharT, CaseT>( ch ) );
    }

    // Case-sensitive lookup of ch in m_ranges (selected by the true_t tag).
    //
    // Returns false for an empty range list; otherwise binary-searches for
    // the one-character range [ch, ch] using range_less.
    // NOTE(review): binary_search needs m_ranges sorted with respect to
    // range_less -- presumably guaranteed by optimize(); confirm.
    inline bool in_ranges( wchar_t ch, true_t ) const
    {
        typedef typename ranges_type::const_iterator iter_type;
        iter_type const ifirst = m_ranges.begin();
        iter_type const ilast  = m_ranges.end();

        if( ifirst == ilast )
            return false;

        return std::binary_search( ifirst, ilast, range_type( ch, ch ), range_less() );
    }

    // Case-insensitive lookup of ch in m_ranges (selected by the false_t tag).
    //
    // Returns false for an empty range list.  Otherwise the upper-case form
    // of ch is searched first; the lower-case form is searched only if that
    // fails and the two forms differ.
    // NOTE(review): ch itself is never searched, only regex_toupper( ch )
    // and regex_tolower( ch ).
    inline bool in_ranges( wchar_t ch, false_t ) const
    {
        typedef typename ranges_type::const_iterator iter_type;
        iter_type const ifirst = m_ranges.begin();
        iter_type const ilast  = m_ranges.end();

        if( ifirst == ilast )
            return false;

        wchar_t const chup = regex_toupper( ch );
        bool ffound = std::binary_search( ifirst, ilast, range_type( chup, chup ), range_less() );

        if( ! ffound )
        {
            wchar_t const chlo = regex_tolower( ch );

            // No point searching twice for a character without case variants.
            if( chup != chlo )
                ffound = std::binary_search( ifirst, ilast, range_type( chlo, chlo ), range_less() );
        }

        return ffound;
    }

    // Membership test for a narrow character (char overload; a wchar_t
    // overload with the same name also exists).
    //
    // ch counts as listed when its bit is set in the ASCII bitvector or when
    // extended_check() accepts it.  The result is that answer, inverted if
    // the set is complemented (m_fcompliment).
    template< bool CaseT >
    bool in( char ch REGEX_VC6(COMMA bool2type<CaseT>) ) const
    {
        // A non-zero mask here means optimize() was never called on this
        // charset.
        REGEX_ASSERT( wct_zero == m_posixcharson );

        bool const flisted =
               ( m_ascii_bitvector[ static_cast<unsigned char>( ch ) ] )
            || ( extended_check REGEX_NVC6(<CaseT>) ( ch REGEX_VC6(COMMA bool2type<CaseT>()) ) );

        return m_fcompliment != flisted;
    }

    // Membership test for a wide character (wchar_t overload).
    //
    // Characters up to UCHAR_MAX are answered from the ASCII bitvector.
    // Larger characters are looked up in m_ranges (case handling chosen by
    // CaseT) and, if a POSIX class mask is set, through regex_iswctype().
    // In both cases extended_check() is consulted when the first test fails.
    // The result is inverted if the set is complemented (m_fcompliment).
    template< bool CaseT >
    bool in( wchar_t ch REGEX_VC6(COMMA bool2type<CaseT>) ) const
    {
        bool fprimary;
        if( UCHAR_MAX >= ch )
        {
            fprimary = m_ascii_bitvector[ static_cast<unsigned char>( ch ) ];
        }
        else
        {
            fprimary = ( in_ranges( ch, bool2type<CaseT>() ) )
                    || ( wct_zero != m_posixcharson && regex_iswctype( ch, m_posixcharson ) );
        }

        bool const flisted =
               fprimary
            || ( extended_check REGEX_NVC6(<CaseT>) ( ch REGEX_VC6(COMMA bool2type<CaseT>()) ) );

        return m_fcompliment != flisted;
    }

private:
    // Copy assignment and copy construction are declared private and no
    // definitions are visible here, so copying is presumably meant to be
    // disallowed (the pre-C++11 non-copyable idiom) -- TODO confirm that
    // nothing elsewhere defines them.
    basic_charset & operator=( basic_charset const & that );
    basic_charset( basic_charset const & that );
};

// A character set whose storage comes from the standard allocator
// ( basic_charset<std::allocator<char> > ).  Per the original note, this
// type is used for the built-in character sets and for the user-defined
// ones, and such sets live on the heap.
struct charset : public basic_charset<std::allocator<char> >
{
    charset() {}

private:
    // Declared but given no body here: copying is not supported.
    charset & operator=( charset const & );
    charset( charset const & );
};

// Destroy a charset with delete.
//
// This is defined here, after the definition of charset, because charset
// is now a complete type and can therefore be deleted.  Per the original
// note, free_charset() is used from syntax2.h.
REGEXPR_H_INLINE void free_charset( charset const * pcharset )
{
    delete pcharset;
}

// A character set whose storage comes from a regex_arena.
//
// Per the original note, these are the sets written between square
// brackets in a pattern; arena allocation is used to speed up pattern
// compilation and rpattern clean-up.
struct custom_charset : public basic_charset<regex_arena>
{
    custom_charset( regex_arena & arena )
        : basic_charset<regex_arena>( arena )
    {
    }

    // Placement-style allocation: the object's own memory is taken from
    // the arena.
    static void * operator new( size_t size, regex_arena & arena )
    {
        return arena.allocate( size );
    }

    // Both deallocation functions are deliberately empty; nothing is
    // released per object.
    static void operator delete( void *, regex_arena & )
    {
    }
    static void operator delete( void * )
    {
    }

private:
    // Declared but given no body here: copying is not supported.
    custom_charset & operator=( custom_charset const & );
    custom_charset( custom_charset const & );
};

// Holder for the six built-in character sets (word, digit, space and their
// complements) for character type CharT.
//
// Each set is a function-local static that is built the first time its
// accessor runs.  reset() rebuilds all six from the same descriptors.
template< typename CharT >
class intrinsic_charsets
{
    // A charset initialised from a complement flag, a ctype descriptor and
    // a string of extra single-byte characters.
    struct intrinsic_charset : public charset
    {
        intrinsic_charset( bool fcompliment, regex_ctype_t desc, char const * sz )
        {
            reset( fcompliment, desc, sz );
        }

        // Clear the set, then rebuild it:
        //   fcompliment - stored in m_fcompliment
        //   desc        - stored in m_posixcharson
        //   sz          - NUL-terminated list of characters whose bits are
        //                 set in the ASCII bitvector
        // Ends by calling optimize() for CharT.
        void reset( bool fcompliment, regex_ctype_t desc, char const * sz )
        {
            clear();
            m_fcompliment  = fcompliment;
            m_posixcharson = desc;

            char const * pch = sz;
            while( *pch )
            {
                m_ascii_bitvector.set( static_cast<unsigned char>( *pch ) );
                ++pch;
            }

            optimize( type2type<CharT>() );
        }

    private:
        // Declared but given no body here: copying is not supported.
        intrinsic_charset & operator=( intrinsic_charset const & );
        intrinsic_charset( intrinsic_charset const & );
    };

    // Mutable accessors, one per built-in set.  These are private so that
    // only reset() can modify the shared instances.

    // alpha | digit, plus '_'
    static intrinsic_charset & _get_word_charset()
    {
        static intrinsic_charset s_instance( false, wct_alpha()|wct_digit(), "_" );
        return s_instance;
    }

    // digit
    static intrinsic_charset & _get_digit_charset()
    {
        static intrinsic_charset s_instance( false, wct_digit(), "" );
        return s_instance;
    }

    // space
    static intrinsic_charset & _get_space_charset()
    {
        static intrinsic_charset s_instance( false, wct_space(), "" );
        return s_instance;
    }

    // complement of the word set
    static intrinsic_charset & _get_not_word_charset()
    {
        static intrinsic_charset s_instance( true, wct_alpha()|wct_digit(), "_" );
        return s_instance;
    }

    // complement of the digit set
    static intrinsic_charset & _get_not_digit_charset()
    {
        static intrinsic_charset s_instance( true, wct_digit(), "" );
        return s_instance;
    }

    // complement of the space set
    static intrinsic_charset & _get_not_space_charset()
    {
        static intrinsic_charset s_instance( true, wct_space(), "" );
        return s_instance;
    }

public:
    // Read-only views of the built-in sets.
    static charset const & get_word_charset()      { return _get_word_charset(); }
    static charset const & get_digit_charset()     { return _get_digit_charset(); }
    static charset const & get_space_charset()     { return _get_space_charset(); }
    static charset const & get_not_word_charset()  { return _get_not_word_charset(); }
    static charset const & get_not_digit_charset() { return _get_not_digit_charset(); }
    static charset const & get_not_space_charset() { return _get_not_space_charset(); }

    // Rebuild every built-in set from its descriptor.
    // NOTE(review): presumably intended for use after a locale change, since
    // the sets are derived from ctype classification -- confirm with callers.
    static void reset()
    {
        _get_word_charset().reset( false, wct_alpha()|wct_digit(), "_" );
        _get_digit_charset().reset( false, wct_digit(), "" );
        _get_space_charset().reset( false, wct_space(), "" );
        _get_not_word_charset().reset( true, wct_alpha()|wct_digit(), "_" );
        _get_not_digit_charset().reset( true, wct_digit(), "" );
        _get_not_space_charset().reset( true, wct_space(), "" );
    }
};

//
💿 文件大小 98 K
👤 上传用户 billhu
📂 所属分类其他
🏷️ 相关标签

#vsstylemanager #家
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -