regex_compiler.hpp

来自「support vector clustering for vc++」· HPP 代码 · 共 554 行 · 第 1/2 页

HPP
554
字号

        // restore the modifiers
        this->traits_.flags(old_flags);
        return seq;
    }

    ///////////////////////////////////////////////////////////////////////////
    // parse_charset
    /// INTERNAL ONLY
    detail::sequence<BidiIter> parse_charset(string_iterator &begin, string_iterator end)
    {
        // The set is accumulated in a local object; the free helper
        // detail::parse_charset consumes the pattern text between begin and
        // end and fills it in, using the compiler traits for syntax.
        detail::compound_charset<traits_type> charset;
        detail::parse_charset(begin, end, charset, this->traits_);

        // Turn the finished set into a matcher sequence, passing along the
        // regex traits and the syntax flags currently in effect.
        return detail::make_charset_xpression<BidiIter>(charset, this->rxtraits(), this->traits_.flags());
    }

    ///////////////////////////////////////////////////////////////////////////
    // parse_atom
    /// INTERNAL ONLY
    // Parses a single atom (literal run, assertion, escape, group, charset,
    // quote-meta section ...) starting at begin and returns the matcher
    // sequence built for it.
    //
    //  begin - in/out; advanced past whatever was consumed. It is restored to
    //          its entry value when the token is not one this function handles.
    //  end   - one past the last character of the pattern.
    //
    // Returns an empty sequence at the end of the pattern or when the token
    // is not handled here. Throws regex_error for an unexpected quantifier,
    // a quote-meta end without a begin, or an escape of an unknown type.
    detail::sequence<BidiIter> parse_atom(string_iterator &begin, string_iterator end)
    {
        using namespace regex_constants;
        escape_value esc = { 0, 0, 0, detail::escape_char };
        // remembered so that an unhandled token can be "un-read" below
        string_iterator old_begin = begin;

        switch(this->traits_.get_token(begin, end))
        {
        case token_literal:
            return detail::make_literal_xpression<BidiIter>
            (
                this->parse_literal(begin, end), this->traits_.flags(), this->rxtraits()
            );

        case token_any:
            return detail::make_any_xpression<BidiIter>(this->traits_.flags(), this->rxtraits());

        case token_assert_begin_sequence:
            return detail::make_dynamic_xpression<BidiIter>(detail::assert_bos_matcher());

        case token_assert_end_sequence:
            return detail::make_dynamic_xpression<BidiIter>(detail::assert_eos_matcher());

        case token_assert_begin_line:
            return detail::make_assert_begin_line<BidiIter>(this->traits_.flags(), this->rxtraits());

        case token_assert_end_line:
            return detail::make_assert_end_line<BidiIter>(this->traits_.flags(), this->rxtraits());

        case token_assert_word_boundary:
            return detail::make_assert_word<BidiIter>(detail::word_boundary<true>(), this->rxtraits());

        case token_assert_not_word_boundary:
            return detail::make_assert_word<BidiIter>(detail::word_boundary<false>(), this->rxtraits());

        case token_assert_word_begin:
            return detail::make_assert_word<BidiIter>(detail::word_begin(), this->rxtraits());

        case token_assert_word_end:
            return detail::make_assert_word<BidiIter>(detail::word_end(), this->rxtraits());

        case token_escape:
            esc = this->parse_escape(begin, end);
            switch(esc.type_)
            {
            case detail::escape_mark:
                return detail::make_backref_xpression<BidiIter>
                (
                    esc.mark_nbr_, this->traits_.flags(), this->rxtraits()
                );
            case detail::escape_char:
                return detail::make_char_xpression<BidiIter>
                (
                    esc.ch_, this->traits_.flags(), this->rxtraits()
                );
            case detail::escape_class:
                // *begin++ consumes the class character here; whether it is
                // classified as upper_ is forwarded to the factory.
                // NOTE(review): presumably an upper-case class letter selects
                // the negated class -- confirm against make_posix_charset_xpression.
                return detail::make_posix_charset_xpression<BidiIter>
                (
                    esc.class_
                  , this->rxtraits().isctype(*begin++, this->upper_)
                  , this->traits_.flags()
                  , this->rxtraits()
                );
            default:
                // Every known escape type returns above. Without this, an
                // unrecognized type would drop out of the inner switch and
                // fall through into the group-parsing case below.
                throw regex_error(error_escape, "invalid escape sequence");
            }

        case token_group_begin:
            return this->parse_group(begin, end);

        case token_charset_begin:
            return this->parse_charset(begin, end);

        case token_invalid_quantifier:
            throw regex_error(error_badrepeat, "quantifier not expected");

        case token_quote_meta_begin:
            return detail::make_literal_xpression<BidiIter>
            (
                this->parse_quote_meta(begin, end), this->traits_.flags(), this->rxtraits()
            );

        case token_quote_meta_end:
            throw regex_error
            (
                error_escape
              , "found quote-meta end without corresponding quote-meta begin"
            );

        case token_end_of_pattern:
            break;

        default:
            // not an atom: put back whatever get_token consumed
            begin = old_begin;
            break;
        }

        // nothing parsed; callers treat an empty sequence as "no atom here"
        return detail::sequence<BidiIter>();
    }

    ///////////////////////////////////////////////////////////////////////////
    // parse_quant
    /// INTERNAL ONLY
    // Parses one atom together with an optional quantifier that follows it.
    //
    //  begin - in/out; must not equal end on entry. Advanced past the atom
    //          and past the quantifier when one is consumed.
    //  end   - one past the last character of the pattern.
    //
    // Returns the (possibly quantified) sequence, or an empty sequence when
    // no atom could be parsed.
    detail::sequence<BidiIter> parse_quant(string_iterator &begin, string_iterator end)
    {
        BOOST_ASSERT(begin != end);
        detail::quant_spec spec = { 0, 0, false };
        detail::sequence<BidiIter> seq = this->parse_atom(begin, end);

        // BUGBUG this doesn't handle the degenerate (?:)+ correctly
        if(!seq.is_empty() && begin != end && seq.first->is_quantifiable())
        {
            if(this->traits_.get_quant_spec(begin, end, spec))
            {
                BOOST_ASSERT(spec.min_ <= spec.max_);

                if(0 == spec.max_) // quant {0,0} is degenerate -- matches nothing.
                {
                    // Discard the atom and parse whatever comes next in its
                    // place. get_quant_spec may have consumed the last
                    // characters of the pattern, in which case nothing is
                    // left and recursing would violate the begin != end
                    // precondition asserted at the top of this function.
                    if(begin != end)
                    {
                        seq = this->parse_quant(begin, end);
                    }
                    else
                    {
                        seq = detail::sequence<BidiIter>();
                    }
                }
                else
                {
                    seq = seq.first->quantify(spec, this->hidden_mark_count_, seq, alternates_factory());
                }
            }
        }

        return seq;
    }

    ///////////////////////////////////////////////////////////////////////////
    // parse_sequence
    /// INTERNAL ONLY
    detail::sequence<BidiIter> parse_sequence(string_iterator &begin, string_iterator end)
    {
        // Accumulates consecutive quantified atoms into one chained sequence.
        detail::sequence<BidiIter> result;

        for(; begin != end; )
        {
            detail::sequence<BidiIter> piece = this->parse_quant(begin, end);

            // an empty piece means no further atom could be parsed here,
            // so whatever has been chained so far is the whole sequence
            if(piece.is_empty())
            {
                return result;
            }

            // append the new piece to the tail of the chain
            result += piece;
        }

        return result;
    }

    ///////////////////////////////////////////////////////////////////////////
    // parse_literal
    //  scan ahead looking for char literals to be globbed together into a string literal
    /// INTERNAL ONLY
    string_type parse_literal(string_iterator &begin, string_iterator end)
    {
        using namespace regex_constants;
        BOOST_ASSERT(begin != end);
        BOOST_ASSERT(token_literal == this->traits_.get_token(begin, end));

        escape_value escaped = { 0, 0, 0, detail::escape_char };

        // Seed the result with the first character and step past it.
        string_type text(1, *begin);
        string_iterator last_start = begin; // where the most recently added character began
        ++begin;
        string_iterator cursor = begin;     // look-ahead position; begin trails it

        while(begin != end)
        {
            detail::quant_spec spec;
            if(this->traits_.get_quant_spec(cursor, end, spec))
            {
                // A quantifier follows. When more than one character has been
                // gathered, give the last one back (rewind begin to where it
                // started and drop it from the string) -- presumably so that
                // the quantifier ends up applying to that character alone.
                if(1 != text.size())
                {
                    begin = last_start;
                    text.erase(text.size() - 1);
                }
                return text;
            }

            switch(this->traits_.get_token(cursor, end))
            {
            case token_escape:
                // only escapes that denote a plain character can join the run
                escaped = this->parse_escape(cursor, end);
                if(detail::escape_char != escaped.type_)
                {
                    return text;
                }
                text += escaped.ch_;
                break;

            case token_literal:
                text += *cursor;
                ++cursor;
                break;

            default:
                // anything else ends the literal run; begin is left untouched
                return text;
            }

            // commit the character just appended and move on
            last_start = begin;
            begin = cursor;
        }

        return text;
    }

    ///////////////////////////////////////////////////////////////////////////
    // parse_quote_meta
    //  scan ahead to the quote-meta end token (or the end of the pattern) and return the text in between as one literal
    /// INTERNAL ONLY
    string_type parse_quote_meta(string_iterator &begin, string_iterator end)
    {
        using namespace regex_constants;

        // Everything from here up to the quote-meta end token (or the end of
        // the pattern, whichever comes first) is returned verbatim.
        string_iterator start = begin;

        // token_start is the position before each get_token call, so that the
        // terminating token itself is excluded from the returned string.
        for(string_iterator token_start = begin; token_start != end; token_start = begin)
        {
            switch(this->traits_.get_token(begin, end))
            {
            case token_quote_meta_end:
                return string_type(start, token_start);

            case token_escape:
                // an escape token must be followed by one more character, which is skipped
                detail::ensure(begin != end, error_escape, "incomplete escape sequence");
                ++begin;
                break;

            case token_literal:
                ++begin;
                break;

            default:
                break;
            }
        }

        return string_type(start, begin);
    }

    ///////////////////////////////////////////////////////////////////////////////
    // parse_escape
    /// INTERNAL ONLY
    escape_value parse_escape(string_iterator &begin, string_iterator end)
    {
        detail::ensure(begin != end, regex_constants::error_escape, "incomplete escape sequence");

        // A back-reference can only start with a character whose base-10
        // value is strictly positive.
        bool const starts_with_nonzero_digit = 0 < this->rxtraits().value(*begin, 10);

        if(starts_with_nonzero_digit)
        {
            // Read the number on a scratch iterator (toi is given 999 as its
            // upper bound) so that begin is untouched if it turns out not to
            // be a back-reference.
            string_iterator probe = begin;
            int number = detail::toi(probe, end, this->rxtraits(), 10, 999);

            // Numbers below 10 are always taken as back-references; larger
            // ones only when they do not exceed the current mark count.
            bool const is_backref = number < 10 || number <= static_cast<int>(this->mark_count_);

            if(is_backref)
            {
                begin = probe;
                escape_value result = {0, number, 0, detail::escape_mark};
                return result;
            }
        }

        // Otherwise hand the escape to the generic helper.
        return detail::parse_escape(begin, end, this->traits_);
    }

    std::size_t mark_count_;
    std::size_t hidden_mark_count_;
    CompilerTraits traits_;
    typename RegexTraits::char_class_type upper_;
};

}} // namespace boost::xpressive

#endif

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?