parse_charset.hpp

来自「Boost provides free peer-reviewed portab」· HPP 代码 · 共 366 行

HPP
366
字号
///////////////////////////////////////////////////////////////////////////////// parse_charset.hpp////  Copyright 2008 Eric Niebler. Distributed under the Boost//  Software License, Version 1.0. (See accompanying file//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)#ifndef BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSE_CHARSET_HPP_EAN_10_04_2005#define BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSE_CHARSET_HPP_EAN_10_04_2005// MS compatible compilers support #pragma once#if defined(_MSC_VER) && (_MSC_VER >= 1020)# pragma once#endif#include <boost/integer.hpp>#include <boost/mpl/bool.hpp>#include <boost/throw_exception.hpp>#include <boost/numeric/conversion/converter.hpp>#include <boost/xpressive/detail/detail_fwd.hpp>#include <boost/xpressive/detail/dynamic/parser_enum.hpp>#include <boost/xpressive/detail/utility/literals.hpp>#include <boost/xpressive/detail/utility/chset/chset.hpp>#include <boost/xpressive/regex_constants.hpp>namespace boost { namespace xpressive { namespace detail{enum escape_type{    escape_char  , escape_mark  , escape_class};///////////////////////////////////////////////////////////////////////////////// escape_value//template<typename Char, typename Class>struct escape_value{    Char ch_;    int mark_nbr_;    Class class_;    escape_type type_;};///////////////////////////////////////////////////////////////////////////////// char_overflow_handler//struct char_overflow_handler{    void operator ()(numeric::range_check_result result) const // throw(regex_error)    {        if(numeric::cInRange != result)        {            boost::throw_exception(                regex_error(                    regex_constants::error_escape                  , "character escape too large to fit in target character type"                )            );        }    }};///////////////////////////////////////////////////////////////////////////////// parse_escape//template<typename FwdIter, typename CompilerTraits>escape_value<typename iterator_value<FwdIter>::type, typename CompilerTraits::regex_traits::char_class_type>parse_escape(FwdIter &begin, FwdIter end, CompilerTraits &traits){    using namespace regex_constants;    typedef typename iterator_value<FwdIter>::type char_type;    typedef typename CompilerTraits::regex_traits regex_traits;    typedef typename regex_traits::char_class_type char_class_type;    // define an unsigned type the same size as char_type    typedef typename boost::uint_t<CHAR_BIT * sizeof(char_type)>::least uchar_t;    BOOST_MPL_ASSERT_RELATION(sizeof(uchar_t), ==, sizeof(char_type));    typedef numeric::conversion_traits<uchar_t, int> converstion_traits;    ensure(begin != end, error_escape, "unexpected end of pattern found");    numeric::converter<int, uchar_t, converstion_traits, char_overflow_handler> converter;    escape_value<char_type,char_class_type> esc = { 0, 0, 0, escape_char };    bool const icase = (0 != (regex_constants::icase_ & traits.flags()));    regex_traits const &rxtraits = traits.traits();    FwdIter tmp;    esc.class_ = rxtraits.lookup_classname(begin, begin + 1, icase);    if(0 != esc.class_)    {        esc.type_ = escape_class;        return esc;    }    if(-1 != rxtraits.value(*begin, 8))    {        esc.ch_ = converter(toi(begin, end, rxtraits, 8, 0777));        return esc;    }    switch(*begin)    {    // bell character    case BOOST_XPR_CHAR_(char_type, 'a'):        esc.ch_ = BOOST_XPR_CHAR_(char_type, '\a');        ++begin;        break;    // escape character    case BOOST_XPR_CHAR_(char_type, 'e'):        esc.ch_ = converter(27);        ++begin;        break;    // control character    case BOOST_XPR_CHAR_(char_type, 'c'):        ensure(++begin != end, error_escape, "unexpected end of pattern found");        ensure        (            rxtraits.in_range(BOOST_XPR_CHAR_(char_type, 'a'), BOOST_XPR_CHAR_(char_type, 'z'), *begin)         || rxtraits.in_range(BOOST_XPR_CHAR_(char_type, 'A'), BOOST_XPR_CHAR_(char_type, 'Z'), *begin)          , error_escape          , "invalid escape control letter; must be one of a-z or A-Z"        );        // Convert to character according to ECMA-262, section 15.10.2.10:        esc.ch_ = converter(*begin % 32);        ++begin;        break;    // formfeed character    case BOOST_XPR_CHAR_(char_type, 'f'):        esc.ch_ = BOOST_XPR_CHAR_(char_type, '\f');        ++begin;        break;    // newline    case BOOST_XPR_CHAR_(char_type, 'n'):        esc.ch_ = BOOST_XPR_CHAR_(char_type, '\n');        ++begin;        break;    // return    case BOOST_XPR_CHAR_(char_type, 'r'):        esc.ch_ = BOOST_XPR_CHAR_(char_type, '\r');        ++begin;        break;    // horizontal tab    case BOOST_XPR_CHAR_(char_type, 't'):        esc.ch_ = BOOST_XPR_CHAR_(char_type, '\t');        ++begin;        break;    // vertical tab    case BOOST_XPR_CHAR_(char_type, 'v'):        esc.ch_ = BOOST_XPR_CHAR_(char_type, '\v');        ++begin;        break;    // hex escape sequence    case BOOST_XPR_CHAR_(char_type, 'x'):        ensure(++begin != end, error_escape, "unexpected end of pattern found");        tmp = begin;        esc.ch_ = converter(toi(begin, end, rxtraits, 16, 0xff));        ensure(2 == std::distance(tmp, begin), error_escape, "invalid hex escape : "            "must be \\x HexDigit HexDigit");        break;    // Unicode escape sequence    case BOOST_XPR_CHAR_(char_type, 'u'):        ensure(++begin != end, error_escape, "unexpected end of pattern found");        tmp = begin;        esc.ch_ = converter(toi(begin, end, rxtraits, 16, 0xffff));        ensure(4 == std::distance(tmp, begin), error_escape, "invalid Unicode escape : "            "must be \\u HexDigit HexDigit HexDigit HexDigit");        break;    // backslash    case BOOST_XPR_CHAR_(char_type, '\\'):        //esc.ch_ = BOOST_XPR_CHAR_(char_type, '\\');        //++begin;        //break;    // all other escaped characters represent themselves    default:        esc.ch_ = *begin;        ++begin;        break;    }    return esc;}//////////////////////////////////////////////////////////////////////////// parse_charset//template<typename FwdIter, typename RegexTraits, typename CompilerTraits>inline void parse_charset(    FwdIter &begin  , FwdIter end  , compound_charset<RegexTraits> &chset  , CompilerTraits &traits){    using namespace regex_constants;    typedef typename RegexTraits::char_type char_type;    typedef typename RegexTraits::char_class_type char_class_type;    BOOST_ASSERT(begin != end);    RegexTraits const &rxtraits = traits.traits();    bool const icase = (0 != (regex_constants::icase_ & traits.flags()));    FwdIter iprev = FwdIter();    escape_value<char_type, char_class_type> esc = {0, 0, 0, escape_char};    bool invert = false;    // check to see if we have an inverse charset    if(begin != end && token_charset_invert == traits.get_charset_token(iprev = begin, end))    {        begin = iprev;        invert = true;    }    // skip the end token if-and-only-if it is the first token in the charset    if(begin != end && token_charset_end == traits.get_charset_token(iprev = begin, end))    {        for(; begin != iprev; ++begin)        {            chset.set_char(*begin, rxtraits, icase);        }    }    compiler_token_type tok;    char_type ch_prev = char_type(), ch_next = char_type();    bool have_prev = false;    ensure(begin != end, error_brack, "unexpected end of pattern found");    // remember the current position and grab the next token    iprev = begin;    tok = traits.get_charset_token(begin, end);    do    {        ensure(begin != end, error_brack, "unexpected end of pattern found");        if(token_charset_hyphen == tok && have_prev)        {            // remember the current position            FwdIter iprev2 = begin;            have_prev = false;            // ch_prev is lower bound of a range            switch(traits.get_charset_token(begin, end))            {            case token_charset_hyphen:            case token_charset_invert:                begin = iprev2; // un-get these tokens and fall through            case token_literal:                ch_next = *begin++;                detail::ensure(ch_prev <= ch_next, error_range, "invalid charset range");                chset.set_range(ch_prev, ch_next, rxtraits, icase);                continue;            case token_charset_backspace:                ch_next = char_type(8); // backspace                detail::ensure(ch_prev <= ch_next, error_range, "invalid charset range");                chset.set_range(ch_prev, ch_next, rxtraits, icase);                continue;            case token_escape:                esc = parse_escape(begin, end, traits);                if(escape_char == esc.type_)                {                    detail::ensure(ch_prev <= esc.ch_, error_range, "invalid charset range");                    chset.set_range(ch_prev, esc.ch_, rxtraits, icase);                    continue;                }            case token_charset_end: // fall through            default:                // not a range.                begin = iprev;      // backup to hyphen token                chset.set_char(ch_prev, rxtraits, icase);                chset.set_char(*begin++, rxtraits, icase);                continue;            }        }        if(have_prev)        {            chset.set_char(ch_prev, rxtraits, icase);            have_prev = false;        }        switch(tok)        {        case token_charset_hyphen:        case token_charset_invert:        case token_charset_end:        case token_posix_charset_end:            begin = iprev; // un-get these tokens            ch_prev = *begin++;            have_prev = true;            continue;        case token_charset_backspace:            ch_prev = char_type(8); // backspace            have_prev = true;            continue;        case token_posix_charset_begin:            {                FwdIter tmp = begin, start = begin;                bool invert = (token_charset_invert == traits.get_charset_token(tmp, end));                if(invert)                {                    begin = start = tmp;                }                while(token_literal == (tok = traits.get_charset_token(begin, end)))                {                    tmp = ++begin;                    ensure(begin != end, error_brack, "unexpected end of pattern found");                }                if(token_posix_charset_end == tok)                {                    char_class_type chclass = rxtraits.lookup_classname(start, tmp, icase);                    ensure(0 != chclass, error_ctype, "unknown class name");                    chset.set_class(chclass, invert);                    continue;                }                begin = iprev; // un-get this token                ch_prev = *begin++;                have_prev = true;            }            continue;        case token_escape:            esc = parse_escape(begin, end, traits);            if(escape_char == esc.type_)            {                ch_prev = esc.ch_;                have_prev = true;            }            else if(escape_class == esc.type_)            {                char_class_type upper_ = lookup_classname(rxtraits, "upper");                BOOST_ASSERT(0 != upper_);                chset.set_class(esc.class_, rxtraits.isctype(*begin++, upper_));            }            else            {                BOOST_ASSERT(false);            }            continue;        default:            ch_prev = *begin++;            have_prev = true;            continue;        }    }    while(ensure((iprev = begin) != end, error_brack, "unexpected end of pattern found"),          token_charset_end != (tok = traits.get_charset_token(begin, end)));    if(have_prev)    {        chset.set_char(ch_prev, rxtraits, icase);    }    if(invert)    {        chset.inverse();    }}}}} // namespace boost::xpressive::detail#endif

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?