⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 re_tokeniser_helper.hpp

📁 Boost provides free peer-reviewed portable C++ source libraries. We emphasize libraries that work
💻 HPP
字号:
// tokeniser_helper.hpp// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)//// Distributed under the Boost Software License, Version 1.0. (See accompanying// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)#ifndef BOOST_LEXER_RE_TOKENISER_HELPER_H#define BOOST_LEXER_RE_TOKENISER_HELPER_H#include "../../char_traits.hpp"// strlen()#include <cstring>#include "../../size_t.hpp"#include "re_tokeniser_state.hpp"namespace boost{namespace lexer{namespace detail{template<typename CharT, typename Traits = char_traits<CharT> >class basic_re_tokeniser_helper{public:    typedef basic_re_tokeniser_state<CharT> state;    typedef std::basic_string<CharT> string;    static const CharT *escape_sequence (state &state_, CharT &ch_,        std::size_t &str_len_)    {        bool eos_ = state_.eos ();        if (eos_)        {            throw runtime_error ("Unexpected end of regex "                "following '\\'.");        }        const CharT *str_ = charset_shortcut (*state_._curr, str_len_);        if (str_)        {            state_.increment ();        }        else        {            ch_ = chr (state_);        }        return str_;    }    // This function can call itself.    static void charset (state &state_, string &chars_, bool &negated_)    {        CharT ch_ = 0;        bool eos_ = state_.next (ch_);        if (eos_)        {            // Pointless returning index if at end of string            throw runtime_error ("Unexpected end of regex "                "following '['.");        }        negated_ = ch_ == '^';        if (negated_)        {            eos_ = state_.next (ch_);            if (eos_)            {                // Pointless returning index if at end of string                throw runtime_error ("Unexpected end of regex "                    "following '^'.");            }        }        bool chset_ = false;        CharT prev_ = 0;        while (ch_ != ']')        {            if (ch_ == '\\')            {                std::size_t str_len_ = 0;                const CharT *str_ = escape_sequence (state_, prev_, str_len_);                chset_ = str_ != 0;                if (chset_)                {                    state temp_state_ (str_ + 1, str_ + str_len_,                        state_._case_sensitive, state_._locale,                        state_._dot_not_newline);                    string temp_chars_;                    bool temp_negated_ = false;                    charset (temp_state_, temp_chars_, temp_negated_);                    if (negated_ != temp_negated_)                    {                        std::ostringstream ss_;                        ss_ << "Mismatch in charset negation preceding "                            "index " << state_._index - 1 << '.';                        throw runtime_error (ss_.str ().c_str ());                    }                    chars_ += temp_chars_;                }            }/*            else if (ch_ == '[' && !state_.eos () && *state_._curr == ':')            {                // TODO: POSIX charsets            }*/            else            {                chset_ = false;                prev_ = ch_;            }            eos_ = state_.next (ch_);            // Covers preceding if, else if and else            if (eos_)            {                // Pointless returning index if at end of string                throw runtime_error ("Unexpected end of regex "                    "(missing ']').");            }            if (ch_ == '-')            {                charset_range (chset_, state_, eos_, ch_, prev_, chars_);            }            else if (!chset_)            {                if (!state_._case_sensitive &&                    (std::isupper (prev_, state_._locale) ||                    std::islower (prev_, state_._locale)))                {                    CharT upper_ = std::toupper (prev_, state_._locale);                    CharT lower_ = std::tolower (prev_, state_._locale);                    chars_ += upper_;                    chars_ += lower_;                }                else                {                    chars_ += prev_;                }            }        }        if (!negated_ && chars_.empty ())        {            throw runtime_error ("Empty charsets not allowed.");        }    }private:    static const char *charset_shortcut (const char ch_,        std::size_t &str_len_)    {        const char *str_ = 0;        switch (ch_)        {        case 'd':            str_ = "[0-9]";            break;        case 'D':            str_ = "[^0-9]";            break;        case 's':            str_ = "[ \t\n\r\f\v]";            break;        case 'S':            str_ = "[^ \t\n\r\f\v]";            break;        case 'w':            str_ = "[_0-9A-Za-z]";            break;        case 'W':            str_ = "[^_0-9A-Za-z]";            break;        }        if (str_)        {            // Some systems have strlen in namespace std.            using namespace std;            str_len_ = strlen (str_);        }        else        {            str_len_ = 0;        }        return str_;    }    static const wchar_t *charset_shortcut (const wchar_t ch_,        std::size_t &str_len_)    {        const wchar_t *str_ = 0;        switch (ch_)        {        case 'd':            str_ = L"[0-9]";            break;        case 'D':            str_ = L"[^0-9]";            break;        case 's':            str_ = L"[ \t\n\r\f\v]";            break;        case 'S':            str_ = L"[^ \t\n\r\f\v]";            break;        case 'w':            str_ = L"[_0-9A-Za-z]";            break;        case 'W':            str_ = L"[^_0-9A-Za-z]";            break;        }        if (str_)        {            // Some systems have wcslen in namespace std.            using namespace std;            str_len_ = wcslen (str_);        }        else        {            str_len_ = 0;        }        return str_;    }    static CharT chr (state &state_)    {        CharT ch_ = 0;        // eos_ has already been checked for.        switch (*state_._curr)        {            case '0':            case '1':            case '2':            case '3':            case '4':            case '5':            case '6':            case '7':                ch_ = decode_octal (state_);                break;            case 'a':                ch_ = '\a';                state_.increment ();                break;            case 'b':                ch_ = '\b';                state_.increment ();                break;            case 'c':                ch_ = decode_control_char (state_);                break;            case 'e':                ch_ = 27; // '\e' not recognised by compiler                state_.increment ();                break;            case 'f':                ch_ = '\f';                state_.increment ();                break;            case 'n':                ch_ = '\n';                state_.increment ();                break;            case 'r':                ch_ = '\r';                state_.increment ();                break;            case 't':                ch_ = '\t';                state_.increment ();                break;            case 'v':                ch_ = '\v';                state_.increment ();                break;            case 'x':                ch_ = decode_hex (state_);                break;            default:                ch_ = *state_._curr;                state_.increment ();                break;        }        return ch_;    }    static CharT decode_octal (state &state_)    {        std::size_t accumulator_ = 0;        CharT ch_ = *state_._curr;        unsigned short count_ = 3;        bool eos_ = false;        for (;;)        {            accumulator_ *= 8;            accumulator_ += ch_ - '0';            --count_;            state_.increment ();            eos_ = state_.eos ();            if (!count_ || eos_) break;            ch_ = *state_._curr;            // Don't consume invalid chars!            if (ch_ < '0' || ch_ > '7')            {                break;            }        }        return static_cast<CharT> (accumulator_);    }    static CharT decode_control_char (state &state_)    {        // Skip over 'c'        state_.increment ();        CharT ch_ = 0;        bool eos_ = state_.next (ch_);        if (eos_)        {            // Pointless returning index if at end of string            throw runtime_error ("Unexpected end of regex following \\c.");        }        else        {            if (ch_ >= 'a' && ch_ <= 'z')            {                ch_ -= 'a' - 1;            }            else if (ch_ >= 'A' && ch_ <= 'Z')            {                ch_ -= 'A' - 1;            }            else if (ch_ == '@')            {                // Apparently...                ch_ = 0;            }            else            {                std::ostringstream ss_;                ss_ << "Invalid control char at index " <<                    state_._index - 1 << '.';                throw runtime_error (ss_.str ().c_str ());            }        }        return ch_;    }    static CharT decode_hex (state &state_)    {        // Skip over 'x'        state_.increment ();        CharT ch_ = 0;        bool eos_ = state_.next (ch_);        if (eos_)        {            // Pointless returning index if at end of string            throw runtime_error ("Unexpected end of regex following \\x.");        }        if (!((ch_ >= '0' && ch_ <= '9') || (ch_ >= 'a' && ch_ <= 'f') ||            (ch_ >= 'A' && ch_ <= 'F')))        {            std::ostringstream ss_;            ss_ << "Illegal char following \\x at index " <<                state_._index - 1 << '.';            throw runtime_error (ss_.str ().c_str ());        }        std::size_t hex_ = 0;        do        {            hex_ *= 16;            if (ch_ >= '0' && ch_ <= '9')            {                hex_ += ch_ - '0';            }            else if (ch_ >= 'a' && ch_ <= 'f')            {                hex_ += 10 + (ch_ - 'a');            }            else            {                hex_ += 10 + (ch_ - 'A');            }            eos_ = state_.eos ();            if (!eos_)            {                ch_ = *state_._curr;                // Don't consume invalid chars!                if (((ch_ >= '0' && ch_ <= '9') ||                    (ch_ >= 'a' && ch_ <= 'f') || (ch_ >= 'A' && ch_ <= 'F')))                {                    state_.increment ();                }                else                {                    eos_ = true;                }            }        } while (!eos_);        return static_cast<CharT> (hex_);    }    static void charset_range (const bool chset_, state &state_, bool &eos_,        CharT &ch_, const CharT prev_, string &chars_)    {        if (chset_)        {            std::ostringstream ss_;            ss_ << "Charset cannot form start of range preceding "                "index " << state_._index - 1 << '.';            throw runtime_error (ss_.str ().c_str ());        }        eos_ = state_.next (ch_);        if (eos_)        {            // Pointless returning index if at end of string            throw runtime_error ("Unexpected end of regex "                "following '-'.");        }        CharT curr_ = 0;        if (ch_ == '\\')        {            std::size_t str_len_ = 0;            if (escape_sequence (state_, curr_, str_len_))            {                std::ostringstream ss_;                ss_ << "Charset cannot form end of range preceding index "                    << state_._index << '.';                throw runtime_error (ss_.str ().c_str ());            }        }/*        else if (ch_ == '[' && !state_.eos () && *state_._curr == ':')        {            std::ostringstream ss_;            ss_ << "POSIX char class cannot form end of range at "                "index " << state_._index - 1 << '.';            throw runtime_error (ss_.str ().c_str ());        }*/        else        {            curr_ = ch_;        }        eos_ = state_.next (ch_);        // Covers preceding if and else        if (eos_)        {            // Pointless returning index if at end of string            throw runtime_error ("Unexpected end of regex "                "(missing ']').");        }        std::size_t start_ = static_cast<typename Traits::index_type> (prev_);        std::size_t end_ = static_cast<typename Traits::index_type> (curr_);        // Semanic check        if (end_ < start_)        {            std::ostringstream ss_;            ss_ << "Invalid range in charset preceding index " <<                state_._index - 1 << '.';            throw runtime_error (ss_.str ().c_str ());        }        chars_.reserve (chars_.size () + (end_ + 1 - start_));        for (; start_ <= end_; ++start_)        {            CharT ch_ = static_cast<CharT> (start_);            if (!state_._case_sensitive &&                (std::isupper (ch_, state_._locale) ||                std::islower (ch_, state_._locale)))            {                CharT upper_ = std::toupper (ch_, state_._locale);                CharT lower_ = std::tolower (ch_, state_._locale);                chars_ += (upper_);                chars_ += (lower_);            }            else            {                chars_ += (ch_);            }        }    }};}}}#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -