cpp_regex_traits.hpp

来自「support vector clustering for vc++」· HPP 代码 · 共 677 行 · 第 1/2 页

HPP
677
字号
    bool operator !=(cpp_regex_traits<char_type> const &that) const
    {
        // Two traits objects are unequal exactly when their stored locales
        // do not compare equal.
        return !(this->loc_ == that.loc_);
    }

    /// Widens a narrow \c char into the traits' character type.
    ///
    /// \param ch The narrow character to convert.
    /// \return The result of widen(ch) on the cached std::ctype<char_type> facet.
    char_type widen(char ch) const
    {
        std::ctype<char_type> const &facet = *this->ctype_;
        return facet.widen(ch);
    }

    /// Maps a Char onto a hash value that fits in an unsigned char.
    ///
    /// The character is first converted with std::char_traits<Char>::to_int_type
    /// and the resulting integer is then cast to unsigned char.
    ///
    /// \param ch The source character.
    /// \return a value between 0 and UCHAR_MAX, inclusive.
    static unsigned char hash(char_type ch)
    {
        typedef std::char_traits<Char> char_traits_type;
        typename char_traits_type::int_type const as_int = char_traits_type::to_int_type(ch);
        return static_cast<unsigned char>(as_int);
    }

    /// Identity translation: the character is returned unchanged.
    ///
    /// \param ch The source character.
    /// \return ch
    static char_type translate(char_type ch)
    {
        // Nothing to translate; hand the character straight back.
        return ch;
    }

    /// Lower-cases a character through the cached std::ctype<char_type> facet,
    /// which imbue() obtains from the internally-stored locale.
    ///
    /// \param ch The source character.
    /// \return The result of tolower(ch) on the cached ctype facet.
    char_type translate_nocase(char_type ch) const
    {
        std::ctype<char_type> const &facet = *this->ctype_;
        return facet.tolower(ch);
    }

    /// Returns a string_type containing all the characters that compare equal,
    /// disregarding case, to the one passed in. This function can only be called
    /// if is_convertible<version_tag*, regex_traits_version_1_case_fold_tag*>::value
    /// is true.
    ///
    /// The result holds the lower-case form of \c ch followed by its upper-case
    /// form, or just one character when both forms are identical. A compile-time
    /// assertion restricts this function to char_type == char.
    ///
    /// \param ch The source character.
    /// \return string_type containing all chars which are equal to ch when disregarding
    ///     case. The string always has length 1 or 2, including for ch == '\0'.
    //typedef array<char_type, 2> fold_case_type;
    string_type fold_case(char_type ch) const
    {
        BOOST_MPL_ASSERT((is_same<char_type, char>));
        char_type const folded[] = {
            this->ctype_->tolower(ch)
          , this->ctype_->toupper(ch)
        };
        // Build the string with an explicit length rather than relying on NUL
        // termination: the NUL-terminated constructor would stop at the first
        // zero and yield an empty string for ch == '\0', dropping the very
        // character that was asked about.
        typename string_type::size_type const count = (folded[0] == folded[1]) ? 1 : 2;
        return string_type(folded, count);
    }

    /// Tests whether a character lies inside a closed character range.
    ///
    /// \param first The bottom of the range, inclusive.
    /// \param last The top of the range, inclusive.
    /// \param ch The source character.
    /// \return first <= ch && ch <= last.
    static bool in_range(char_type first, char_type last, char_type ch)
    {
        // Below the lower bound: no need to look at the upper bound.
        if(!(first <= ch))
        {
            return false;
        }
        return ch <= last;
    }

    /// Tests whether a character lies inside a closed character range, regardless of case.
    ///
    /// The character is tried as given, then in upper case, then in lower case;
    /// the first hit wins.
    ///
    /// \param first The bottom of the range, inclusive.
    /// \param last The top of the range, inclusive.
    /// \param ch The source character.
    /// \return in_range(first, last, ch) || in_range(first, last, toupper(ch)) ||
    ///     in_range(first, last, tolower(ch)), where the case conversions are
    ///     performed by the cached std::ctype facet.
    /// \attention The default implementation doesn't do proper Unicode
    ///     case folding, but this is the best we can do with the standard
    ///     ctype facet.
    bool in_range_nocase(char_type first, char_type last, char_type ch) const
    {
        // NOTE: no real Unicode case folding here; the std::ctype facet only
        // offers simple one-to-one upper/lower conversions.
        if(this->in_range(first, last, ch))
        {
            return true;
        }
        if(this->in_range(first, last, this->ctype_->toupper(ch)))
        {
            return true;
        }
        return this->in_range(first, last, this->ctype_->tolower(ch));
    }

    /// INTERNAL ONLY
    //string_type transform(char_type const *begin, char_type const *end) const
    //{
    //    return this->collate_->transform(begin, end);
    //}

    /// Intended to return a sort key for the character sequence designated by the
    /// iterator range [F1, F2) such that if the character sequence [G1, G2) sorts
    /// before the character sequence [H1, H2) then
    /// v.transform(G1, G2) < v.transform(H1, H2).
    ///
    /// Currently unimplemented: the body triggers BOOST_ASSERT(false) and, if
    /// execution continues past the assertion, returns a default-constructed
    /// string_type. Both iterator arguments are ignored.
    ///
    /// \attention Not used in xpressive 1.0
    template<typename FwdIter>
    string_type transform(FwdIter begin, FwdIter end) const
    {
        // A collate-based implementation (copy the range into a string_type and
        // forward to a pointer-based transform overload) is disabled in this file.
        static_cast<void>(begin);
        static_cast<void>(end);

        BOOST_ASSERT(false);
        return string_type();
    }

    /// Intended to return a sort key for the character sequence designated by the
    /// iterator range [F1, F2) such that if the character sequence [G1, G2) sorts
    /// before the character sequence [H1, H2) when character case is not considered
    /// then v.transform_primary(G1, G2) < v.transform_primary(H1, H2).
    ///
    /// Currently unimplemented: the body triggers BOOST_ASSERT(false) and, if
    /// execution continues past the assertion, returns a default-constructed
    /// string_type. Both iterator arguments are ignored.
    ///
    /// \attention Not used in xpressive 1.0
    template<typename FwdIter>
    string_type transform_primary(FwdIter begin, FwdIter end) const
    {
        static_cast<void>(begin);
        static_cast<void>(end);

        BOOST_ASSERT(false); // TODO implement me
        return string_type();
    }

    /// Intended to return a sequence of characters that represents the collating
    /// element consisting of the character sequence designated by the iterator
    /// range [F1, F2), or an empty string if the sequence is not a valid
    /// collating element.
    ///
    /// Currently unimplemented: the body triggers BOOST_ASSERT(false) and, if
    /// execution continues past the assertion, returns a default-constructed
    /// string_type. Both iterator arguments are ignored.
    ///
    /// \attention Not used in xpressive 1.0
    template<typename FwdIter>
    string_type lookup_collatename(FwdIter begin, FwdIter end) const
    {
        static_cast<void>(begin);
        static_cast<void>(end);

        BOOST_ASSERT(false); // TODO implement me
        return string_type();
    }

    /// Translates a character class name into its bitmask representation.
    ///
    /// The name is first looked up exactly as given. If that yields no match, the
    /// name is lower-cased with translate_nocase() and looked up a second time.
    ///
    /// \param begin A forward iterator to the start of the character sequence representing
    ///     the name of the character class.
    /// \param end The end of the character sequence.
    /// \param icase Specifies whether the returned bitmask should represent the case-insensitive
    ///     version of the character class. When true and the mask contains the lower or the
    ///     upper bit, both bits are set in the result.
    /// \pre begin != end
    /// \return A bitmask representing the character class, or 0 if the name is unknown.
    template<typename FwdIter>
    char_class_type lookup_classname(FwdIter begin, FwdIter end, bool icase) const
    {
        static detail::umaskex_t const icase_masks =
            detail::std_ctype_lower | detail::std_ctype_upper;

        BOOST_ASSERT(begin != end);

        // first attempt: the name exactly as written
        char_class_type mask = this->lookup_classname_impl_(begin, end);
        if(0 == mask)
        {
            // second attempt: the lower-cased name
            string_type lowered(begin, end);
            for(typename string_type::iterator it = lowered.begin(); it != lowered.end(); ++it)
            {
                *it = this->translate_nocase(*it);
            }
            mask = this->lookup_classname_impl_(lowered.begin(), lowered.end());
        }

        // a case-insensitive match must not distinguish lower from upper
        bool const is_case_class = 0 != (mask & icase_masks);
        if(icase && is_case_class)
        {
            mask |= icase_masks;
        }
        return mask;
    }

    /// Checks a character for membership in one or more character classes.
    ///
    /// The test is delegated to base_type::is() together with the cached
    /// std::ctype<char_type> facet.
    ///
    /// \param ch The character to test.
    /// \param mask The character class bitmask against which to test.
    /// \pre mask is a bitmask returned by lookup_classname, or is several such masks bit-or'ed
    ///     together.
    /// \return true if the character is a member of any of the specified character classes, false
    ///     otherwise.
    bool isctype(char_type ch, char_class_type mask) const
    {
        std::ctype<char_type> const &facet = *this->ctype_;
        return this->base_type::is(facet, ch, mask);
    }

    /// Converts a digit character into the integer it stands for.
    ///
    /// \param ch The digit character.
    /// \param radix The radix to use for the conversion.
    /// \pre radix is one of 8, 10, or 16.
    /// \return -1 if ch is not a digit character, the integer value of the character otherwise.
    ///     The conversion is performed by imbueing a std::basic_stringstream with
    ///     this->getloc(); setting the radix to one of oct, hex or dec; inserting ch into
    ///     the stream; and extracting an int.
    int value(char_type ch, int radix) const
    {
        BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix);

        std::basic_stringstream<char_type> stream;
        stream.imbue(this->getloc());

        // select the numeric base used by the extraction below
        if(8 == radix)
        {
            stream << std::oct;
        }
        else if(16 == radix)
        {
            stream << std::hex;
        }
        else
        {
            stream << std::dec;
        }

        stream.put(ch);

        int digit = -1;
        if(stream >> digit)
        {
            return digit;
        }
        // extraction failed: ch is not a digit in the requested radix
        return -1;
    }

    /// Replaces the locale used by *this.
    ///
    /// Besides storing the new locale, this refreshes the cached
    /// std::ctype<char_type> facet pointer and forwards the locale to
    /// base_type::imbue().
    ///
    /// \param loc A std::locale.
    /// \return the previous std::locale used by *this.
    locale_type imbue(locale_type loc)
    {
        // remember the outgoing locale so it can be handed back to the caller
        locale_type previous = this->loc_;

        this->loc_ = loc;
        // the facet is fetched from the stored copy of the locale, not from the parameter
        this->ctype_ = &BOOST_USE_FACET(std::ctype<char_type>, this->loc_);
        // (caching of the std::collate facet is disabled)
        //this->collate_ = &BOOST_USE_FACET(std::collate<char_type>, this->loc_);
        this->base_type::imbue(this->loc_);

        return previous;
    }

    /// Accessor for the locale currently stored in *this.
    ///
    /// \return A copy of the stored locale (the one most recently set by imbue()).
    locale_type getloc() const
    {
        return loc_;
    }

private:

    ///////////////////////////////////////////////////////////////////////////////
    // char_class_pair
    //   One row of the class-name lookup table: associates the spelling of a
    //   character class with its bitmask. Member order matters because the table
    //   in char_class() brace-initializes rows positionally (name, then mask).
    /// INTERNAL ONLY
    struct char_class_pair
    {
        // NUL-terminated class name; a null pointer marks the end of the table
        char_type const *class_name_;
        // bitmask returned by lookup_classname_impl_ when the name matches
        char_class_type class_type_;
    };

    ///////////////////////////////////////////////////////////////////////////////
    // char_class
    //   Returns row j of the static table that maps character class names to
    //   their bitmasks. The table ends with a { 0, 0 } sentinel row whose
    //   class_name_ is null. No bounds check is performed on j, so callers must
    //   stop iterating once they reach the sentinel (as lookup_classname_impl_ does).
    /// INTERNAL ONLY
    static char_class_pair const &char_class(std::size_t j)
    {
        // Names are spelled via BOOST_XPR_CSTR_ so that they have element type
        // char_type. "d", "s" and "w" are single-letter names; "d" and "s" share
        // the masks of "digit" and "space", while "w" is alnum plus underscore.
        static char_class_pair const s_char_class_map[] =
        {
            { BOOST_XPR_CSTR_(char_type, "alnum"),  detail::std_ctype_alnum }
          , { BOOST_XPR_CSTR_(char_type, "alpha"),  detail::std_ctype_alpha }
          , { BOOST_XPR_CSTR_(char_type, "blank"),  detail::non_std_ctype_blank }
          , { BOOST_XPR_CSTR_(char_type, "cntrl"),  detail::std_ctype_cntrl }
          , { BOOST_XPR_CSTR_(char_type, "d"),      detail::std_ctype_digit }
          , { BOOST_XPR_CSTR_(char_type, "digit"),  detail::std_ctype_digit }
          , { BOOST_XPR_CSTR_(char_type, "graph"),  detail::std_ctype_graph }
          , { BOOST_XPR_CSTR_(char_type, "lower"),  detail::std_ctype_lower }
          , { BOOST_XPR_CSTR_(char_type, "newline"),detail::non_std_ctype_newline }
          , { BOOST_XPR_CSTR_(char_type, "print"),  detail::std_ctype_print }
          , { BOOST_XPR_CSTR_(char_type, "punct"),  detail::std_ctype_punct }
          , { BOOST_XPR_CSTR_(char_type, "s"),      detail::std_ctype_space }
          , { BOOST_XPR_CSTR_(char_type, "space"),  detail::std_ctype_space }
          , { BOOST_XPR_CSTR_(char_type, "upper"),  detail::std_ctype_upper }
          , { BOOST_XPR_CSTR_(char_type, "w"),      detail::std_ctype_alnum | detail::non_std_ctype_underscore }
          , { BOOST_XPR_CSTR_(char_type, "xdigit"), detail::std_ctype_xdigit }
          , { 0, 0 } // sentinel: end of table
        };
        return s_char_class_map[j];
    }

    ///////////////////////////////////////////////////////////////////////////////
    // lookup_classname_impl
    //   Linear search of the char_class() table for a row whose name equals the
    //   sequence [begin, end) exactly (case-sensitive). Returns that row's bitmask,
    //   or 0 when the sentinel row is reached without a match.
    /// INTERNAL ONLY
    template<typename FwdIter>
    static char_class_type lookup_classname_impl_(FwdIter begin, FwdIter end)
    {
        typedef cpp_regex_traits<Char> this_t;

        std::size_t index = 0;
        // a null class_name_ marks the end of the table
        while(0 != this_t::char_class(index).class_name_)
        {
            char_class_pair const &entry = this_t::char_class(index);
            if(this_t::compare_(entry.class_name_, begin, end))
            {
                return entry.class_type_;
            }
            ++index;
        }
        return 0;
    }

    /// INTERNAL ONLY
    // Returns true when the NUL-terminated string `name` and the sequence
    // [begin, end) have the same length and the same characters.
    template<typename FwdIter>
    static bool compare_(char_type const *name, FwdIter begin, FwdIter end)
    {
        // walk both sequences in lock-step until either one is exhausted
        while(*name && begin != end)
        {
            if(*name != *begin)
            {
                return false;
            }
            ++name;
            ++begin;
        }
        // equal only if both were exhausted at the same time
        if(*name)
        {
            return false;
        }
        return begin == end;
    }

    // The locale stored by imbue() and returned by getloc().
    locale_type loc_;
    // Cached pointer to the std::ctype<char_type> facet of loc_; refreshed by imbue()
    // and used by widen(), translate_nocase(), fold_case(), in_range_nocase() and isctype().
    std::ctype<char_type> const *ctype_;
    // Caching of the std::collate facet is currently disabled.
    //std::collate<char_type> const *collate_;
};

///////////////////////////////////////////////////////////////////////////////
// cpp_regex_traits<>::hash specializations
//
// unsigned char: the character already is its own hash value.
template<>
inline unsigned char cpp_regex_traits<unsigned char>::hash(unsigned char ch)
{
    unsigned char const bucket = ch;
    return bucket;
}

// char: the hash is the character converted to unsigned char.
template<>
inline unsigned char cpp_regex_traits<char>::hash(char ch)
{
    unsigned char const bucket = static_cast<unsigned char>(ch);
    return bucket;
}

// signed char: the hash is the character converted to unsigned char.
template<>
inline unsigned char cpp_regex_traits<signed char>::hash(signed char ch)
{
    unsigned char const bucket = static_cast<unsigned char>(ch);
    return bucket;
}

#ifndef BOOST_XPRESSIVE_NO_WREGEX
// wchar_t: the hash is the character converted to unsigned char, so wide
// characters that differ only above the unsigned char range share a hash value.
// Compiled only when wide-character regex support is not disabled.
template<>
inline unsigned char cpp_regex_traits<wchar_t>::hash(wchar_t ch)
{
    unsigned char const bucket = static_cast<unsigned char>(ch);
    return bucket;
}
#endif

}}

#endif

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?