regex.cpp

来自「A*算法 A*算法 A*算法 A*算法A*算法A*算法」· C++ 代码 · 共 691 行 · 第 1/2 页

CPP
691
字号
                    // extended, so ignore them always.
                    if ( cptr[1] != _T('?') )
                        m_nMatches++;
                }
            }
        }

        m_isCompiled = true;
    }

    return IsValid();
}

#ifdef WXREGEX_USING_RE_SEARCH

// On GNU, regexec is implemented as a wrapper around re_search. re_search
// requires a length parameter which the POSIX regexec does not have,
// therefore regexec must do a strlen on the search text each time it is
// called. This can drastically affect performance when matching is done in
// a loop along a string, such as during a search and replace. Therefore if
// re_search is detected by configure, it is used directly.
//
static int ReSearch(const regex_t *preg,
                    const char *text,
                    size_t len,
                    re_registers *matches,
                    int eflags)
{
    regex_t *pattern = wx_const_cast(regex_t*, preg);

    pattern->not_bol = (eflags & REG_NOTBOL) != 0;
    pattern->not_eol = (eflags & REG_NOTEOL) != 0;
    pattern->regs_allocated = REGS_FIXED;

    int ret = re_search(pattern, text, len, 0, len, matches);
    return ret >= 0 ? 0 : REG_NOMATCH;
}

#endif // WXREGEX_USING_RE_SEARCH

bool wxRegExImpl::Matches(const wxRegChar *str,
                          int flags
                          WXREGEX_IF_NEED_LEN(size_t len)) const
{
    wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );

    // translate our flags to regexec() ones
    wxASSERT_MSG( !(flags & ~(wxRE_NOTBOL | wxRE_NOTEOL)),
                  _T("unrecognized flags in wxRegEx::Matches") );

    int flagsRE = 0;
    if ( flags & wxRE_NOTBOL )
        flagsRE |= REG_NOTBOL;
    if ( flags & wxRE_NOTEOL )
        flagsRE |= REG_NOTEOL;

    // allocate matches array if needed
    wxRegExImpl *self = wxConstCast(this, wxRegExImpl);
    if ( !m_Matches && m_nMatches )
    {
        self->m_Matches = new wxRegExMatches(m_nMatches);
    }

    wxRegExMatches::match_type matches = m_Matches ? m_Matches->get() : NULL;

    // do match it
#if defined WXREGEX_USING_BUILTIN
    int rc = wx_re_exec(&self->m_RegEx, str, len, NULL, m_nMatches, matches, flagsRE);
#elif defined WXREGEX_USING_RE_SEARCH
    int rc = str ? ReSearch(&self->m_RegEx, str, len, matches, flagsRE) : REG_BADPAT;
#else
    int rc = str ? regexec(&self->m_RegEx, str, m_nMatches, matches, flagsRE) : REG_BADPAT;
#endif

    switch ( rc )
    {
        case 0:
            // matched successfully
            return true;

        default:
            // an error occurred
            wxLogError(_("Failed to find match for regular expression: %s"),
                       GetErrorMsg(rc, !str).c_str());
            // fall through

        case REG_NOMATCH:
            // no match
            return false;
    }
}

bool wxRegExImpl::GetMatch(size_t *start, size_t *len, size_t index) const
{
    wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
    wxCHECK_MSG( m_nMatches, false, _T("can't use with wxRE_NOSUB") );
    wxCHECK_MSG( m_Matches, false, _T("must call Matches() first") );
    wxCHECK_MSG( index < m_nMatches, false, _T("invalid match index") );

    if ( start )
        *start = m_Matches->Start(index);
    if ( len )
        *len = m_Matches->End(index) - m_Matches->Start(index);

    return true;
}

size_t wxRegExImpl::GetMatchCount() const
{
    wxCHECK_MSG( IsValid(), 0, _T("must successfully Compile() first") );
    wxCHECK_MSG( m_nMatches, 0, _T("can't use with wxRE_NOSUB") );

    return m_nMatches;
}

int wxRegExImpl::Replace(wxString *text,
                         const wxString& replacement,
                         size_t maxMatches) const
{
    wxCHECK_MSG( text, wxNOT_FOUND, _T("NULL text in wxRegEx::Replace") );
    wxCHECK_MSG( IsValid(), wxNOT_FOUND, _T("must successfully Compile() first") );

    // the input string
#ifndef WXREGEX_CONVERT_TO_MB
    const wxChar *textstr = text->c_str();
    size_t textlen = text->length();
#else
    const wxWX2MBbuf textstr = WXREGEX_CHAR(*text);
    if (!textstr)
    {
        wxLogError(_("Failed to find match for regular expression: %s"),
                   GetErrorMsg(0, true).c_str());
        return 0;
    }
    size_t textlen = strlen(textstr);
    text->clear();
#endif

    // the replacement text
    wxString textNew;

    // the result, allow 25% extra
    wxString result;
    result.reserve(5 * textlen / 4);

    // attempt at optimization: don't iterate over the string if it doesn't
    // contain back references at all
    bool mayHaveBackrefs =
        replacement.find_first_of(_T("\\&")) != wxString::npos;

    if ( !mayHaveBackrefs )
    {
        textNew = replacement;
    }

    // the position where we start looking for the match
    size_t matchStart = 0;

    // number of replacement made: we won't make more than maxMatches of them
    // (unless maxMatches is 0 which doesn't limit the number of replacements)
    size_t countRepl = 0;

    // note that "^" shouldn't match after the first call to Matches() so we
    // use wxRE_NOTBOL to prevent it from happening
    while ( (!maxMatches || countRepl < maxMatches) &&
            Matches(textstr + matchStart,
                    countRepl ? wxRE_NOTBOL : 0
                    WXREGEX_IF_NEED_LEN(textlen - matchStart)) )
    {
        // the string possibly contains back references: we need to calculate
        // the replacement text anew after each match
        if ( mayHaveBackrefs )
        {
            mayHaveBackrefs = false;
            textNew.clear();
            textNew.reserve(replacement.length());

            for ( const wxChar *p = replacement.c_str(); *p; p++ )
            {
                size_t index = (size_t)-1;

                if ( *p == _T('\\') )
                {
                    if ( wxIsdigit(*++p) )
                    {
                        // back reference
                        wxChar *end;
                        index = (size_t)wxStrtoul(p, &end, 10);
                        p = end - 1; // -1 to compensate for p++ in the loop
                    }
                    //else: backslash used as escape character
                }
                else if ( *p == _T('&') )
                {
                    // treat this as "\0" for compatbility with ed and such
                    index = 0;
                }

                // do we have a back reference?
                if ( index != (size_t)-1 )
                {
                    // yes, get its text
                    size_t start, len;
                    if ( !GetMatch(&start, &len, index) )
                    {
                        wxFAIL_MSG( _T("invalid back reference") );

                        // just eat it...
                    }
                    else
                    {
                        textNew += wxString(textstr + matchStart + start,
                                            *wxConvCurrent, len);

                        mayHaveBackrefs = true;
                    }
                }
                else // ordinary character
                {
                    textNew += *p;
                }
            }
        }

        size_t start, len;
        if ( !GetMatch(&start, &len) )
        {
            // we did have match as Matches() returned true above!
            wxFAIL_MSG( _T("internal logic error in wxRegEx::Replace") );

            return wxNOT_FOUND;
        }

        // an insurance against implementations that don't grow exponentially
        // to ensure building the result takes linear time
        if (result.capacity() < result.length() + start + textNew.length())
            result.reserve(2 * result.length());

#ifndef WXREGEX_CONVERT_TO_MB
        result.append(*text, matchStart, start);
#else
        result.append(wxString(textstr + matchStart, *wxConvCurrent, start));
#endif
        matchStart += start;
        result.append(textNew);

        countRepl++;

        matchStart += len;
    }

#ifndef WXREGEX_CONVERT_TO_MB
    result.append(*text, matchStart, wxString::npos);
#else
    result.append(wxString(textstr + matchStart, *wxConvCurrent));
#endif
    *text = result;

    return countRepl;
}

// ----------------------------------------------------------------------------
// wxRegEx: all methods are mostly forwarded to wxRegExImpl
// ----------------------------------------------------------------------------

void wxRegEx::Init()
{
    m_impl = NULL;
}

wxRegEx::~wxRegEx()
{
    delete m_impl;
}

bool wxRegEx::Compile(const wxString& expr, int flags)
{
    if ( !m_impl )
    {
        m_impl = new wxRegExImpl;
    }

    if ( !m_impl->Compile(expr, flags) )
    {
        // error message already given in wxRegExImpl::Compile
        delete m_impl;
        m_impl = NULL;

        return false;
    }

    return true;
}

bool wxRegEx::Matches(const wxChar *str, int flags, size_t len) const
{
    wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
    (void)len;

    return m_impl->Matches(WXREGEX_CHAR(str), flags WXREGEX_IF_NEED_LEN(len));
}

bool wxRegEx::Matches(const wxChar *str, int flags) const
{
    wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );

    return m_impl->Matches(WXREGEX_CHAR(str),
                           flags
                           WXREGEX_IF_NEED_LEN(wxStrlen(str)));
}

bool wxRegEx::GetMatch(size_t *start, size_t *len, size_t index) const
{
    wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );

    return m_impl->GetMatch(start, len, index);
}

wxString wxRegEx::GetMatch(const wxString& text, size_t index) const
{
    size_t start, len;
    if ( !GetMatch(&start, &len, index) )
        return wxEmptyString;

    return text.Mid(start, len);
}

size_t wxRegEx::GetMatchCount() const
{
    wxCHECK_MSG( IsValid(), 0, _T("must successfully Compile() first") );

    return m_impl->GetMatchCount();
}

int wxRegEx::Replace(wxString *pattern,
                     const wxString& replacement,
                     size_t maxMatches) const
{
    wxCHECK_MSG( IsValid(), wxNOT_FOUND, _T("must successfully Compile() first") );

    return m_impl->Replace(pattern, replacement, maxMatches);
}

#endif // wxUSE_REGEX

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?