⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexp.cpp

📁 ncbi源码
💻 CPP
字号:
/*
 * ===========================================================================
 * PRODUCTION $Log: regexp.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 19:40:23  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.7
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: regexp.cpp,v 1000.2 2004/06/01 19:40:23 gouriano Exp $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author: Clifford Clausen
 * File Description:
 *         C++ wrappers for Perl Compatible Regular Expression (pcre) library
 *
 * ===========================================================================
 */

#include <ncbi_pch.hpp>
#include <corelib/ncbi_limits.h>
#include <util/regexp.hpp>
#include <memory>
#include <errno.h>
#include <stdlib.h>


BEGIN_NCBI_SCOPE


//////////////////////////////////////////////////////////////////////////////
//
//  CRegexpException
//

/// Exception thrown by this file when a pattern cannot be compiled.
class CRegexpException : public CException
{
public:
    enum EErrCode {
        eCompile    ///< pcre_compile() returned NULL for the pattern
    };
    virtual const char* GetErrCodeString(void) const {
        switch ( GetErrCode() ) {
        case eCompile:         return "eCompile";
        default:               return CException::GetErrCodeString();
        }
    }
    NCBI_EXCEPTION_DEFAULT(CRegexpException,CException);
};


//////////////////////////////////////////////////////////////////////////////
//
//  File-local helpers
//

/// Compile "pattern" with pcre_compile().
///
/// Shared by the CRegexp constructor and CRegexp::Set() so that both report
/// compilation failures in exactly the same way.
///
/// @param pattern  Regular expression text.
/// @param options  Option bits handed to pcre_compile() unchanged.
/// @return         The non-NULL compiled pattern; the caller owns it and
///                 must release it with pcre_free.
/// @throws CRegexpException (eCompile) if pcre_compile() returns NULL.
static pcre* s_Compile(const string& pattern, int options)
{
    const char* err;
    int         err_offset;
    pcre* compiled = pcre_compile(pattern.c_str(), options,
                                  &err, &err_offset, NULL);
    if (compiled == NULL) {
        NCBI_THROW(CRegexpException, eCompile, "Compilation of the pattern '" +
                   pattern + "' failed: " + err);
    }
    return compiled;
}


/// Build the replacement text for one match by expanding "$<n>" / "{$<n>}"
/// references in "replace".
///
/// @param replace    Replacement template as passed to CRegexpUtil::Replace().
/// @param content    String the match was performed on; subpattern offsets
///                   reported by "re" index into it.
/// @param re         Regexp object holding the results of the last match.
/// @param num_found  Value of re.NumFound() for that match.
/// @return           "replace" with every well-formed reference substituted.
///                   A reference whose number is not in (0, num_found), or
///                   whose subpattern did not participate in the match,
///                   expands to an empty string.
static string s_ExpandReplacement(const string& replace,
                                  const string& content,
                                  CRegexp&      re,
                                  int           num_found)
{
    string expanded = replace;
    size_t pos      = 0;

    for (;;) {
        // Find "$"
        pos = expanded.find("$", pos);
        if (pos == NPOS) {
            break;
        }
        // Try to convert string after the "$" to number
        errno = 0;
        const char* startptr = expanded.c_str() + pos + 1;
        char*       endptr   = 0;
        long        value    = strtol(startptr, &endptr, 10);

        if ( errno  ||  endptr == startptr  ||  !endptr  ||
             value < kMin_Int  ||  value > kMax_Int) {
            // Format error, skip single "$".
            pos++;
            continue;
        }
        int n = (int)value;

        // Get subpattern value
        string subpattern;
        if ( n > 0  &&  n < num_found ) {
            const int* result = re.GetResults(n);
            if (result[0] >= 0  &&  result[1] >= 0) {
                subpattern = content.substr(result[0],
                                            result[1] - result[0]);
            }
        }

        // Check braces {$...}
        size_t sp_start = pos;
        size_t sp_end   = endptr - expanded.c_str();
        if ( sp_start > 0  &&  expanded[sp_start-1] == '{') {
            sp_start--;
            if ( sp_end <  expanded.length()  &&
                 expanded[sp_end] == '}') {
                sp_end++;
            } else {
                // Format error -- missed closed brace.
                sp_start++;
            }
        }
        // Replace $n with subpattern value.
        expanded.replace(sp_start, sp_end - sp_start, subpattern);

        // Resume scanning right after the inserted text.  This must be
        // computed from sp_start, not pos: for the "{$n}" form sp_start is
        // one less than pos, and using pos would step over the character
        // following the reference (e.g. the "$" of "$2" in "{$1}$2").
        pos = sp_start + subpattern.length();
    }
    return expanded;
}


//////////////////////////////////////////////////////////////////////////////
//
//  CRegexp
//

/// Compile "pattern" with the given compile flags.
///
/// @throws CRegexpException (eCompile) if the pattern does not compile.
CRegexp::CRegexp(const string& pattern, TCompile flags)
    : m_NumFound(0)
{
    m_PReg = s_Compile(pattern, flags);
}


/// Release the compiled pattern.
CRegexp::~CRegexp()
{
    (*pcre_free)(m_PReg);
}


/// Replace the compiled pattern with a newly compiled one.
///
/// The new pattern is compiled before the old one is released, so if
/// compilation fails the object keeps its previous pattern.
///
/// @throws CRegexpException (eCompile) if the pattern does not compile.
void CRegexp::Set(const string& pattern, TCompile flags)
{
    pcre* compiled = s_Compile(pattern, flags);   // throws on failure

    if (m_PReg != NULL) {
        (*pcre_free)(m_PReg);
    }
    m_PReg = compiled;

    // Offsets left in m_Results belong to the previous pattern.
    m_NumFound = 0;
}


/// Extract the text of (sub)pattern "idx" from "str" using the offsets
/// stored by the last GetMatch() call.
///
/// @param str  The string that was passed to GetMatch().
/// @param idx  Index of the offset pair in m_Results to use.
/// @return     The matched substring, or kEmptyStr if "idx" is not below
///             m_NumFound or either stored offset is -1.
string CRegexp::GetSub(const string& str, size_t idx) const
{
    // Validate the index before touching m_Results; the comparison is done
    // in size_t so that a huge idx cannot wrap to a negative int.
    if (m_NumFound <= 0  ||  idx >= static_cast<size_t>(m_NumFound)) {
        return kEmptyStr;
    }
    const int start = m_Results[2 * idx];
    const int end   = m_Results[2 * idx + 1];

    if (start == -1  ||  end == -1) {
        return kEmptyStr;
    }
    return str.substr(start, end - start);
}


/// Run the compiled pattern against "str" with pcre_exec().
///
/// The return value of pcre_exec() is stored in m_NumFound and the match
/// offsets in m_Results.
///
/// @param str       Subject string.
/// @param offset    Offset in "str" at which matching starts.
/// @param idx       (Sub)pattern index to return, see GetSub().
/// @param flags     Option bits handed to pcre_exec() unchanged.
/// @param noreturn  If true, only perform the match and return kEmptyStr.
/// @return          kEmptyStr if "noreturn" is set, GetSub(str, idx)
///                  otherwise.
string CRegexp::GetMatch(
    const string& str,
    TSeqPos       offset,
    size_t        idx,
    TMatch        flags,
    bool          noreturn)
{
    m_NumFound = pcre_exec(m_PReg, NULL, str.c_str(), (int)str.length(),
                           (int)offset, flags, m_Results,
                           (int)(kRegexpMaxSubPatterns +1) * 3);
    if ( noreturn ) {
        return kEmptyStr;
    }
    return GetSub(str, idx);
}


//////////////////////////////////////////////////////////////////////////////
//
//  CRegexpUtil
//

/// Wrap "str" for editing; the default delimiter is "\n" and no range
/// addresses are set.
CRegexpUtil::CRegexpUtil(const string& str)
    : m_Content(str), m_IsDivided(false),
      m_RangeStart(kEmptyStr), m_RangeEnd(kEmptyStr), m_Delimiter("\n")
{
}


/// Store the range address patterns used by ReplaceRange() and split the
/// content by "delimiter".
///
/// @param addr_start  Pattern stored as the start address of a range.
/// @param addr_end    Pattern stored as the end address of a range.
/// @param delimiter   Delimiter to split the content by; if empty, the
///                    currently stored delimiter is used.
void CRegexpUtil::SetRange(
        const string& addr_start,
        const string& addr_end,
        const string& delimiter)
{
    m_RangeStart = addr_start;
    m_RangeEnd   = addr_end;
    x_Divide(delimiter);
}


/// Replace occurrences of the pattern "search" in the whole content.
///
/// In "replace", "$<n>" and "{$<n>}" are substituted with the text of the
/// n-th subpattern of the current match.
///
/// @param search         Pattern to look for; an empty pattern is a no-op.
/// @param replace        Replacement template.
/// @param compile_flags  Flags used to compile "search".
/// @param match_flags    Flags used for every match attempt.
/// @param max_replace    Maximum number of replacements; 0 means no limit.
/// @return               Number of replacements performed.
/// @throws CRegexpException (eCompile) if "search" does not compile.
size_t CRegexpUtil::Replace(
    const string&     search,
    const string&     replace,
    CRegexp::TCompile compile_flags,
    CRegexp::TMatch   match_flags,
    size_t            max_replace)
{
    if ( search.empty() ) {
        return 0;
    }
    size_t n_replace = 0;

    // Make sure that the string is not divided.
    x_Join();

    // Compile regular expression.
    CRegexp re(search, compile_flags);
    size_t  start_pos = 0;

    while (max_replace == 0  ||  n_replace < max_replace) {
        // Match pattern.  Only the offsets are needed, so ask GetMatch()
        // not to build a result string.
        re.GetMatch(m_Content, static_cast<TSeqPos>(start_pos), 0,
                    match_flags, true);
        const int num_found = re.NumFound();
        if (num_found <= 0) {
            break;
        }

        // Substitute all subpatterns "$<digit>" to values in the "replace"
        // string.  This has to happen before m_Content is modified because
        // the subpattern offsets refer to the current content.
        const string x_replace =
            s_ExpandReplacement(replace, m_Content, re, num_found);

        // Replace pattern with "x_replace".
        const int*   result      = re.GetResults(0);
        const size_t match_begin = result[0];
        const size_t match_len   = result[1] - result[0];
        m_Content.replace(match_begin, match_len, x_replace);
        n_replace++;
        start_pos = match_begin + x_replace.length();

        if (match_len == 0) {
            // The pattern matched an empty string.  Searching again from
            // start_pos would find the same empty match forever, so step
            // over one character of the source text -- or stop if there
            // is none left.
            if (start_pos >= m_Content.length()) {
                break;
            }
            start_pos++;
        }
    }
    return n_replace;
}


/// Replace occurrences of "search" line by line, restricted to lines inside
/// (or outside) the range set with SetRange().
///
/// The content is split by the stored delimiter and each part is processed
/// on its own, so "max_replace" limits the replacements per part.
///
/// @param search          Pattern to look for; an empty pattern is a no-op.
/// @param replace         Replacement template, see Replace().
/// @param compile_flags   Flags used to compile "search".
/// @param match_flags     Flags used for every match attempt.
/// @param process_inside  eInside to edit parts inside the range, eOutside
///                        to edit parts outside of it.
/// @param max_replace     Maximum number of replacements per part; 0 means
///                        no limit.
/// @return                Total number of replacements performed.
size_t CRegexpUtil::ReplaceRange(
    const string&       search,
    const string&       replace,
    CRegexp::TCompile   compile_flags,
    CRegexp::TMatch     match_flags,
    CRegexpUtil::ERange process_inside,
    size_t              max_replace
    )
{
    if ( search.empty() ) {
        return 0;
    }
    size_t n_replace = 0;

    // Split source string to parts by delimiter
    x_Divide();

    // Flag which denote that current line is inside "range"
    bool inside = m_RangeStart.empty();

    NON_CONST_ITERATE (list<string>, i, m_ContentList) {
        // Keep a copy of the unmodified line: the end-of-range test below
        // is performed on the text as it was before any replacement.
        string line = *i;

        // Check beginning of block [addr_re_start:addr_re_end]
        if ( !inside  &&  !m_RangeStart.empty() ) {
            CRegexp start_re(m_RangeStart);
            start_re.GetMatch(line, 0, 0, 0, true);
            inside = (start_re.NumFound() > 0);
        } else {
            inside = true;
        }

        // Process current line
        if ( (inside  &&  process_inside == eInside)  ||
             (!inside  &&  process_inside == eOutside) ) {
            CRegexpUtil line_util(line);
            n_replace += line_util.Replace(search, replace,
                                           compile_flags, match_flags,
                                           max_replace);
            *i = line_util;
        }

        // Check ending of block [addr_re_start:addr_re_end]
        if ( inside  &&  !m_RangeEnd.empty() ) {
            // Two addresses
            CRegexp end_re(m_RangeEnd);
            end_re.GetMatch(line, 0, 0, 0, true);
            inside = (end_re.NumFound() <= 0);
        } else {
            // One address -- process one current string only
            inside = false;
        }
    }
    return n_replace;
}


/// Split m_Content into m_ContentList by a delimiter.
///
/// @param delimiter  Delimiter to split by; if empty, m_Delimiter is used.
///                   If the content is already divided by the same
///                   delimiter nothing is done; if it is divided by a
///                   different one it is joined first and split again.
void CRegexpUtil::x_Divide(const string& delimiter)
{
    string x_delimiter = delimiter.empty() ? m_Delimiter : delimiter;
    if ( m_IsDivided  ) {
        if ( x_delimiter == m_Delimiter ) {
            return;
        }
        x_Join();
    }
    m_ContentList.clear();

    // Split source string to parts by delimiter
    size_t start_pos = 0;
    for (;;) {
        const size_t pos = m_Content.find(x_delimiter, start_pos);
        if (pos == NPOS) {
            // Last (or only) part: everything after the final delimiter.
            m_ContentList.push_back(m_Content.substr(start_pos));
            break;
        }
        m_ContentList.push_back(m_Content.substr(start_pos,
                                                 pos - start_pos));
        start_pos = pos + x_delimiter.length();
    }
    m_IsDivided = true;

    // Save delimiter for consecutive joining
    m_Delimiter = x_delimiter;
}


/// Rebuild m_Content from m_ContentList using m_Delimiter, if the content
/// is currently divided.
void CRegexpUtil::x_Join(void)
{
    if ( m_IsDivided ) {
        m_Content = NStr::Join(m_ContentList, m_Delimiter);
        m_IsDivided = false;
    }
}


END_NCBI_SCOPE


/*
 * ===========================================================================
 * $Log: regexp.cpp,v $
 * Revision 1000.2  2004/06/01 19:40:23  gouriano
 * PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.7
 *
 * Revision 1.7  2004/05/17 21:06:02  gorelenk
 * Added include of PCH ncbi_pch.hpp
 *
 * Revision 1.6  2003/11/07 17:16:23  ivanov
 * Fixed  warnings on 64-bit Workshop compiler
 *
 * Revision 1.5  2003/11/07 13:39:56  ivanov
 * Fixed lines wrapped at 79th columns
 *
 * Revision 1.4  2003/11/06 16:13:04  ivanov
 * Added CRegexpUtil class. Some formal code rearrangement.
 *
 * Revision 1.3  2003/07/16 19:13:50  clausen
 * Added TCompile and TMatch
 *
 * Revision 1.2  2003/06/20 18:26:37  clausen
 * Switched to native regexp interface
 *
 * Revision 1.1  2003/06/03 14:46:23  clausen
 * Initial version
 *
 * ===========================================================================
 */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -