⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 strsearch.hpp

📁 ncbi源码
💻 HPP
📖 第 1 页 / 共 2 页
字号:
    vector< CState >    m_States;    bool                m_CaseSensitive;    };  // end of class CTextFsm// Convenience class when the MatchType is of string type (most cases)class NCBI_XUTIL_EXPORT CTextFsa : public CTextFsm<string>{public:    CTextFsa(bool case_sensitive = false) :        CTextFsm<string>(case_sensitive)     {}    void AddWord(const string& word) {        CTextFsm<string>::AddWord(word, word);    }};//============================================================================////                   Finite State Automata Implementation                     ////============================================================================//// Public:// =======template <typename MatchType>CTextFsm<MatchType>::CTextFsm(bool case_sensitive) :m_Primed(false), m_CaseSensitive(case_sensitive){    CState initial;    m_States.push_back(initial);}template <typename MatchType>	void CTextFsm<MatchType>::AddWord(const string& word, const MatchType& match) {    string temp = word;    if ( !m_CaseSensitive ) {        NStr::ToUpper(temp);    }    int next, i,         state = 0,        word_len = temp.length();        // try to overlay beginning of word onto existing table     for ( i = 0;  i < word_len;  ++i ) {        next = m_States[state].GetNextState(word[i]);        if ( next == eFailState ) break;        state = next;    }        // now create new states for remaining characters in word     for ( ;  i < word_len;  ++ i ) {        CState new_state;                m_States.push_back(new_state);        m_States[state].AddTransition(temp[i], m_States.size() - 1);        state = m_States[state].GetNextState(temp[i]);    }        // add match information     m_States[state].AddMatch(match);}template <typename MatchType>void CTextFsm<MatchType>::Prime(void){    if ( m_Primed ) return;        ComputeFail();        m_Primed = true;}template <typename MatchType>typename CTextFsm<MatchType>::CState *CTextFsm<MatchType>::GetState(int state) {    if ( (state < 0) || (state > m_States.size()) ) {        return 0;    }    return &(m_States[state]);}template <typename MatchType>int CTextFsm<MatchType>::GetNextState(const CState& from, char letter) const {    char ch = m_CaseSensitive ? letter : toupper(letter);    return from.GetNextState(ch);}template <typename MatchType>int CTextFsm<MatchType>::GetNextState(int state, char letter) const{    if ( state < 0 || size_t(state) >= m_States.size() ) {        return eFailState;    }        int next;    int initial = GetInitialState();    while ( (next = GetNextState(m_States[state], letter)) == eFailState ) {        if ( state == initial ) {            next = initial;            break;        }        state = m_States[state].GetOnFailure();    }        return next;}template <typename MatchType>void CTextFsm<MatchType>::QueueAdd(vector<int>& in_queue, int qbeg, int val){    int  q;        q = in_queue [qbeg];    if (q == 0) {        in_queue [qbeg] = val;    } else {        for ( ;  in_queue [q] != 0;  q = in_queue [q]) continue;        in_queue [q] = val;    }    in_queue [val] = 0;}template <typename MatchType>void CTextFsm<MatchType>::ComputeFail(void) {    int	qbeg, r, s, state;    vector<int> state_queue(m_States.size());        qbeg = 0;    state_queue [0] = 0;        // queue up states reached directly from state 0 (depth 1)         ITERATE ( typename CState::TMapCharInt,              it,               m_States[GetInitialState()].GetTransitions() ) {        s = it->second;        m_States[s].SetOnFailure(0);        QueueAdd(state_queue, qbeg, s);    }        while (state_queue [qbeg] != 0) {        r = state_queue [qbeg];        qbeg = r;                // depth 1 states beget depth 2 states, etc.                 ITERATE ( typename CState::TMapCharInt, it,                  m_States[r].GetTransitions() ) {            s = it->second;            QueueAdd(state_queue, qbeg, s);                        //   State   Substring   Transitions   Failure            //     2       st          a ->   3       6            //     3       sta         l ->   4            //     6       t           a ->   7       0            //     7       ta          p ->   8            //            //   For example, r = 2 (st), if 'a' would go to s = 3 (sta).            //   From previous computation, 2 (st) fails to 6 (t).            //   Thus, check state 6 (t) for any transitions using 'a'.            //   Since 6 (t) 'a' -> 7 (ta), therefore set fail [3] -> 7.                        state = m_States[r].GetOnFailure();            FindFail(state, s, it->first);        }    }}template <typename MatchType>void CTextFsm<MatchType>::FindFail(int state, int new_state, char ch){    int        next;        // traverse existing failure path         while ( (next = GetNextState(state, ch)) == eFailState) {        if ( state == 0 ) {            next = 0; break;        }        state = m_States[state].GetOnFailure();    }        // add new failure state         m_States[new_state].SetOnFailure(next);        // add matches of substring at new state         copy( m_States[next].GetMatches().begin(),         m_States[next].GetMatches().end(),        back_inserter(m_States[new_state].GetMatches()) );}template <typename MatchType>const vector<MatchType>& CTextFsm<MatchType>::GetMatches(int state) const {    return m_States[state].GetMatches();}template <typename MatchType>bool CTextFsm<MatchType>::IsMatchFound(int state) const{    return m_States[state].IsMatchFound();}template <typename MatchType>CTextFsm<MatchType>::~CTextFsm(void) {}END_NCBI_SCOPE/* @} *//** ===========================================================================** $Log: strsearch.hpp,v $* Revision 1000.2  2004/06/01 19:39:02  gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.19** Revision 1.19  2004/05/27 13:41:16  kuznets* Fixed warnings (GCC 3.4)** Revision 1.18  2004/04/26 14:52:34  ucko* Add "typename" as needed to accommodate GCC 3.4's stricter treatment of* templates.** Revision 1.17  2004/04/01 14:14:01  lavr* Spell "occurred", "occurrence", and "occurring"** Revision 1.16  2004/03/05 15:45:23  kuznets* fixed compilation warnings on 64-bit (Sun)** Revision 1.15  2004/03/04 17:37:38  kuznets* CBoyerMooreMatcher added functions to work with different word delimiters** Revision 1.14  2004/03/03 17:55:47  kuznets* Code cleane up (CBoyerMooreMatcher) to use enums instead of bools,* better coverage or different types of whole word matchers** Revision 1.13  2004/03/02 19:59:57  kuznets* Changes in CBoyerMooreMatcher:*   - added work with memory areas*   - alternative word delimiters** Revision 1.12  2003/05/23 13:33:55  dicuccio* Changed local variable name from 'queue' to avoid clash with queue<>** Revision 1.11  2003/04/17 17:50:36  siyan* Added doxygen support** Revision 1.10  2003/03/10 17:56:34  kuznets* iterate->ITERATE** Revision 1.9  2003/02/04 20:15:16  shomrat* Change int to unsigned** Revision 1.8  2003/01/22 20:03:56  vasilche* Removed compiler warning.** Revision 1.7  2002/12/19 14:51:00  dicuccio* Added export specifier for Win32 DLL builds.** Revision 1.6  2002/11/13 19:12:23  shomrat* Add prime checking** Revision 1.5  2002/11/12 15:38:56  ucko* Since sm_FailState was constant anyway, made it into a (singleton)* enum element so it won't end up as an undefined symbol on Darwin.** Revision 1.4  2002/11/06 15:08:03  ucko* Remove extraneous CState:: that caused trouble with MIPSpro.** Revision 1.3  2002/11/05 22:58:57  shomrat* Coding style changes; Case sensetivity option added to finite state automata; Bug fix in GetNextState** Revision 1.2  2002/11/03 21:58:49  kans* BoyerMoore takes caseSensitive, wholeWord parameters (MS)** Revision 1.1  2002/10/29 16:32:52  kans* initial checkin - Boyer-Moore string search and templated text search finite state machine (MS)*** ===========================================================================*/#endif   // UTIL___STRSEARCH__HPP

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -