📄 strsearch.hpp
字号:
vector< CState > m_States; bool m_CaseSensitive; }; // end of class CTextFsm// Convenience class when the MatchType is of string type (most cases)class NCBI_XUTIL_EXPORT CTextFsa : public CTextFsm<string>{public: CTextFsa(bool case_sensitive = false) : CTextFsm<string>(case_sensitive) {} void AddWord(const string& word) { CTextFsm<string>::AddWord(word, word); }};//============================================================================//// Finite State Automata Implementation ////============================================================================//// Public:// =======template <typename MatchType>CTextFsm<MatchType>::CTextFsm(bool case_sensitive) :m_Primed(false), m_CaseSensitive(case_sensitive){ CState initial; m_States.push_back(initial);}template <typename MatchType> void CTextFsm<MatchType>::AddWord(const string& word, const MatchType& match) { string temp = word; if ( !m_CaseSensitive ) { NStr::ToUpper(temp); } int next, i, state = 0, word_len = temp.length(); // try to overlay beginning of word onto existing table for ( i = 0; i < word_len; ++i ) { next = m_States[state].GetNextState(word[i]); if ( next == eFailState ) break; state = next; } // now create new states for remaining characters in word for ( ; i < word_len; ++ i ) { CState new_state; m_States.push_back(new_state); m_States[state].AddTransition(temp[i], m_States.size() - 1); state = m_States[state].GetNextState(temp[i]); } // add match information m_States[state].AddMatch(match);}template <typename MatchType>void CTextFsm<MatchType>::Prime(void){ if ( m_Primed ) return; ComputeFail(); m_Primed = true;}template <typename MatchType>typename CTextFsm<MatchType>::CState *CTextFsm<MatchType>::GetState(int state) { if ( (state < 0) || (state > m_States.size()) ) { return 0; } return &(m_States[state]);}template <typename MatchType>int CTextFsm<MatchType>::GetNextState(const CState& from, char letter) const { char ch = m_CaseSensitive ? letter : toupper(letter); return from.GetNextState(ch);}template <typename MatchType>int CTextFsm<MatchType>::GetNextState(int state, char letter) const{ if ( state < 0 || size_t(state) >= m_States.size() ) { return eFailState; } int next; int initial = GetInitialState(); while ( (next = GetNextState(m_States[state], letter)) == eFailState ) { if ( state == initial ) { next = initial; break; } state = m_States[state].GetOnFailure(); } return next;}template <typename MatchType>void CTextFsm<MatchType>::QueueAdd(vector<int>& in_queue, int qbeg, int val){ int q; q = in_queue [qbeg]; if (q == 0) { in_queue [qbeg] = val; } else { for ( ; in_queue [q] != 0; q = in_queue [q]) continue; in_queue [q] = val; } in_queue [val] = 0;}template <typename MatchType>void CTextFsm<MatchType>::ComputeFail(void) { int qbeg, r, s, state; vector<int> state_queue(m_States.size()); qbeg = 0; state_queue [0] = 0; // queue up states reached directly from state 0 (depth 1) ITERATE ( typename CState::TMapCharInt, it, m_States[GetInitialState()].GetTransitions() ) { s = it->second; m_States[s].SetOnFailure(0); QueueAdd(state_queue, qbeg, s); } while (state_queue [qbeg] != 0) { r = state_queue [qbeg]; qbeg = r; // depth 1 states beget depth 2 states, etc. ITERATE ( typename CState::TMapCharInt, it, m_States[r].GetTransitions() ) { s = it->second; QueueAdd(state_queue, qbeg, s); // State Substring Transitions Failure // 2 st a -> 3 6 // 3 sta l -> 4 // 6 t a -> 7 0 // 7 ta p -> 8 // // For example, r = 2 (st), if 'a' would go to s = 3 (sta). // From previous computation, 2 (st) fails to 6 (t). // Thus, check state 6 (t) for any transitions using 'a'. // Since 6 (t) 'a' -> 7 (ta), therefore set fail [3] -> 7. state = m_States[r].GetOnFailure(); FindFail(state, s, it->first); } }}template <typename MatchType>void CTextFsm<MatchType>::FindFail(int state, int new_state, char ch){ int next; // traverse existing failure path while ( (next = GetNextState(state, ch)) == eFailState) { if ( state == 0 ) { next = 0; break; } state = m_States[state].GetOnFailure(); } // add new failure state m_States[new_state].SetOnFailure(next); // add matches of substring at new state copy( m_States[next].GetMatches().begin(), m_States[next].GetMatches().end(), back_inserter(m_States[new_state].GetMatches()) );}template <typename MatchType>const vector<MatchType>& CTextFsm<MatchType>::GetMatches(int state) const { return m_States[state].GetMatches();}template <typename MatchType>bool CTextFsm<MatchType>::IsMatchFound(int state) const{ return m_States[state].IsMatchFound();}template <typename MatchType>CTextFsm<MatchType>::~CTextFsm(void) {}END_NCBI_SCOPE/* @} *//** ===========================================================================** $Log: strsearch.hpp,v $* Revision 1000.2 2004/06/01 19:39:02 gouriano* PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.19** Revision 1.19 2004/05/27 13:41:16 kuznets* Fixed warnings (GCC 3.4)** Revision 1.18 2004/04/26 14:52:34 ucko* Add "typename" as needed to accommodate GCC 3.4's stricter treatment of* templates.** Revision 1.17 2004/04/01 14:14:01 lavr* Spell "occurred", "occurrence", and "occurring"** Revision 1.16 2004/03/05 15:45:23 kuznets* fixed compilation warnings on 64-bit (Sun)** Revision 1.15 2004/03/04 17:37:38 kuznets* CBoyerMooreMatcher added functions to work with different word delimiters** Revision 1.14 2004/03/03 17:55:47 kuznets* Code cleane up (CBoyerMooreMatcher) to use enums instead of bools,* better coverage or different types of whole word matchers** Revision 1.13 2004/03/02 19:59:57 kuznets* Changes in CBoyerMooreMatcher:* - added work with memory areas* - alternative word delimiters** Revision 1.12 2003/05/23 13:33:55 dicuccio* Changed local variable name from 'queue' to avoid clash with queue<>** Revision 1.11 2003/04/17 17:50:36 siyan* Added doxygen support** Revision 1.10 2003/03/10 17:56:34 kuznets* iterate->ITERATE** Revision 1.9 2003/02/04 20:15:16 shomrat* Change int to unsigned** Revision 1.8 2003/01/22 20:03:56 vasilche* Removed compiler warning.** Revision 1.7 2002/12/19 14:51:00 dicuccio* Added export specifier for Win32 DLL builds.** Revision 1.6 2002/11/13 19:12:23 shomrat* Add prime checking** Revision 1.5 2002/11/12 15:38:56 ucko* Since sm_FailState was constant anyway, made it into a (singleton)* enum element so it won't end up as an undefined symbol on Darwin.** Revision 1.4 2002/11/06 15:08:03 ucko* Remove extraneous CState:: that caused trouble with MIPSpro.** Revision 1.3 2002/11/05 22:58:57 shomrat* Coding style changes; Case sensetivity option added to finite state automata; Bug fix in GetNextState** Revision 1.2 2002/11/03 21:58:49 kans* BoyerMoore takes caseSensitive, wholeWord parameters (MS)** Revision 1.1 2002/10/29 16:32:52 kans* initial checkin - Boyer-Moore string search and templated text search finite state machine (MS)*** ===========================================================================*/#endif // UTIL___STRSEARCH__HPP
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -