📄 trietree.h
字号:
#pragma once
#include "TrieTreeNode.h"
#include <string>
#include <stack>
#include <iostream>
#include <fstream>
using std::string ;
using std::stack ;
using std::endl;
class TrieTree
{
public:
TrieTree()
{
root = new TrieTreeNode ;
}
~TrieTree()
{
RemoveTreeNode(root) ;
}
int GetLongestMatchByPy(const string& py, vector<int>& vecIdTable)
{
return 0;
}
//get the matched word information by PY, return the longest matched length
int GetWordByLongestMatchedPy(const string& py, vector<int>& vecIdTable)
{
if (py.size()==0)
{
return 0;
}
TrieTreeNode *pCurrentTreeNode = root ;
stack<TrieTreeNode*> traceStack ;
//traceStack.push(pCurrentTreeNode) ;
size_t i = 0 ;
while (true)
{
char chCurrentChar = py[i] ;
i++ ;
traceStack.push(pCurrentTreeNode) ;
if (i<py.size())
{
char chNextChar = py[i] ;
//test if it should go to next character
TrieTreeNode *pNextTreeNode = pCurrentTreeNode->m_Alphabet[chCurrentChar-'a'].m_pNextNode ;
if(pNextTreeNode)// && !pNextTreeNode->m_Alphabet[chNextChar-'a'].IsEmpty())
{
pCurrentTreeNode = pNextTreeNode ;
//traceStack.push(pCurrentTreeNode) ;
continue ;
}
}
vecIdTable.clear() ;
pCurrentTreeNode->m_Alphabet[chCurrentChar-'a'].GetWord(vecIdTable) ;
break ;
}
//trace back to do longest matching
if (vecIdTable.size()==0)
{
while (vecIdTable.size()==0 && traceStack.size()>0 )
{
i-- ;
char chCurrentChar = py[i] ;
pCurrentTreeNode = traceStack.top() ;
traceStack.pop() ;
pCurrentTreeNode->m_Alphabet[chCurrentChar-'a'].GetWord(vecIdTable) ;
}
if (traceStack.size() == 0 && vecIdTable.size()==0) //if none matched
{
return static_cast<int>(i) ;
}
else
{
return static_cast<int>(i+1) ;
}
}
return static_cast<int>(i) ;
}
void InsertSingleWord(const string& py, double frequency, int id)
{
if (py.empty())
{
return ;
}
TrieTreeNode *pCurrentNode = root ;
size_t i = 0 ;
while(true)
{
size_t index = py[i] - 'a' ;
i++ ;
if ( i>=py.size() )
{
pCurrentNode->m_Alphabet[index].InsertWord(frequency, id) ;
break ;
}
TrieTreeNode *pNextTreeNode = (pCurrentNode->m_Alphabet[index]).m_pNextNode ;
if ( pNextTreeNode== 0)
{
pNextTreeNode = new TrieTreeNode ;
pCurrentNode->m_Alphabet[index].m_pNextNode = pNextTreeNode ;
}
pCurrentNode = pNextTreeNode ;
}
}
void IncreaseFrequency(const string& py, int id)
{
if (py.empty())
{
return ;
}
TrieTreeNode *pCurrentNode = root ;
size_t i ;
for (i=0; i<py.size()-1; i++)
{
size_t index = py[i] - 'a' ;
pCurrentNode = pCurrentNode->m_Alphabet[index].m_pNextNode ;
}
pCurrentNode->m_Alphabet[ py[i]-'a'].IncreaseFrequency(id) ;
}
bool Save(const string& filename)
{
std::ofstream out(filename.c_str()) ;
if (!out.is_open())
{
return false ;
}
Save(root, out) ;
out.close() ;
return true ;
}
bool Load(const string& filename)
{
std::ifstream in(filename.c_str()) ;
if (!in.is_open())
{
return false ;
}
Load(root, in) ;
in.close() ;
return true ;
}
private:
void Save(TrieTreeNode* p, std::ofstream& out)
{
if (!p)
{
out << "n" << endl;
return ;
}
out << "y " ;
for (int i=0; i<26; i++)
{
size_t iSize = p->m_Alphabet[i].m_frequencyHeap.m_vecFrequencyId.size() ;
out << iSize << " " ;
for (size_t j=0; j<iSize; j++)
{
out << p->m_Alphabet[i].m_frequencyHeap.m_vecFrequencyId[j].getid() << " " << p->m_Alphabet[i].m_frequencyHeap.m_vecFrequencyId[j].getfrequency() << " ";
}
out << endl;
}
for (int i=0; i<26; i++)
{
Save(p->m_Alphabet[i].m_pNextNode, out) ;
}
}
void Load(TrieTreeNode*& p, std::ifstream& in)
{
string input ;
in >> input ;
if(input == "n")
{
p = 0 ;
return ;
}
p = new TrieTreeNode ;
int id ;
double frequency ;
for(int i=0; i<26; i++)
{
in >> input ;
size_t iSize = atoi(input.c_str()) ;
for (size_t j=0; j<iSize; j++)
{
in >> id >> frequency ;
p->m_Alphabet[i].InsertWord(frequency, id) ;
}
}
for (int i=0; i<26; i++)
{
Load(p->m_Alphabet[i].m_pNextNode, in) ;
}
}
void RemoveTreeNode(TrieTreeNode* p)
{
if ( p )
{
for (int i = 0; i<26; i++)
{
RemoveTreeNode( (p->m_Alphabet[i]).m_pNextNode) ;
}
delete p ;
}
}
TrieTreeNode *root ;
};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -