📄 markov.cpp
字号:
/*
Copyright (c) 2001
Author: Konstantin Boukreev
E-mail: konstantin@mail.primorye.ru
Created: 19.12.2001 14:51:00
Version: 1.0.0
*/
#include "stdafx.h"
#include "markov.h"
markov_chains::iterator& markov_chains::iterator::operator = (const iterator& i)
{
if (&i != this)
{
hash_it = i.hash_it;
prfx_it = i.prfx_it;
}
return *this;
}
bool markov_chains::iterator::operator == (const iterator& i) const
{
if (&i == this)
return true;
if ((hash_it == i.hash_it) &&
(prfx_it == i.prfx_it))
return true;
return false;
}
bool markov_chains::iterator::operator != (const iterator& i) const
{ return !operator == (i); }
markov_chains::iterator& markov_chains::iterator::operator ++ ()
{
_ASSERTE(hash_it != markov.m_hash.end());
_ASSERTE(prfx_it != (*hash_it).end());
++prfx_it;
while (prfx_it == (*hash_it).end())
{
++hash_it;
if (hash_it == markov.m_hash.end())
{
break; // the end
}
prfx_it = (*hash_it).begin();
}
return *this;
}
markov_chains::iterator markov_chains::iterator::operator ++ (int) // post
{
iterator i(*this);
operator ++();
return i;
}
markov_chains::iterator::reference markov_chains::iterator::operator * ()
{ return *prfx_it; }
// implementation of Markov chains
markov_chains::markov_chains(unsigned hash_size)
: m_size(0)
{
m_hash.resize(hash_size);
}
markov_chains::~markov_chains()
{
clear();
}
void markov_chains::insert(const char * first, const char * second, const char * s)
{
_ASSERTE(first);
_ASSERTE(second);
_ASSERTE(s);
unsigned idx = hash_func(first, second);
prefixs& pxs = m_hash[idx];
for (prefixs::iterator i = pxs.begin(); i != pxs.end(); ++i)
{
prefix& p = (*i);
if (!compare(p.first, first) &&
!compare(p.second, second))
{
for (suffixs::iterator x = p.suffxs.begin(); x != p.suffxs.end(); ++x)
{
if (!compare((*x).s, s))
{
++(*x).n;
return; // ok, it's already exists
}
}
// add new suffix
p.suffxs.push_back(suffix(find_word(s)));
return;
}
}
// add new prefix
++m_size;
pxs.push_back(prefix());
prefix& p = pxs.back();
p.first = find_word(first);
p.second = find_word(second);
p.suffxs.push_back(suffix(find_word(s)));
}
const char * markov_chains::find(const char * first, const char * second)
{
_ASSERTE(first);
_ASSERTE(second);
unsigned idx = hash_func(first, second);
prefixs& pxs = m_hash[idx];
for (prefixs::iterator i = pxs.begin(); i != pxs.end(); ++i)
{
prefix& p = (*i);
if (!compare(p.first, first) &&
!compare(p.second, second))
{
suffixs::iterator x = p.suffxs.begin();
// std::advance(x, rand() % p.suffxs.size());
// return (*x).s;
if (p.suffxs.size() == 1)
return (*x).s;
int n = 0;
for ( ; x != p.suffxs.end(); ++x)
n += (*x).n;
n = rand() % n;
for (x = p.suffxs.begin(); ; ++x) //x != p.suffxs.end()
if ((int)(*x).n > n) break;
else n -= (*x).n;
_ASSERTE(x != p.suffxs.end());
return (*x).s;
}
}
return 0;
}
void markov_chains::clear()
{
m_hash.clear();
for (words::iterator i = m_words.begin(); i != m_words.end(); ++i)
free((*i).second);
m_words.clear();
}
markov_chains::iterator markov_chains::begin()
{ return iterator(*this, m_hash.begin(), m_hash.front().begin());}
markov_chains::iterator markov_chains::end()
{ return iterator(*this, m_hash.end(), m_hash.back().end()); }
#ifdef _DEBUG
void markov_chains::debug_report2(std::ostream& os)
{
unsigned ave_len = 0;
unsigned word_num = 0;
os << "markov chains:" << std::endl;
for (iterator i = begin(); i != end(); ++i)
{
prefix& p = (*i);
os << p.first << " " << p.second << " | ";
suffixs& sfx = p.suffxs;
for (suffixs::iterator i = sfx.begin(); i != sfx.end(); ++i)
{
os << " " << (*i).s << "(" << (*i).n << ")";
ave_len += strlen((*i).s);
++word_num;
}
os << std::endl;
}
os << std::endl;
os << word_num << "/" << (word_num ? ave_len/word_num : 0) << std::endl;
}
void markov_chains::debug_report1(std::ostream& os)
{
os << "hash stat:" << std::endl;
unsigned total_pref = 0;
unsigned total_sufx = 0;
for (hash::iterator i = m_hash.begin(); i != m_hash.end(); ++i)
{
total_pref += (*i).size();
os << " " << (*i).size();
unsigned num_sufx = 0;
prefixs& pfx = (*i);
for (prefixs::iterator i = pfx.begin(); i != pfx.end(); ++i)
num_sufx += (*i).suffxs.size();
os << "/" << num_sufx;
total_sufx += num_sufx;
}
os << " : " << total_pref << "/" << total_sufx << std::endl;
}
#endif
bool get_word(std::istream& is, std::string& s)
{
while (!is.eof())
{
is >> s;
if (s.size() == 1 && ispunct(s[0]))
continue;
return true;
}
return false;
}
// static
markov_chains* markov_chains::create_from_file(const char* filename, unsigned ave_word_size, unsigned hash_max_deep)
{
_ASSERTE(filename);
std::ifstream fi(filename);
if (!fi.is_open()) return 0;
fi.exceptions(std::ios_base::badbit); //|std::ios_base::failbit
markov_chains * markov = 0;
try
{
std::ifstream::_Myfb* buf = fi.rdbuf();
int bytes = buf->pubseekoff(0, std::ios_base::end);
buf->pubseekoff(0, std::ios_base::beg);
markov = new markov_chains(bytes / (ave_word_size * hash_max_deep));
std::string s1, s2, s3;
if(get_word(fi, s1) &&
get_word(fi, s2) &&
get_word(fi, s3))
{
do
{
markov->insert(s1.c_str(), s2.c_str(), s3.c_str());
s1 = s2; s2 = s3;
}
while (get_word(fi, s3));
}
}
catch (std::ios_base::failure&)
{
delete markov;
markov = 0;
}
catch (std::exception&)
{
delete markov;
throw;
}
return markov;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -