📄 prefixspantemplatealgm.inl
字号:
#include <algorithm>
#include <ctime>
#include <cassert>
#include <iostream>
using namespace std;
//------------------------------------------------------------------------
//------------------------------------------------------------------------
template<typename PSSequence, typename FreItemTp>
bool PSAlgorithm<PSSequence, FreItemTp>::Init(unsigned long minSupport,
unsigned long itemSetSize,
string filename)
{
totalPatternNum_ = 0;
minSupport_ = minSupport;
filename_ = filename;
out.open(outfile.c_str());
itemNumVec_.resize(itemSetSize);
SExternFreItemCounter_.Init(itemSetSize);
IExternFreItemCounter_.Init(itemSetSize);
if (!dbHandler_.ConnectDB("DBServer.cfg"))
{
cout<<"Connect database failed......"<<endl;
return false;
}
if (!GetAllSeqs())
{
cout<<"Get all sequence failed......"<<endl;
return false;
}
//*
cout<<"Begin to Remove unfrequent Items"<<endl;
//添加裁减各序列中非频繁单项方法。
RemoveUnFreItems();
cout<<"Sequence DB Size is: "<<sequenceDB_.size()<<endl;
//*/
return true;
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp>
bool PSAlgorithm<PSSequence, FreItemTp>::GetAllSeqs()
{
string sqlStr = "SELECT COUNT(*) FROM " + filename_;
if (!dbHandler_.ExecuteSQL(sqlStr.c_str()))
return false;
//find out the number of all the sequences
_variant_t vIndex = (long)0;
_variant_t vCount = dbHandler_.pRS_->GetCollect(vIndex);
sequenceDB_.resize((long)vCount);
sqlStr = "SELECT * FROM "+ filename_;
if (!dbHandler_.ExecuteSQL(sqlStr.c_str()))
return false;
dbHandler_.pRS_->MoveFirst();
_bstr_t bstrSeq;
int i = 0;
unsigned int maxspt = 1;
while ((!dbHandler_.pRS_->adoEOF)&&(i < (long)vCount))
{
if (dbHandler_.pRS_->Fields->Item["Sequence"]->ActualSize > 0)
{
bstrSeq = dbHandler_.pRS_->Fields->Item["Sequence"]->Value;
PSSequence seqTmp = PSSequence((const char*)bstrSeq, i+1);
//统计各单项的出现次数。
CountItem(seqTmp, i);
sequenceDB_[i] = seqTmp;
++i;
}
dbHandler_.pRS_->MoveNext();
}
return true;
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp>
void PSAlgorithm<PSSequence, FreItemTp>::CountItem(const PSSequence& seq, int currentSId)
{
for (int i=0;i<seq.Length(); ++i)
{
//统计下标为subscript_的单项的数目,其相应的位置在itemNumVec_的subscript_-1处
//因此,此处-1必不可少。
if (itemNumVec_[seq[i].subscript_-1].SId_ < currentSId)
{
++(itemNumVec_[seq[i].subscript_-1].count_);
itemNumVec_[seq[i].subscript_-1].SId_ =currentSId;
}
}
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template<typename PSSequence, typename FreItemTp>
void PSAlgorithm<PSSequence, FreItemTp>::RemoveUnFreItems()
{
for (int i=0; i<sequenceDB_.size(); ++i)
{
for (int j=0;j<sequenceDB_[i].Length(); ++j)
{
if (itemNumVec_[sequenceDB_[i][j].subscript_-1].count_ < minSupport_)
{
sequenceDB_[i].RemoveItem(j);
--j;
}
}
}
vector<PSSequence> sequenceDBTmp;
for (i=0;i<sequenceDB_.size();++i)
{
if (!sequenceDB_[i].Empty())
{
sequenceDBTmp.push_back(sequenceDB_[i]);
//todo 在此处添加每个单项的投影数据库集合。
for (int j=0;j<sequenceDB_[i].Length()-1; ++j)
{
//插入的是items对应序列在序列数据库中的index。范围从0--(SequenceSize-1)。
projectedDBInfo_.InsertItemSeqIndex(sequenceDB_[i][j].subscript_, sequenceDBTmp.size()-1, j+1);
}
projectedDBInfo_.InsertItemSeqIndex(sequenceDB_[i][j].subscript_, sequenceDBTmp.size()-1, PSSequence::npos());
}
}
swap(sequenceDB_, sequenceDBTmp);
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp>
void PSAlgorithm<PSSequence, FreItemTp>::
WriteIntoFSeqsTbl(const PSSequence& seqPattern, unsigned int count)
{
/*
string sqlStr = "INSERT INTO FrequentSeqsTbl (Sequence, Frequence) VALUES('";
sqlStr += seqPattern.ToString();
sqlStr += "', ";
char buf[10];
itoa(count,buf,10);
sqlStr += buf;
sqlStr += ")";
dbHandler_.ExecuteSQL(sqlStr.c_str());
//*/
/*
if (seqPattern.Length() > 1)
{
int fileNum = seqPattern.Length();
char buf[10];
itoa(fileNum, buf, 10);
string fileName = buf;
fileName +=".txt";
ofstream outfile;
outfile.open(fileName.c_str(), ios_base::app);
outfile<<seqPattern.ToString()<<'\t'<<count<<endl;
}
*/
out<<seqPattern.ToString()<<" "<<count<<endl;
++totalPatternNum_;
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp>
bool PSAlgorithm<PSSequence, FreItemTp>::
CountValidSuffixItem(const PSSequence& prefixSeq,
unsigned index,
const unsigned suffixPos/*,
unsigned& nextBeginPos*/)
{
NEED_TO_SPECIALIZE(); //必须要特化此方法。
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp>
void PSAlgorithm<PSSequence, FreItemTp>::prefixSpan()
{
for (int i=0;i<itemNumVec_.size(); ++i)
{
if (itemNumVec_[i].count_ >= minSupport_)
{
ItemType externItem(i+1);
PSSequence prefixSeq("");
prefixSeq = prefixSeq.SExtern(externItem);
vector<ProjectedSeqInfo> projectedDB;
projectedDBInfo_.GetCandidateSIndexVec(externItem.subscript_, projectedDB);
doPrefixSpan(prefixSeq, projectedDB.size(), projectedDB);
}
}
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp>
void PSAlgorithm<PSSequence, FreItemTp>::
doPrefixSpan(PSSequence& prefixSeq, unsigned frequence, vector<ProjectedSeqInfo>& projectedDB)
{
WriteIntoFSeqsTbl(prefixSeq, frequence);
vector<FreItemTp> SExternFreItemVec;
vector<FreItemTp> IExternFreItemVec;
CountFreItems(prefixSeq, projectedDB, SExternFreItemVec, IExternFreItemVec);
for (int i=0;i<SExternFreItemVec.size();++i)
{
PSSequence newPrefixSeq = prefixSeq.SExtern(SExternFreItemVec[i].item_);
vector<ProjectedSeqInfo> newProjectedDB;
for (int j=0;j<projectedDB.size();++j)
{
unsigned suffixPos = sequenceDB_[projectedDB[j].SIndex_].GetSuffixPos(newPrefixSeq);
if (suffixPos != PSSequence::npos())
{
newProjectedDB.push_back(ProjectedSeqInfo(projectedDB[j].SIndex_,suffixPos));
}
}
if (newProjectedDB.size() >= minSupport_)
doPrefixSpan(newPrefixSeq, SExternFreItemVec[i].count_, newProjectedDB);
else
WriteIntoFSeqsTbl(newPrefixSeq, SExternFreItemVec[i].count_);
}
for (i=0;i<IExternFreItemVec.size();++i)
{
PSSequence newPrefixSeq = prefixSeq.IExtern(IExternFreItemVec[i].item_);
vector<ProjectedSeqInfo> newProjectedDB;
for (int j=0;j<projectedDB.size();++j)
{
unsigned suffixPos = sequenceDB_[projectedDB[j].SIndex_].GetSuffixPos(newPrefixSeq);
if (suffixPos != PSSequence::npos())
{
newProjectedDB.push_back(ProjectedSeqInfo(projectedDB[j].SIndex_,suffixPos));
}
}
if (newProjectedDB.size() >= minSupport_)
doPrefixSpan(newPrefixSeq, IExternFreItemVec[i].count_, newProjectedDB);
else
WriteIntoFSeqsTbl(newPrefixSeq, IExternFreItemVec[i].count_);
}
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp>
void PSAlgorithm<PSSequence, FreItemTp>::
CountFreItems(const PSSequence& prefixSeq,
const vector<ProjectedSeqInfo>& projectedDB,
vector<FreItemTp>& SExternFreItemVec,
vector<FreItemTp>& IExternFreItemVec)
{
unsigned int relatedSeqsNum = 0;
for (int i=0;i<projectedDB.size(); ++i)
{
if (projectedDB[i].suffixPos_ != PSSequence::npos())
{
CountValidSuffixItem(prefixSeq, projectedDB[i].SIndex_, projectedDB[i].suffixPos_);
++relatedSeqsNum;
}
}
if (relatedSeqsNum >= minSupport_)
{
SExternFreItemCounter_.GetFreItems(SExternFreItemVec, minSupport_);
IExternFreItemCounter_.GetFreItems(IExternFreItemVec, minSupport_);
}
SExternFreItemCounter_.ResetCounter();
IExternFreItemCounter_.ResetCounter();
}
///////////////////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp>
inline bool PSAlgorithm<PSSequence, FreItemTp>::
LastElementExit(
const PSSequence& prefixSeq,
unsigned index,
unsigned pos )const
{
if( pos == 0) return false;//没有可匹配的
int i = pos-1;//投影序列从pos前的第一个项开始匹配
int j = prefixSeq.Length()-1;//前缀序列从最后一项开始匹配
while( sequenceDB_[index][i].timestamp_ == sequenceDB_[index][pos].timestamp_
&& prefixSeq[j].timestamp_ == prefixSeq[prefixSeq.Length()-1].timestamp_
&& i >= 0
)
{
if(sequenceDB_[index][i].subscript_ < prefixSeq[j].subscript_) break;
if(sequenceDB_[index][i].subscript_ > prefixSeq[j].subscript_) i--;
if(sequenceDB_[index][i].subscript_ == prefixSeq[j].subscript_)
{
if(j == 0)
return true;//匹配成功
else
{
i--;
j--;
}//继续匹配
}//if
}//while
if(prefixSeq[j].timestamp_ < prefixSeq[prefixSeq.Length()-1].timestamp_)
return true;
else return false;
}//
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -