⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 prefixspantemplatealgm.inl

📁 C++编写的代码
💻 INL
字号:
#include <algorithm>
#include <ctime>
#include <cassert>

#include <iostream>

using namespace std;
//------------------------------------------------------------------------
//------------------------------------------------------------------------
template<typename PSSequence, typename FreItemTp>
bool PSAlgorithm<PSSequence, FreItemTp>::Init(unsigned long minSupport, 
											  unsigned long itemSetSize,
											  string filename)
{
	totalPatternNum_ = 0;
	minSupport_ = minSupport;
	
	filename_ = filename;

	out.open(outfile.c_str());

	itemNumVec_.resize(itemSetSize);
	SExternFreItemCounter_.Init(itemSetSize);
	IExternFreItemCounter_.Init(itemSetSize);
	
	if (!dbHandler_.ConnectDB("DBServer.cfg"))
	{
		cout<<"Connect database failed......"<<endl;
		return false;
	}
	
	if (!GetAllSeqs())
	{
		cout<<"Get all sequence failed......"<<endl;
		return false;
	}

	//*
	cout<<"Begin to Remove unfrequent Items"<<endl;
	//添加裁减各序列中非频繁单项方法。
	RemoveUnFreItems();
	cout<<"Sequence DB Size is: "<<sequenceDB_.size()<<endl;
	//*/
	return true;
}

//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp>
bool PSAlgorithm<PSSequence, FreItemTp>::GetAllSeqs()
{   

	string sqlStr = "SELECT COUNT(*) FROM " + filename_;
	if (!dbHandler_.ExecuteSQL(sqlStr.c_str()))
		return false;
	
	//find out the number of all the sequences
	_variant_t vIndex = (long)0;
	_variant_t vCount = dbHandler_.pRS_->GetCollect(vIndex);
	
	sequenceDB_.resize((long)vCount);
	
	sqlStr = "SELECT * FROM "+ filename_;
	if (!dbHandler_.ExecuteSQL(sqlStr.c_str()))
		return false;
	
	dbHandler_.pRS_->MoveFirst();
	_bstr_t bstrSeq;
	int i = 0;
	unsigned int maxspt = 1;
	while ((!dbHandler_.pRS_->adoEOF)&&(i < (long)vCount))
	{
		if (dbHandler_.pRS_->Fields->Item["Sequence"]->ActualSize > 0)
		{
			bstrSeq = dbHandler_.pRS_->Fields->Item["Sequence"]->Value;
			PSSequence seqTmp =  PSSequence((const char*)bstrSeq, i+1);
			
			//统计各单项的出现次数。
			CountItem(seqTmp, i);
			sequenceDB_[i] = seqTmp;
			++i;
		}
		dbHandler_.pRS_->MoveNext();
	}	
	return true;
}

//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp>
void PSAlgorithm<PSSequence, FreItemTp>::CountItem(const PSSequence& seq, int currentSId)
{
	for (int i=0;i<seq.Length(); ++i)
	{
		
		//统计下标为subscript_的单项的数目,其相应的位置在itemNumVec_的subscript_-1处
		//因此,此处-1必不可少。
		if (itemNumVec_[seq[i].subscript_-1].SId_ < currentSId)
		{
			++(itemNumVec_[seq[i].subscript_-1].count_);
			itemNumVec_[seq[i].subscript_-1].SId_  =currentSId;
		}
		
	}
}

//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template<typename PSSequence, typename FreItemTp>
void PSAlgorithm<PSSequence, FreItemTp>::RemoveUnFreItems()
{
	for (int i=0; i<sequenceDB_.size(); ++i)
	{
		for (int j=0;j<sequenceDB_[i].Length(); ++j)
		{
			if (itemNumVec_[sequenceDB_[i][j].subscript_-1].count_ < minSupport_)
			{
				sequenceDB_[i].RemoveItem(j);
				--j;
			}
		}		
	}

	vector<PSSequence> sequenceDBTmp;
	for (i=0;i<sequenceDB_.size();++i)
	{
		if (!sequenceDB_[i].Empty())
		{
			sequenceDBTmp.push_back(sequenceDB_[i]);
			//todo 在此处添加每个单项的投影数据库集合。
			for (int j=0;j<sequenceDB_[i].Length()-1; ++j)
			{
				//插入的是items对应序列在序列数据库中的index。范围从0--(SequenceSize-1)。
				projectedDBInfo_.InsertItemSeqIndex(sequenceDB_[i][j].subscript_, sequenceDBTmp.size()-1, j+1);
			}
			projectedDBInfo_.InsertItemSeqIndex(sequenceDB_[i][j].subscript_, sequenceDBTmp.size()-1, PSSequence::npos());
		}
	}
	swap(sequenceDB_, sequenceDBTmp);
}

//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp>
void PSAlgorithm<PSSequence, FreItemTp>::
WriteIntoFSeqsTbl(const PSSequence& seqPattern, unsigned int count)
{
	/*
	string sqlStr = "INSERT INTO FrequentSeqsTbl (Sequence, Frequence) VALUES('";
	sqlStr += seqPattern.ToString();
	sqlStr += "', ";
	char buf[10];
	itoa(count,buf,10);
	sqlStr += buf;
	sqlStr += ")";

	dbHandler_.ExecuteSQL(sqlStr.c_str());
	//*/

	/*
	if (seqPattern.Length() > 1)
	{
		int fileNum = seqPattern.Length();
		char buf[10];
		itoa(fileNum, buf, 10);
		string fileName = buf;
		fileName +=".txt";
		ofstream outfile;
		outfile.open(fileName.c_str(), ios_base::app);
		outfile<<seqPattern.ToString()<<'\t'<<count<<endl;
	}
	*/

    out<<seqPattern.ToString()<<" "<<count<<endl;
	
	++totalPatternNum_;
}

//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp>
bool PSAlgorithm<PSSequence, FreItemTp>::
CountValidSuffixItem(const PSSequence& prefixSeq,
					unsigned index, 
					const unsigned suffixPos/*,
					unsigned& nextBeginPos*/)
{
	NEED_TO_SPECIALIZE();	//必须要特化此方法。
}

//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp>
void PSAlgorithm<PSSequence, FreItemTp>::prefixSpan()
{	
	for (int i=0;i<itemNumVec_.size(); ++i)
	{
		if (itemNumVec_[i].count_ >= minSupport_)
		{
			ItemType externItem(i+1);
			PSSequence prefixSeq("");
			prefixSeq = prefixSeq.SExtern(externItem);
			vector<ProjectedSeqInfo> projectedDB;
			projectedDBInfo_.GetCandidateSIndexVec(externItem.subscript_, projectedDB);
			
			doPrefixSpan(prefixSeq, projectedDB.size(), projectedDB);
		}
	}
}

//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp>
void PSAlgorithm<PSSequence, FreItemTp>::
doPrefixSpan(PSSequence& prefixSeq, unsigned frequence, vector<ProjectedSeqInfo>& projectedDB)
{
	WriteIntoFSeqsTbl(prefixSeq, frequence);
	vector<FreItemTp> SExternFreItemVec;
	vector<FreItemTp> IExternFreItemVec;
	CountFreItems(prefixSeq, projectedDB, SExternFreItemVec, IExternFreItemVec);

	for (int i=0;i<SExternFreItemVec.size();++i)
	{
		PSSequence newPrefixSeq = prefixSeq.SExtern(SExternFreItemVec[i].item_);
		vector<ProjectedSeqInfo> newProjectedDB;
		for (int j=0;j<projectedDB.size();++j)
		{
			unsigned suffixPos = sequenceDB_[projectedDB[j].SIndex_].GetSuffixPos(newPrefixSeq);
			if (suffixPos != PSSequence::npos())
			{
				newProjectedDB.push_back(ProjectedSeqInfo(projectedDB[j].SIndex_,suffixPos));
			}
		}
		if (newProjectedDB.size() >= minSupport_)
			doPrefixSpan(newPrefixSeq, SExternFreItemVec[i].count_, newProjectedDB);
		else
			WriteIntoFSeqsTbl(newPrefixSeq, SExternFreItemVec[i].count_);
	}

	for (i=0;i<IExternFreItemVec.size();++i)
	{
		PSSequence newPrefixSeq = prefixSeq.IExtern(IExternFreItemVec[i].item_);
		vector<ProjectedSeqInfo> newProjectedDB;
		for (int j=0;j<projectedDB.size();++j)
		{
			unsigned suffixPos = sequenceDB_[projectedDB[j].SIndex_].GetSuffixPos(newPrefixSeq);
			if (suffixPos != PSSequence::npos())
			{
				newProjectedDB.push_back(ProjectedSeqInfo(projectedDB[j].SIndex_,suffixPos));
			}
		}
		if (newProjectedDB.size() >= minSupport_)
			doPrefixSpan(newPrefixSeq, IExternFreItemVec[i].count_, newProjectedDB);
		else
			WriteIntoFSeqsTbl(newPrefixSeq, IExternFreItemVec[i].count_);
	}
}

//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp>
void PSAlgorithm<PSSequence, FreItemTp>::
CountFreItems(const PSSequence& prefixSeq, 
			  const vector<ProjectedSeqInfo>& projectedDB, 
			  vector<FreItemTp>& SExternFreItemVec,
			  vector<FreItemTp>& IExternFreItemVec)
{
	unsigned int relatedSeqsNum = 0;
	for (int i=0;i<projectedDB.size(); ++i)
	{		
		if (projectedDB[i].suffixPos_ != PSSequence::npos())
		{
			CountValidSuffixItem(prefixSeq, projectedDB[i].SIndex_, projectedDB[i].suffixPos_);
			++relatedSeqsNum;
		}
	}

	if (relatedSeqsNum >= minSupport_)
	{
		SExternFreItemCounter_.GetFreItems(SExternFreItemVec, minSupport_);
		IExternFreItemCounter_.GetFreItems(IExternFreItemVec, minSupport_);
	}
	
	SExternFreItemCounter_.ResetCounter();
	IExternFreItemCounter_.ResetCounter();
}
///////////////////////////////////////////////////////////////////////////////////////
template <typename PSSequence, typename FreItemTp> 
inline bool PSAlgorithm<PSSequence, FreItemTp>::
LastElementExit(
                const PSSequence& prefixSeq, 
		        unsigned index, 
			    unsigned pos  )const
{   
    if( pos == 0) return false;//没有可匹配的

	int i = pos-1;//投影序列从pos前的第一个项开始匹配
	int j = prefixSeq.Length()-1;//前缀序列从最后一项开始匹配
	while( sequenceDB_[index][i].timestamp_ == sequenceDB_[index][pos].timestamp_
		&& prefixSeq[j].timestamp_ == prefixSeq[prefixSeq.Length()-1].timestamp_
		&& i >= 0
		)
	{ 
		if(sequenceDB_[index][i].subscript_ < prefixSeq[j].subscript_) break;

		if(sequenceDB_[index][i].subscript_ > prefixSeq[j].subscript_) i--;

		if(sequenceDB_[index][i].subscript_ == prefixSeq[j].subscript_) 
		{   
			if(j == 0) 
				return true;//匹配成功
			else
			{
			  i--;
			  j--;
			}//继续匹配
		
		}//if

		
	}//while
	
	
    if(prefixSeq[j].timestamp_ < prefixSeq[prefixSeq.Length()-1].timestamp_)
		return true;
	else return false;
}//

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -