⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 texttree.cpp

📁 包含最大频繁序列的挖掘; 包含层次聚类算法
💻 CPP
字号:
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
Struct: TextTree

  Description: Used to store a rooted ordered tree, i.e., 
  a transaction in the database.
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include "CMRmisc.h"
#include "TextTree.h"


bool compare_path(const	vector<short>& a,const	vector<short>& b)
{
	//从后往前比较两个长度相等的路径a与b
	int length=a.size();
	for(int i=length-1; i>=0; i--)
	{
		if( a[i] != b[i]) return false;
	}
    return true;
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
operator>>()

  Decription: read in a rooted ordered tree in Zaki's format
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
istream& operator>>(istream& in, TextTree& rhs)
{
	int total;
	short temp;
	stack<short> tempK;
	
	in >> rhs.classid;
	in >> rhs.doctid;
	if ( in.eof() ) return in; //the end of file, no more trees to read
	in >> rhs.doctid; //read tid twice
	
	rhs.path.clear();
	int pathid=0;
	bool flag=true;
	vector< pair<int, vector<short> > >::iterator pos;
	
	in >> total;
	in >> temp; //read in the root label
	
	rhs.vLabel.push_back(temp);
	rhs.firstChild.push_back(-1); //temporarily, the root has no child
	rhs.nextSibling.push_back(-1); //the root has no sibling
	rhs.parent.push_back(-1); //the root has no parent
	rhs.vNumber = 1;
	
	tempK.push(0); //the index of the root
	
	for ( short i = 1; i < total; i++ ) {
		in >> temp;
		if ( temp == -1 ) { //a backtrack
			tempK.pop();
			if(flag){
				//记录该路径
				vector<short> temppath;
				short label=rhs.vLabel[rhs.vNumber-1];
				short parentpos=rhs.parent[rhs.vNumber-1];
				while (parentpos!=-1)
				{
					if(temppath.empty()) temppath.push_back(label);
					else temppath.insert(temppath.begin(),label);
					temppath.insert(temppath.begin(),-1);
					label = rhs.vLabel[parentpos];
					parentpos = rhs.parent[parentpos];
				}
				temppath.insert(temppath.begin(),label);
				//已经得到路径,查找该树中有没有重复路径
				int temppathnum = temppath.size();
				if(rhs.path.empty())
				{
					rhs.path.push_back(make_pair(pathid,temppath));
					pathid++;
				}
				else
				{   bool flagequal=false;
					for(pos = rhs.path.begin(); pos!=rhs.path.end() && !flagequal ; ++pos)
					{
						if(pos->second.size() == temppathnum)
						{
							if(compare_path(pos->second,temppath))
							   flagequal=true;
						}
					}
					if(!flagequal){
						//没有找到重复的
						rhs.path.push_back(make_pair(pathid,temppath));
						pathid++;
					}
				}
				flag=false;
			}
			continue; //nothing to do with the TextTree
		}
		
		flag=true;

		rhs.vLabel.push_back(temp); //add the new vertex label
		
		if (rhs.firstChild[tempK.top()] == -1) { //if the current node has no child yet
			rhs.firstChild[tempK.top()] = rhs.vNumber;	
		}
		else { //if the current node has children already, find the rightmost child, its nextSibling is the new node
			short j = tempK.top();
			j = rhs.firstChild[j];
			while ( rhs.nextSibling[j] != -1 ) j = rhs.nextSibling[j];
			rhs.nextSibling[j] = rhs.vNumber;
		}
		
		rhs.firstChild.push_back(-1); //the new node has no child yet
		rhs.nextSibling.push_back(-1); //the new node has no right sibling yet
		rhs.parent.push_back(tempK.top()); //the parent of the new node is the current node
		tempK.push(rhs.vNumber);
		rhs.vNumber++;
	}
	return in;
}


/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
dfsVisit(const short, const TextTree& rhs, vector<short>& zakiCode)

  Decription: a helper function for depth-first-visit
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
void dfsVisit(const short current, const TextTree& rhs, vector<short>& zakiCode)
{
	zakiCode.push_back(rhs.vLabel[current]);
	short i = rhs.firstChild[current];
	while ( i != -1 )
	{
		dfsVisit(i,rhs,zakiCode);
		i = rhs.nextSibling[i];
	}
	zakiCode.push_back(-1);
}

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
operator<<()

  Decription: write out a rooted ordered tree in Zaki's format
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
ostream& operator<<(ostream& out, const TextTree& rhs)
{
	out << rhs.doctid << ' ' << rhs.doctid << ' ';
	
	vector<short> zakiCode;
	dfsVisit(0, rhs, zakiCode);
	out << zakiCode.size() - 1 << ' '; //Zaki's code has one less "-1" than Luccio's
	for ( short i = 0; i < zakiCode.size() - 1; i++ )
		out << zakiCode[i] << ' ';
	out << endl;
	return out;
}  

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -