⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 occlonglist.cpp

📁 包含最大频繁序列的挖掘; 包含层次聚类算法
💻 CPP
字号:
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
Class: OccLongList

Description: Used to store the occurrence list of a rooted 
ordered pattern tree, for CMOrderedTreeMiner algorithms
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#pragma   warning   (disable:   4083   4786)
#include "OccLongList.h"

typedef map<int, vector< vector<short> > >  MAP_frequency;
void OccLongList::insert(int newdocTid, int newpathTid, int newLocation)
{
	pathocc occ;
	occ.dococ=newdocTid;
	occ.pathoc = newpathTid;
	occ.pos=newLocation;
    occurrenceLong.push_back(occ);
    if ( lastTid != newdocTid ) { //if comes from a new transaction
		lastTid = newdocTid;
		mySupport++;
	}
}

bool OccLongList::combineList(const OccLongList& mother, const OccList& newNodes)
{
	occurrenceLong.clear();
	int nodeId ;
	int motherId ;

	nodeId = 0;
	motherId = 0;
	mySupport = 0;
	int tempTid = -1;
	while ( nodeId < newNodes.occurrence.size() ) {
		//first, find the correct mother
		while ( motherId != newNodes.occurrence[nodeId].first ) motherId++;
		occurrenceLong.push_back(mother.occurrenceLong[motherId]);
		occurrenceLong.back().pos = newNodes.occurrence[nodeId].second;
		if ( tempTid != mother.occurrenceLong[motherId].dococ ) {
			tempTid = mother.occurrenceLong[motherId].dococ;
			mySupport++;
		}
		while ( (nodeId+1) < newNodes.occurrence.size() 
			&& (newNodes.occurrence[nodeId].first == newNodes.occurrence[nodeId+1].first) ) {
				nodeId++;
				occurrenceLong.push_back(mother.occurrenceLong[motherId]);
			    occurrenceLong.back().pos = newNodes.occurrence[nodeId].second;
			}
			nodeId++;
	}
	return true;
}

int includepath(vector<short> a, vector<short> b)
{
	int i,j;
	if(a.size()>b.size())
	{
		i=j=0;
		while (i<a.size() && j<b.size())
		{
			if(a[i]!=b[j]) i++;
			else{
				j++; i++;
			}
		}
		if(j==b.size()) return 1;
	}
	if(b.size()>a.size())
	{
        i=j=0;
		while (i<b.size() && j<a.size())
		{
			if(b[i]!=a[j]) i++;
			else{
				j++; i++;
			}
		}
		if(j==a.size()) return -1;
	}
	return 0;
}
void OccLongList::explore(const vector<bool>& isFrequent, 
						  const vector<TextTree>& database,
						  const int& support,
						  map<int, vector< vector<short> > >& frequency,
						  vector< vector<short> >& maximal,
						  vector< vector<pathocc > >& maximalocclonglist)
{
	//for debug
	//cout << "support of current path is: " << mySupport << endl;
	//for(int m=0;m<currentPath.size();m++)
	//	cout<<currentPath[m]<<" ";
	//cout<<endl;
	
	int tempV = currentPath.size();
	MAP_frequency::iterator posfrequency;
	posfrequency = frequency.find(tempV);
	if(posfrequency != frequency.end())
		frequency[tempV].push_back(currentPath);
	else {
		vector< vector<short> > aa;
		frequency.insert(MAP_frequency::value_type(tempV,aa));
		frequency[tempV].push_back(currentPath);
	}

	//search the maximal
    vector< vector<short> >::iterator posmaximal;
	vector< vector<pathocc> >::iterator posmaximalocc;
	bool insertflag=true;
	if( maximal.empty()) {
		maximal.push_back(currentPath);
		maximalocclonglist.push_back(occurrenceLong);
	}
	else{
		insertflag=true;
		//扫描整个maximal数组
		posmaximalocc = maximalocclonglist.begin();
		for(posmaximal = maximal.begin(); posmaximal!=maximal.end();++posmaximal,++posmaximalocc)
		{
			/*cout<<"currentPath: ";
			for(int m=0;m<currentPath.size();m++)
					cout<<currentPath[m]<<" ";
	         cout<<endl;
			 cout<<"pospath: ";
			 for( m=0; m< posmaximal->size();m++)
				 cout<< (*posmaximal)[m]<<" ";
	         cout<<endl;*/

			if( includepath(*posmaximal, currentPath) == 1){
				//posmaximal包含currentpath
				insertflag=false;
			}
			else if( includepath(*posmaximal, currentPath) == -1){
				//currentpath包含posmaximal
				maximalocclonglist.erase(posmaximalocc);
				maximal.erase(posmaximal);
				posmaximal--;
				posmaximalocc--;
			}
		}
        if(insertflag) 
		{
			maximal.push_back(currentPath);
			maximalocclonglist.push_back(occurrenceLong);
		}
	}	

	//step 3, explore all the expansions
	//step 3.1, explore the children of the rightmost node 
	map<short,OccList> potentialChildren;
	map<short,OccList>::iterator pos;
	for (int  n = 0; n < occurrenceLong.size(); n++ ) {
		int myTid = occurrenceLong[n].dococ;
		int myPathid = occurrenceLong[n].pathoc;
		int myLocation = occurrenceLong[n].pos;
         
		int k = myLocation+1;
		//here, redundancy must be recorded also.
		while ( k <database[myTid].path[myPathid].second.size() ) {
			if ( isFrequent[database[myTid].path[myPathid].second[k] - MIN_VERTEX] == true) {
				potentialChildren[database[myTid].path[myPathid].second[k] - MIN_VERTEX].insert(myTid,k,n);
			}
			k++;
		}
	}
    //for test
	/*for( pos= potentialChildren.begin() ; pos !=potentialChildren.end();++pos)
	{
		cout<<pos->first+MIN_VERTEX<<" "<<pos->second.mySupport<<endl;
		for(m=0;m<pos->second.occurrence.size();m++)
			cout<<pos->second.occurrence[m].first<<" "<<pos->second.occurrence[m].second<<endl;
		cout<<endl;
	}*/
	for ( pos = potentialChildren.begin(); pos != potentialChildren.end(); ++pos ) {
		if ( pos->second.mySupport >= support ) { //a frequent extension!
			currentPath.push_back(pos->first + MIN_VERTEX);
			OccLongList newLongList;
		    newLongList.combineList(*this,pos->second); 
			newLongList.explore(isFrequent,database,support,frequency,maximal,maximalocclonglist);
			currentPath.erase(currentPath.begin()+currentPath.size()-1);
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -