⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cet.cpp

📁 数据流关联规则挖掘算法moment
💻 CPP
📖 第 1 页 / 共 3 页
字号:
Decription: a helper function for addition() * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
int CET::addHelp(const int tid,
				 const unsigned short previousPrefix,
				 const map<FPNodePtr,pair<int,int> > & previousOccurrence,
				 const vector<unsigned short> & parentItemset,
				 const vector<bool> & parentIsNew,
				 const unsigned short startPosition,
				 const FP & FPTree,
				 TreeNode & node)
{
	int maxNewSupport = 0;

	////debug
	//cout << "currentPrefix: " << endl;
	//for ( int i = 0; i < currentPrefix.size(); i++ )
	//	cout << currentPrefix[i] << " ";
	//cout << endl;
	//cout << "parentItemset: " << endl;
	//for ( int i = 0; i < parentItemset.size(); i++ )
	//	cout << parentItemset[i] << " ";
	//cout << endl;
	//cout << "parentIsNew: " << endl;
	//for ( int i = 0; i < parentIsNew.size(); i++ )
	//	cout << parentIsNew[i] << " ";
	//cout << endl;


	unsigned short myPrefix;
	map<FPNodePtr, pair<int,int> > myOccurrence;
	vector<unsigned short> myItemset;
	vector<bool> myIsNew;

	map<unsigned short, unsigned short> inverse;
	vector<map<FPNodePtr, pair<int,int> > > occurrenceFamily;

	bool needLoadOccurrence = false;
	bool isOccurrenceLoaded = false;
	bool existNewFrequent = false;

	unsigned short newStart = 0;
	unsigned short maxEnd = 0;

	vector<unsigned short> tempItemset;
	vector<bool> tempIsNew;

	//step 1, insert new items propogated down
	for ( unsigned short s = startPosition; s < parentItemset.size(); s++ ) {
		////debug
		//cout << "the item is: " << parentItemset[s] << endl;
		if ( parentIsNew[s] ) { //a new item propogated down
			node.myChildren[parentItemset[s]].isInfrequent = true; //initialize a new child
			tempItemset.push_back(parentItemset[s]);
			tempIsNew.push_back(true);
			needLoadOccurrence = true;
			inverse.insert(make_pair(parentItemset[s], newStart));
			newStart++;
			maxEnd = parentItemset[s]; //assuming that parentItemset is in order
		}
		else { //the updated item was one member of the family
			node.myChildren[parentItemset[s]].mySupport++;
			node.myChildren[parentItemset[s]].myTidSum += tid;
			if ( maxNewSupport < node.myChildren[parentItemset[s]].mySupport )
				maxNewSupport = node.myChildren[parentItemset[s]].mySupport;
			if ( node.myChildren[parentItemset[s]].mySupport < SUPPORT ) { 
				continue;//no need to pass down
			}
			else {
				tempItemset.push_back(parentItemset[s]);
				if ( node.myChildren[parentItemset[s]].isInfrequent ) { 
					//a newly frequent node
					node.myChildren[parentItemset[s]].mySupport = 0;
					node.myChildren[parentItemset[s]].myTidSum = 0;			
					tempIsNew.push_back(true);
					needLoadOccurrence = true;
					inverse.insert(make_pair(parentItemset[s], newStart));
					newStart++;
					maxEnd = parentItemset[s]; //assuming that parentItemset is in order
				}
				else {
					tempIsNew.push_back(false);
				}
			}
		}
	} //end for

	//step 2, build the occurrence lists
	if ( needLoadOccurrence ) {
		occurrenceFamily.resize(newStart);
		isOccurrenceLoaded = true;
		myPrefix = currentPrefix.size(); //most up-to-date

		if ( currentPrefix.size() == 0 ) { //if still at the root
			for ( map<unsigned short, unsigned short>::iterator pos = inverse.begin();
				pos != inverse.end(); ++pos )
			{
				IdxNodePtr posIdx = FPTree.headTable[pos->first].right;				while ( posIdx != 0 ) {					occurrenceFamily[pos->second].insert(make_pair(posIdx->locInFP, 						make_pair(posIdx->locInFP->count,posIdx->locInFP->tid_sum)));					node.myChildren[pos->first].mySupport += posIdx->locInFP->count;					node.myChildren[pos->first].myTidSum += posIdx->locInFP->tid_sum;					posIdx = posIdx->right;				}
			}
		}
		else {
			getOccurrence(previousPrefix, previousOccurrence, FPTree, myOccurrence);
			for ( map<FPNodePtr, pair<int,int> >::iterator pos = myOccurrence.begin();
				pos != myOccurrence.end(); ++pos ) 
			{
				FPNodePtr  posFP = pos->first; //find the location in FP
				int tempCount = pos->second.first; //support
				int tempTidSum = pos->second.second; //tid sum
				posFP = posFP->parent;
				while ( posFP->parent != 0 && posFP->item <= maxEnd ) { //while not the root
					map<unsigned short, unsigned short>::iterator pos2 =
						inverse.find( posFP->item );
					if ( pos2 != inverse.end() ) {
						node.myChildren[posFP->item].mySupport += tempCount;
						node.myChildren[posFP->item].myTidSum += tempTidSum;

						map<FPNodePtr,pair<int,int> >::iterator pos3 = 
							occurrenceFamily[pos2->second].find(posFP);
						if ( pos3 != occurrenceFamily[pos2->second].end() ) {
							pos3->second.first += tempCount;
							pos3->second.second += tempTidSum;
						}
						else {
							occurrenceFamily[pos2->second].insert(make_pair(posFP, 
								make_pair(tempCount,tempTidSum)));
						}
					}
					posFP = posFP->parent;
				}
			}
		}// end else
	}

	vector<bool>::iterator pos8 = tempIsNew.begin();
	for ( vector<unsigned short>::iterator pos7 = tempItemset.begin();
		pos7 != tempItemset.end(); ++pos7, ++pos8) 
	{
		if ( maxNewSupport < node.myChildren[*pos7].mySupport )
			maxNewSupport = node.myChildren[*pos7].mySupport;

		if ( node.myChildren[*pos7].mySupport >= SUPPORT ) {
			node.myChildren[*pos7].isInfrequent = false;
			myItemset.push_back(*pos7);
			myIsNew.push_back(*pos8);
			if ( *pos8 == true ) existNewFrequent = true;
		}

	}

	////debug
	//cout << "myItemset: " << endl;
	//for ( int i = 0; i < myItemset.size(); i++ )
	//	cout << myItemset[i] << " ";
	//cout << endl;
	//cout << "myIsNew: " << endl;
	//for ( int i = 0; i < myIsNew.size(); i++ )
	//	cout << myIsNew[i] << " ";
	//cout << endl;


	//step 3, recursively update each child
	unsigned short myStartPosition = 0;
	for ( Family::iterator pos1 = node.myChildren.begin(); 
		pos1 != node.myChildren.end() && myStartPosition < myItemset.size(); ++pos1 )
	{
		if ( pos1->first > myItemset.back() ) break; //done! (not possible)

		//case 1, an old node, not in the myItemset, 
		//need to proprogate the update down ONE LEVEL
		else if ( pos1->first < myItemset[myStartPosition] ) {
			if ( pos1->second.isInfrequent || pos1->second.isUnpromising ||
				!existNewFrequent ) 
			{
				continue;
			}
			set<unsigned short> myExtendItemset;
			maxEnd = 0;
			for ( int j = myStartPosition; j < myItemset.size(); j++ ) {
				if ( myIsNew[j] ) {
					myExtendItemset.insert(myItemset[j]);
					pos1->second.myChildren[myItemset[j]].isInfrequent = true;
					maxEnd = myItemset[j];
				}
			}

			if ( myExtendItemset.size() != 0 ) {
				currentPrefix.push_back(pos1->first);
				map<FPNodePtr, pair<int,int> > childOccurrence;
				getOccurrence(myPrefix, myOccurrence, FPTree, childOccurrence);
				//if comes here, there must be newly frequent item in this level
				//so myOccurrence must have been populated

				for ( map<FPNodePtr, pair<int,int> >::iterator pos5 = childOccurrence.begin();
					pos5 != childOccurrence.end(); ++pos5 ) 
				{
					FPNodePtr  posFP = pos5->first; //find the location in FP
					int tempCount = pos5->second.first; //support
					int tempTidSum = pos5->second.second; //tid sum
					posFP = posFP->parent;
					while ( posFP->parent != 0 && posFP->item <= maxEnd ) { //while not the root
						set<unsigned short>::iterator pos6 =
							myExtendItemset.find( posFP->item );
						if ( pos6 != myExtendItemset.end() ) {
							pos1->second.myChildren[*pos6].mySupport += tempCount;
							pos1->second.myChildren[*pos6].myTidSum += tempTidSum;
						}
						posFP = posFP->parent;
					}
				}

				for ( set<unsigned short>::iterator pos6 = myExtendItemset.begin();
					pos6 != myExtendItemset.end(); ++pos6 ) 
				{
					if ( pos1->second.childrenSupport < 
						pos1->second.myChildren[*pos6].mySupport )
					{
						pos1->second.childrenSupport = 
							pos1->second.myChildren[*pos6].mySupport;
					}
				}
				currentPrefix.pop_back();
			}
		}

		//case 2, an old node, in the itemset, need to add items down
		//fact: pos1->first == myItemset[myStartPosition]
		else if ( !myIsNew[myStartPosition] ) {
			if ( pos1->second.isInfrequent ) { //need not to pass down, redundant
				continue;
			}
			else if ( pos1->second.isUnpromising ) { //if originally unpromising,

				//add the item to the end of currentPrefix
				currentPrefix.push_back(pos1->first);

				bool stillClosed = true;

				//check left-pruning
				pair<HashClosed::const_iterator, HashClosed::const_iterator> p = 
					closedItemsets.equal_range(pos1->second.myTidSum);

				for ( HashClosed::const_iterator pos = p.first; pos != p.second; ++pos ) {
					if ( pos->second.first == pos1->second.mySupport 
						&& isSubset(pos->second.second, currentPrefix ) ) {
							pos1->second.isUnpromising = true;
							stillClosed = false;
							break;
						}
				}

				if ( stillClosed ) {
					pos1->second.isUnpromising = false;
					Family::iterator pos2 = pos1;
					vector<bool> tempBool(FPTree.headTable.size(), false);
					maxEnd = 0;
					while ( pos2 != node.myChildren.end() ) {
						if ( pos2->second.mySupport >= SUPPORT ) {
							tempBool[pos2->first] = true;
							maxEnd = pos2->first;
						}
						++pos2;
					}

					map<FPNodePtr, pair<int,int> > childOccurrence;
					getOccurrence(
						(isOccurrenceLoaded? myPrefix : previousPrefix),
						(isOccurrenceLoaded? myOccurrence : previousOccurrence),
						FPTree, childOccurrence);

					pos1->second.childrenSupport = 
						initializeHelp(pos1->second, childOccurrence, 
						pos1->first, maxEnd, tempBool);

					numberOfExploreCall ++;

					//second, check if I am closed
					if ( pos1->second.childrenSupport < pos1->second.mySupport ) {
						pos1->second.isClosed = true;
						closedItemsets.insert(make_pair(pos1->second.myTidSum, 
							make_pair(pos1->second.mySupport,currentPrefix)));
					}
				}

				//remove the item from the end of currentPrefix
				currentPrefix.pop_back();
			}
			else { //if originally not unPromising and not inFrequent
				int tempSupport = 0;

				currentPrefix.push_back(pos1->first);
				if ( myStartPosition+1 < myItemset.size() ) {
					//if I am not the last item in the itemset to be updated
					tempSupport = addHelp(tid, 
						(isOccurrenceLoaded? myPrefix : previousPrefix),
						(isOccurrenceLoaded? myOccurrence : previousOccurrence),
						myItemset,
						myIsNew,
						myStartPosition+1,
						FPTree,
						pos1->second);
					if ( tempSupport > pos1->second.childrenSupport )
						pos1->second.childrenSupport = tempSupport;
				}


				//if originally closed, still closed, but need to update the hash table
				if ( pos1->second.isClosed ) {
					pair<HashClosed::iterator, HashClosed::iterator> p = 
						closedItemsets.equal_range(pos1->second.myTidSum-tid); //old value

					for ( HashClosed::iterator pos = p.first; pos != p.second; ++pos ) {
						if ( pos->second.first == (pos1->second.mySupport-1)
							&& pos->second.second == currentPrefix  ) {
								closedItemsets.erase(pos);
								break;
							}
					}
					closedItemsets.insert(make_pair(pos1->second.myTidSum, 
						make_pair(pos1->second.mySupport,currentPrefix)));
				}

				else if ( pos1->second.childrenSupport < pos1->second.mySupport ) { 
					//a new closed itemset
					closedItemsets.insert(make_pair(pos1->second.myTidSum, 
						make_pair(pos1->second.mySupport,currentPrefix)));
					pos1->second.isClosed = true;
				}

				currentPrefix.pop_back();
			}
			myStartPosition++;
		}

		//case 3, a newly frequent item
		//two types: passed down from parent, or newly frequent in this level
		else {
			//add the item to the end of currentPrefix
			currentPrefix.push_back(pos1->first);

			bool stillClosed = true;

			//check left-pruning
			pair<HashClosed::const_iterator, HashClosed::const_iterator> p = 
				closedItemsets.equal_range(pos1->second.myTidSum);

			for ( HashClosed::const_iterator pos = p.first; pos != p.second; ++pos ) {
				if ( pos->second.first == pos1->second.mySupport 
					&& isSubset(pos->second.second, currentPrefix ) ) {
						pos1->second.isUnpromising = true;
						stillClosed = false;
						break;
					}
			}

			if ( stillClosed ) {
				pos1->second.isUnpromising = false;
				Family::iterator pos2 = pos1;
				vector<bool> tempBool(FPTree.headTable.size(), false);
				maxEnd = 0;
				while ( pos2 != node.myChildren.end() ) {
					if ( pos2->second.mySupport >= SUPPORT ) {
						tempBool[pos2->first] = true;
						maxEnd = pos2->first;
					}
					++pos2;
				}

				map<FPNodePtr, pair<int,int> > childOccurrence;
				getOccurrence( myPrefix + 1,
					occurrenceFamily[inverse[pos1->first]],
					FPTree, childOccurrence);

				pos1->second.childrenSupport = 
					initializeHelp(pos1->second, childOccurrence,
					pos1->first, maxEnd, tempBool);

				numberOfExploreCall ++;

				//second, check if I am closed
				if ( pos1->second.childrenSupport < pos1->second.mySupport ) {
					pos1->second.isClosed = true;
					closedItemsets.insert(make_pair(pos1->second.myTidSum, 
						make_pair(pos1->second.mySupport,currentPrefix)));
				}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -