⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 seqtree.cpp

📁 挖掘频繁闭序列的算法是序列挖掘算法早期比较著名的算法
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include <assert.h>#include "../Global.h"#include "../ProjDB.h"#include "../MemMap.h"#if defined( _PERFORM_COMMON_PREFIX_CHECKING )// Returns a pointer to the start of the SeqNum th sequence of the projected DB.//inline int * GetStartPtr( const struct PROJ_DB *pDB, int SeqNum, int SeqIdx = 0 )inline int * GetStartPtr( const struct PROJ_DB *pDB, int SeqNum, int SeqIdx ){	int * intPtr;	if( SeqIdx >= (*pDB).m_pProjSeq[SeqNum].m_nProjCount )		assert( false );#ifdef DISK_BASED	if( (*pDB).m_pProjSeq[SeqNum].m_nProjCount == 1 )		intPtr = (int*) (*pDB).m_pProjSeq[SeqNum].m_ppSeq;	else		intPtr = (int*) (*pDB).m_pProjSeq[SeqNum].m_ppSeq[SeqIdx];#else	intPtr = (int*) (*pDB).m_pProjSeq[SeqNum].m_ppSeq[SeqIdx];#endif	return intPtr;}// Returns a pointer to the End of the Sequence that starts at StartPtr.inline int * GetEndPtr( int * StartPtr ){	int * TmpPtr;	TmpPtr = StartPtr;	while( *TmpPtr != -2 )		TmpPtr++;	return TmpPtr;}// Returns a pointer to the first frequent item pointed by StartPtr, or after it.// It returns a pointer to -1 for end of itemset, and -2 for end of the sequence.inline int * GetNextItem( int * StartPtr ){#ifdef DISK_BASED	int * DataPtr = StartPtr;	bool InFreqItemSet = false;	while( true )	{		//if( !(*DataPtr == -1 && InFreqItemSet) && *DataPtr < 0 )		if( *DataPtr < 0 )			return DataPtr;		if( (inter[*DataPtr]).count >= gSUP )			return DataPtr;		InFreqItemSet = true;		DataPtr++;	}#else	return StartPtr;#endif}// Returns true is the sequence pointed to by StartPtr has no frequent items.inline bool SeqIsEmpty( int * StartPtr ){	int * Ptr;		Ptr = GetNextItem( StartPtr );	while( *Ptr == -1 )	{		Ptr = GetNextItem( ++Ptr );	}	if( *Ptr == -2 )		return true;	else		return false;}//************************//************************SeqWrap::SeqWrap() { 	Start = NULL; 	Current = Start; 	FirstItemSet = true; 	CurItemSetIsEmpty = true;	StartIsIntra = true;}#if defined( _FIND_MAX_SEQS )SeqWrap::SeqWrap( int * Strt ) #elseinline SeqWrap::SeqWrap( int * Strt ) #endif{ 	FirstItemSet = true; 	Start = Strt;	StartIsIntra = true;	while( true )	{		if( *Start < 0 )		{			StartIsIntra = false;			break;		}		if( (intra[*Start]).count >= gSUP )			break;		Start++;	}	FirstItemSet = StartIsIntra;	Current = Start; }inline int * SeqWrap::GetFirst() { 	Current = Start;	FirstItemSet = StartIsIntra;	CurItemSetIsEmpty = true;	return Current;}inline int * SeqWrap::GetNext(){	if( Current == NULL || *Current == -2 )		return Current;	if( *Current == -1 )	{ 		FirstItemSet = false;		CurItemSetIsEmpty = true;	}	Current++;	while( true )	{		if( *Current == -2 )			return Current;		if( *Current != -1 ) 		{			if( ( !FirstItemSet && (inter[*Current]).count >= gSUP ) ||				( FirstItemSet && (intra[*Current]).count >= gSUP ) )			{				CurItemSetIsEmpty = false;				return Current;			}		} else { //*Current==-1			if( FirstItemSet )			{				FirstItemSet = false;				CurItemSetIsEmpty = true;				return Current;			} else {				if( !CurItemSetIsEmpty )				{					CurItemSetIsEmpty = true;					return Current;				}				CurItemSetIsEmpty = true;			}		}		Current++;	}}inline int * SeqWrap::GetItemSet( int ItemSetNum ){	int tmp = 0;		Current = GetFirst();	if( *Current == -1 )		Current = GetNext();	while( *Current != -2 )	{		if( *Current == -1 )		{			tmp++;			if( tmp == ItemSetNum )				break;		}		Current = GetNext();	}	return Current;	//return GetNext();}void SeqWrap::Print( FILE * aFile ){	int * TmpPtr = NULL;	int * TmpPtr2 = NULL;	fprintf( aFile, " <" );	TmpPtr = GetFirst();	if( TmpPtr!=NULL )		fprintf( aFile, " (" );	while( TmpPtr != NULL && *TmpPtr != -2 )	{		if( *TmpPtr == -1 )		{			TmpPtr2 = GetNext();			if( *TmpPtr2 == -2 )				fprintf( aFile, ") " );			else				fprintf( aFile, ")  (" );			TmpPtr = TmpPtr2;		} else {			fprintf( aFile, " %d ", *TmpPtr );			TmpPtr = GetNext();		}	}	fprintf( aFile, "> " );	fprintf( aFile, "  Support = %d\n", 0 );}#if defined( _FIND_MAX_SEQS )bool SeqWrap::IsEmpty()#elseinline bool SeqWrap::IsEmpty()#endif // defined( _FIND_MAX_SEQS ){		Current = GetFirst();	while( *Current == -1 )	{		Current = GetNext();	}	if( *Current == -2 )		return true;	else		return false;}SeqWrap::~SeqWrap(){}//************************//************************Prefix::Prefix() : SeqWrap(){ 	End = NULL;	NumOfItemSets = 0;	Sup = 0;}inline Prefix::Prefix( int * Strt, int * End ) : SeqWrap( Strt ){ 	NumOfItemSets = 0;	(*this).End = End;	if( Start == End )		{		Start = NULL;		(*this).End = NULL;		NumOfItemSets = 0;	} else {		CalcNumOfItemSets();	}	Sup = 0;}Prefix::Prefix( const struct PROJ_DB * pDB ) : SeqWrap(){//#define GetDBPrefix_PRINT_DEBUG_INFO	int i = 0;	int j = 0;	int JMax = 1;	bool Inter;	SeqWrap * aSeqWrap = NULL;	bool PrefixIsEmpty = true;#ifdef GetDBPrefix_PRINT_DEBUG_INFO	Sequence * aSeq = NULL;	Sequence * tmpSeq = NULL;	static int Cnt = 0;	tmpSeq = new Sequence( pDB );	printf( "  Original Prefix %dth Record ===> ", Cnt++ );	(*tmpSeq).Print();	delete tmpSeq;#endif		End = NULL;	NumOfItemSets = 0;	//Sup = 1;	Sup = 0; 	if( (*pDB).m_nSup > 0 )	{		// Find first none empty sequence.		for( i=0; i<(*pDB).m_nSup; i++ )		{			for( j=0; j < (*pDB).m_pProjSeq[i].m_nProjCount; j++ )			{				aSeqWrap = new SeqWrap( GetStartPtr( pDB, i, j ) );				if( !(*aSeqWrap).IsEmpty() )				{					Start = (*aSeqWrap).GetFirst();					End = GetEndPtr( Start ) - 1;					//TrimPrefix( Start ); //Trim it with itself.					PrefixIsEmpty = false;					#ifdef GetDBPrefix_PRINT_DEBUG_INFO						aSeq = new Sequence( Start, End-Start, 0, true );						printf( "  First Prefix %dth Record ===> ", i+1 );						if( aSeq ) (*aSeq).Print();						printf( "    Real First Start=%d End=%d Size=%d ===> ", Start, End, End-Start );						Print();					#endif					delete aSeqWrap;					break;				}				delete aSeqWrap;			}			if( Start != End )				break;		}	}	if( Start != End )	{		JMax = 1;		if( *(Start) == -1 )			Inter = true;		else			Inter = false;		//for( i++; i<(*pDB).m_nSup; i++ ) // For every seq in DB		for( i; i<(*pDB).m_nSup; i++ ) // For every seq in DB		{			if( !Inter )				JMax = (*pDB).m_pProjSeq[i].m_nProjCount;			//JMax = 1;			for( j=0; j < JMax; j++ )			{				aSeqWrap = new SeqWrap( GetStartPtr( pDB, i, j ) );#ifdef GetDBPrefix_PRINT_DEBUG_INFO				tmpSeq = new Sequence( (*aSeqWrap).GetFirst(), true );				printf( "  %dth Record ===> ", i+1 );				(*tmpSeq).Print();				delete tmpSeq;#endif				if( !(*aSeqWrap).IsEmpty() )				{					TrimPrefix( (*aSeqWrap).GetFirst() );					Sup++;#ifdef GetDBPrefix_PRINT_DEBUG_INFO					if( Start != End )					{						aSeq = new Sequence( Start, End-Start, 0, true );						printf( "     ++++++++++++++++ Remaining prefix is  " );						(*aSeq).Print();						printf( "        +++++Real Start=%d End=%d Size=%d prefix is ", Start, End, End-Start );						Print();					}#endif				}				delete aSeqWrap;				PrefixIsEmpty = IsEmpty();				if( PrefixIsEmpty )					break;			}							if( PrefixIsEmpty )				break;		}	}	//if( Start == End )		if( PrefixIsEmpty )		{		Start = NULL;		End = NULL;		NumOfItemSets = 0;		Sup = 0;	} else {		CalcNumOfItemSets();	}}Prefix::Prefix( const struct mem_map * pDatasetMemMap ) : SeqWrap(){	bool PrefixIsEmpty = true;	int * RecStart;  int * LastAddr = (int*) GetLastAddrOfMap( pDatasetMemMap );		NumOfItemSets = 0;	Sup = 1; 	Start = (int*) GetStartOfMap( pDatasetMemMap );	Start = GetNextItem( Start ); // Get first frequent item.	while( *Start < 0 && Start < LastAddr )		Start = GetNextItem( Start+1 ); // Get first frequent item.	End = Start;	// Find first none empty sequence.	while( End < LastAddr )	{		End = GetEndPtr(Start) - 1;		if( Start != End )			break;		else			Start = GetNextItem( End + 2 );	}	RecStart = GetNextItem( End + 2 );	while( *RecStart < 0 && RecStart < LastAddr )		RecStart = GetNextItem( RecStart + 1 );	if( Start != End )	{		while( RecStart < LastAddr )		{			TrimPrefix( RecStart );			PrefixIsEmpty = IsEmpty();			if( PrefixIsEmpty )				break;			Sup++;			RecStart = GetEndPtr(RecStart) + 1;			while( *RecStart < 0 && RecStart < LastAddr )				RecStart = GetNextItem( RecStart + 1 );		}	}	if( PrefixIsEmpty )		{		Start = NULL;		End = NULL;		NumOfItemSets = 0;		Sup = 0;	} else {		CalcNumOfItemSets();	}}// Returns number of itemsets that have at least one frequent item.// It starts from StartPtr to EndPtr, if EndPtr is NULL it will search till end of the sequence.inline void Prefix::CalcNumOfItemSets(){	//bool LastWasNegOne = false;	int Result = 0;	if( Start==End )	{		NumOfItemSets = 0;		return;	}	Current = GetFirst();	if( End==NULL )	{		while( *Current!=-2 )		{			//LastWasNegOne = ( *Current==-1 );			Current = GetNext();			if( *Current==-1 )				Result++;		}	} else {		while( Current<End )		{			//LastWasNegOne = ( *Current==-1 );			Current = GetNext();			if( *Current==-1 )				Result++;		}	}	//if( LastWasNegOne )	//	Result--;	NumOfItemSets = Result;}// Number of frequent elements plus number of itemset seperators, -1s, but last one.inline int Prefix::Size(){	//bool LastWasNegOne = false;	int tmp = 0;	Current = GetFirst();	while( Current < End && *Current != -2 )	{		//LastWasNegOne = ( *Current==-1 );

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -