⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 projdb.cpp

📁 挖掘频繁闭序列的算法是序列挖掘算法早期比较著名的算法
💻 CPP
📖 第 1 页 / 共 2 页
字号:
			  proj_db[nCount].m_nPatLen = (pDB->m_nPatLen+1);			  proj_db[nCount].m_pnPat = (int*)memalloc(j);				if (pDB->m_nPatLen == 1) 				{					proj_db[nCount].m_pnPat[0] = (int) pDB->m_pnPat;				} else {					memcpy (proj_db[nCount].m_pnPat, pDB->m_pnPat, j-sizeof(int));				}        proj_db[nCount].m_pnPat[pDB->m_nPatLen] = i;#if defined( _USE_OUTPUT_BUFFER )				proj_db[nCount].OutputBuf = OutputPattern(proj_db[nCount].m_pnPat, pDB->m_nPatLen+1, intra[i].count );				(*proj_db[nCount].OutputBuf).Parent = (*pDB).OutputBuf;				(*(*pDB).OutputBuf).ReferenceCount++;#else        OutputPattern(proj_db[nCount].m_pnPat, pDB->m_nPatLen+1, intra[i].count );#endif			  proj_db[nCount].m_nMaxSup = intra[i].count;				n_total_mem+=(nSize=intra[i].count*sizeof(struct PROJ_SEQ));			  proj_db[nCount].m_pProjSeq = (struct PROJ_SEQ*)memalloc(nSize);        memset (proj_db[nCount].m_pProjSeq, 0, nSize);			  proj_db[nCount].m_nVer = -1;			  proj_db[nCount].m_nSup = 0;				#if defined( _CALC_I_NUM_OF_ITEMS )					proj_db[nCount].NumOfItems = 0;					proj_db[nCount].ItemIsIntra=true;					proj_db[nCount].Item=i;				#endif			  intra_freq_idx[i] = nCount;			  nCount++;      }       if (inter[i].count>=gSUP)			{			  proj_db[nCount].m_nPatLen = (pDB->m_nPatLen+2);			  proj_db[nCount].m_pnPat = (int*)memalloc(sizeof(int)+j);				if (pDB->m_nPatLen == 1) 				{					proj_db[nCount].m_pnPat[0] = (int) pDB->m_pnPat;				} else {	        memcpy (proj_db[nCount].m_pnPat, pDB->m_pnPat, j-sizeof(int));				}			  proj_db[nCount].m_pnPat[pDB->m_nPatLen] = -1;			  proj_db[nCount].m_pnPat[pDB->m_nPatLen+1] = i;#if defined( _USE_OUTPUT_BUFFER )        proj_db[nCount].OutputBuf = OutputPattern(proj_db[nCount].m_pnPat, pDB->m_nPatLen+2, inter[i].count );				(*proj_db[nCount].OutputBuf).Parent = (*pDB).OutputBuf;				(*(*pDB).OutputBuf).ReferenceCount++;#else        OutputPattern(proj_db[nCount].m_pnPat, pDB->m_nPatLen+2, inter[i].count );#endif			  proj_db[nCount].m_nMaxSup = inter[i].count;				n_total_mem+=(nSize=inter[i].count*sizeof(struct PROJ_SEQ));			  proj_db[nCount].m_pProjSeq = (struct PROJ_SEQ*)memalloc(nSize);        memset (proj_db[nCount].m_pProjSeq, 0, nSize);			  proj_db[nCount].m_nVer = -1;			  proj_db[nCount].m_nSup = 0;				#if defined( _CALC_I_NUM_OF_ITEMS )					proj_db[nCount].NumOfItems = 0;					proj_db[nCount].ItemIsIntra=false;					proj_db[nCount].Item=i;				#endif			  inter_freq_idx[i] = nCount;			  nCount++;      }    }		#if defined( _USE_STRING_ELEMINATION )		bool CheckStrings = false;		if( (*MainSeqTree).IsContained( new Sequence(pDB) ) )		{			CheckStrings = true;		}		#endif // defined( _USE_STRING_ELEMINATION )    // scan database again, do projection    for (nCount=0; nCount<pDB->m_nSup;)		{      nCount++; nProjCnt=pDB->m_pProjSeq[nCount-1].m_nProjCount;      for (i=0; i<nProjCnt; i++)			{#ifdef DISK_BASED				if (nProjCnt==1) dataset = (int*) pDB->m_pProjSeq[nCount-1].m_ppSeq;				else dataset = pDB->m_pProjSeq[nCount-1].m_ppSeq[i];#else        dataset = pDB->m_pProjSeq[nCount-1].m_ppSeq[i];#endif				#if defined( _USE_STRING_ELEMINATION )				if( CheckStrings )				{					if( (*MainSeqTree).IsContained( pDB, dataset ) )					{						//Sequence * aS = new Sequence( pDB, dataset );						//(*aS).Print();						//printf( " Sequence Eleminated.\n" );						continue;					}				}				#endif // defined( _USE_STRING_ELEMINATION )        // counting intra-element items.	      for (; *dataset>=0; dataset++)				{          // eat up consecutive identical numbers.          while (dataset[0]==dataset[1]) dataset++;										j=intra_freq_idx[*dataset];          if (dataset[2]==-2 || j<0 || proj_db[j].m_nVer>=nCount) continue;					tempDB = proj_db + j;					tempSeq = tempDB->m_pProjSeq + tempDB->m_nSup;					tempDB->m_nVer = nCount;					#if defined( _CALC_I_NUM_OF_ITEMS )								(*tempDB).NumOfItems += (lastAddr - dataset );								/*if ((*tempDB).NumOfItems == 1) {									printf("INTRAERROR\n");									printf("%d\n", lastAddr-dataset);									exit(1);								}*/					#endif          for (buf_idx[0]=int(dataset+1), l=1, j=i; j<nProjCnt; j++)					{						if (j==i) d=dataset+1; else d=pDB->m_pProjSeq[nCount-1].m_ppSeq[j];            while (*d!=-1 && d[2]!=-2 && *d!=*dataset) d++;						if (d[2]!=-2 && *d==*dataset && d[1]!=-1) buf_idx[l++]=int(d+1);          }#ifdef DISK_BASED					tempDB->m_nSup++;					tempSeq->m_nProjCount = l;					if (l==1) 					{						tempSeq->m_ppSeq = (int**) buf_idx[0];					} else {						n_total_mem+=(nSize=sizeof(int*)*l);						tempSeq->m_ppSeq = (int**)memalloc(nSize);						memcpy (tempSeq->m_ppSeq, buf_idx, nSize);					}#else					buf_idx[0]=nSeqLen=0; bIntra=true; d=dataset+1;					for (k=l, j=l=1; *d!=-2; d++) 					{						for (bIndx=false; *d!=-1; d++) 						{							if (j<k && (int*)(buf_idx[j])==d) 							{ 								bIntra=bIndx=true; j++; buf_idx[l++]=nSeqLen; 							}							if (bIntra) 							{ 								if (intra_freq_idx[*d]<0 && inter_freq_idx[*d]<0) continue;							} else if (inter_freq_idx[*d]<0) continue;							bufseq[nSeqLen++]=*d; bIndx=false;						}						bIntra=false; if (bIndx) l--;						if (nSeqLen==0 || bufseq[nSeqLen-1]!=-1) bufseq[nSeqLen++]=-1;					}					if (nSeqLen<2) continue;          tempDB->m_nSup++;					bufseq[nSeqLen++]=-2;          tempSeq->m_nProjCount = l;					n_total_mem+=(nSize=sizeof(int*)*l);					tempSeq->m_ppSeq=(int**)memalloc(nSize);					n_total_mem+=(nSize=sizeof(int*)*nSeqLen);					tempDB->m_nSeqSize=nSize;					tempSeq->m_ppSeq[0]=(int*)memalloc(nSize);					memcpy(tempSeq->m_ppSeq[0], bufseq, nSize);					for (j=1; j<l; j++) tempSeq->m_ppSeq[j]=tempSeq->m_ppSeq[0]+buf_idx[j];#endif        } // end for counting intra-element items.        // for inter we only need to work with the longest instance        // of a projected sequence. ie. the first one (i==0).        if (i!=0) continue;        for (dataset++; *dataset!=-2; dataset++)				{          // counting inter-element items.	        for (; *dataset!=-1; dataset++)					{            // eat up consecutive identical numbers.            while (dataset[0]==dataset[1]) dataset++;						j=inter_freq_idx[*dataset];						if (dataset[2]==-2 || j<0 || proj_db[j].m_nVer>=nCount) continue;						tempDB = proj_db + j;						tempSeq = tempDB->m_pProjSeq + tempDB->m_nSup;						tempDB->m_nVer = nCount;												#if defined( _CALC_I_NUM_OF_ITEMS )								(*tempDB).NumOfItems += (lastAddr - dataset);								/*if ((*tempDB).NumOfItems == 1) {									printf("INTERERROR\n");									printf("%d\n", lastAddr-dataset);									exit(1);								}*/						#endif						for (d=dataset+1, buf_idx[0]=int(d), l=1; d[2]!=-2; d++)						{ 							if (*d==*dataset && d[1]!=-1) buf_idx[l++] = int(d+1);						}#ifdef DISK_BASED						tempDB->m_nSup++;						tempSeq->m_nProjCount = l;						if (l==1) 						{							tempSeq->m_ppSeq = (int**) buf_idx[0];						} else {							n_total_mem+=(nSize=sizeof(int*)*l);							tempSeq->m_ppSeq = (int**)memalloc(nSize);							memcpy (tempSeq->m_ppSeq, buf_idx, nSize);						}#else						buf_idx[0]=nSeqLen=0; d=dataset+1;						for (k=l, j=l=1; *d!=-2; d++) 						{							for (bIndx=false; *d!=-1; d++) 							{								if (j<k && (int*)(buf_idx[j])==d) { bIndx=true; j++; buf_idx[l++]=nSeqLen; }								if (inter_freq_idx[*d]<0) continue;								bufseq[nSeqLen++]=*d; bIndx=false;							}							if (bIndx) l--;							if (nSeqLen==0 || bufseq[nSeqLen-1]!=-1) bufseq[nSeqLen++]=-1;						}						if (nSeqLen<2) continue;						tempDB->m_nSup++;						bufseq[nSeqLen++]=-2;						tempSeq->m_nProjCount = l;						n_total_mem+=(nSize=sizeof(int*)*l);						tempSeq->m_ppSeq=(int**)memalloc(nSize);						n_total_mem+=(nSize=sizeof(int*)*nSeqLen);						tempDB->m_nSeqSize=nSize;						tempSeq->m_ppSeq[0]=(int*)memalloc(nSize);						memcpy(tempSeq->m_ppSeq[0], bufseq, nSize);						for (j=1; j<l; j++) tempSeq->m_ppSeq[j]=tempSeq->m_ppSeq[0]+buf_idx[j];#endif          }        } // end for counting inter-element items.      } // end of all projection instances of a sequence.    } // end of all sequences.  } // enf if number of projections is greater than 0.#if defined( _PERFORM_COMMON_PREFIX_CHECKING )DONE:#endif // defined( _PERFORM_COMMON_PREFIX_CHECKING )	if (n_max_mem<n_total_mem) n_max_mem = n_total_mem;	if (pDB->m_nPatLen > 1) 	{		n_total_mem-=(pDB->m_nPatLen*sizeof(int));		freemem ((void**) &(pDB->m_pnPat));	}  for (i=0; i<pDB->m_nSup; i++) 	{#ifndef DISK_BASED		n_total_mem-=pDB->m_nSeqSize;    freemem ((void**) &(pDB->m_pProjSeq[i].m_ppSeq[0]));#endif		if (pDB->m_pProjSeq[i].m_nProjCount > 1) 		{			n_total_mem-=(pDB->m_pProjSeq[i].m_nProjCount*sizeof(int*));			freemem ((void**) &(pDB->m_pProjSeq[i].m_ppSeq));		}	}	n_total_mem-=(pDB->m_nMaxSup*sizeof(struct PROJ_SEQ));  freemem ((void**) &(pDB->m_pProjSeq));  // PrintProjDBs(proj_db, *pnFreqCount);  return proj_db;}//this part is copied from make_projdb_from_projected_db's tail part.#if defined (_ANOTHER_CLOSED_APPROACH)void clean_projcted_db(const struct PROJ_DB* pDB, int* pnFreqCount){	int i;	if (n_max_mem<n_total_mem) n_max_mem = n_total_mem;	if (pDB->m_nPatLen > 1) 	{		n_total_mem-=(pDB->m_nPatLen*sizeof(int));		freemem ((void**) &(pDB->m_pnPat));	}  for (i=0; i<pDB->m_nSup; i++) 	{#ifndef DISK_BASED		n_total_mem-=pDB->m_nSeqSize;    freemem ((void**) &(pDB->m_pProjSeq[i].m_ppSeq[0]));#endif		if (pDB->m_pProjSeq[i].m_nProjCount > 1) 		{			n_total_mem-=(pDB->m_pProjSeq[i].m_nProjCount*sizeof(int*));			freemem ((void**) &(pDB->m_pProjSeq[i].m_ppSeq));		}	}	n_total_mem-=(pDB->m_nMaxSup*sizeof(struct PROJ_SEQ));  freemem ((void**) &(pDB->m_pProjSeq));}#endif//////////////////////////////////////////////////////////////////////// Local functions.//////////////////////////////////////////////////////////////////////void PrintProjDBs(const struct PROJ_DB *proj_db, const int nCount){	int i=0, j=0, k=0, l=0, nProjCnt=0, *dataset=0;	printf("\nProjected databases:\n");	for (i=0; i<nCount; i++){		printf("Proj. DB. for (");		if (proj_db[i].m_nPatLen == 1) printf("%d", int(proj_db[i].m_pnPat));		else for (j=0; j<proj_db[i].m_nPatLen; j++) {      if (proj_db[i].m_pnPat[j]==-1) printf(")");			else if (j>0 && proj_db[i].m_pnPat[j-1]==-1) printf(" (%d", proj_db[i].m_pnPat[j]);      else if (j>0) printf(" %d", proj_db[i].m_pnPat[j]);      else printf("%d", proj_db[i].m_pnPat[j]);		}		printf("\n ");		#if defined( _CALC_I_NUM_OF_ITEMS )			printf( "NumOfItems = %d m_nSupport= %d  maxSupport= %d totalmem= %d maxmem= %d\n", 								proj_db[i].NumOfItems, proj_db[i].m_nSup, proj_db[i].m_nMaxSup, n_total_mem, n_max_mem );			/*if (proj_db[i].NumOfItems == 1) {				printf("WRONGPRINT\n");				exit(1);			}*/		#endif	return;    for (j=0; j<proj_db[i].m_nSup; j++){			nProjCnt = proj_db[i].m_pProjSeq[j].m_nProjCount;      for (k=0; k<nProjCnt; k++){#ifdef DISK_BASED				if (nProjCnt==1) dataset = (int*) proj_db[i].m_pProjSeq[j].m_ppSeq;				else dataset = proj_db[i].m_pProjSeq[j].m_ppSeq[k];#else        dataset = proj_db[i].m_pProjSeq[j].m_ppSeq[k];#endif        for(l=0; dataset[l]!=-2; l++){          if (dataset[l]==-1) printf(")");          else if (l>0 && dataset[l-1] == -1) printf(" (%d", dataset[l]);           else if (l>0) printf(" %d", dataset[l]);          else printf("%d", dataset[l]);         } // end of an instance for a projected sequence.        printf("\n ");      } // end of all instances for a projected sequence.      printf("\n ");    } // end of all projected sequences.	}}#if defined( _USE_OUTPUT_BUFFER )//inline Sequence * OutputPattern( const int *const pat, const int nPatLen, const int nSup )Sequence * OutputPattern( const int *const pat, const int nPatLen, const int nSup ){	int i=0, l=0;	Sequence * aSeq = new Sequence( (int *) pat, (int) nPatLen, (int) nSup );	/////////////////#if defined( _WRITE_FREQUENT_FILE )  for (; i<nPatLen; i++){ 	  fprintf(gpFreqFile, "(%d", pat[i]);    for (i++, l++; (i<nPatLen) && (pat[i]!=-1); i++){      l++;      fprintf (gpFreqFile, " %d", pat[i]);    }    fprintf(gpFreqFile, ") ");  }	fprintf(gpFreqFile, ": %f\n", double(nSup)/double(gnCustCount));#else  for( ; i<nPatLen; i++ )		if( pat[i] != -1 )			l++;#endif	gnArrLargeCount[l]++;	return aSeq;}#else // defined( _USE_OUTPUT_BUFFER )//inline bool OutputPattern( const int *const pat, const int nPatLen, const int nSup )bool OutputPattern( const int *const pat, const int nPatLen, const int nSup ){	bool Result = true;	int i=0, l=0;	// Added by Ramin#if defined( _FIND_CLOSED_SEQS )	Sequence * aSeq = new Sequence( (int *) pat, (int) nPatLen, (int) nSup );	Result = (*aSeqList).AddIfClosedSeq( aSeq );		if( !Result )		delete aSeq;#elif defined( _FIND_MAX_SEQS ) && defined( _DO_NAIVE_APPROACH )	//Sequence * aSeq = new Sequence( proj_db );	Sequence * aSeq = new Sequence( (int *)pat, (int) nPatLen, (int) nSup );	(*MainSeqTree).AddSeq( aSeq );#endif	/////////////////#if defined( _WRITE_FREQUENT_FILE )  for (; i<nPatLen; i++){ 	  fprintf(gpFreqFile, "(%d", pat[i]);    for (i++, l++; (i<nPatLen) && (pat[i]!=-1); i++){      l++;      fprintf (gpFreqFile, " %d", pat[i]);    }    fprintf(gpFreqFile, ") ");  }	fprintf(gpFreqFile, ": %f  %d\n", double(nSup)/double(gnCustCount), nSup);#else  for( ; i<nPatLen; i++ )		if( pat[i] != -1 )			l++;#endif	gnArrLargeCount[l]++;	return Result;}#endif // defined( _USE_OUTPUT_BUFFER )//////////////////////////////////////////////////////////////////////// END// Added by Raminvoid Test(){	int * dataset = (int*) GetStartOfMap (pDatasetMemMap);  int * lastAddr = (int*) GetLastAddrOfMap (pDatasetMemMap);	int * RecStart;	FILE *testFile = NULL;  testFile = file_open( "Test.txt", "w" ); 	long NumOfRecs = 0;	long NumOfItems = 0;	long NumOfItemSets = 0;  for( int nCount=0; dataset<lastAddr; dataset++, nCount++ )	{		RecStart = dataset;		NumOfRecs++;		// For each sequence    for (; *dataset!=-2; dataset++)		{			NumOfItemSets++;			// For each Itemset			for (; *dataset!=-1; dataset++)			{				NumOfItems++;				fprintf( testFile, " %d ", *dataset );			}			fprintf( testFile, " | " );		}		fprintf( testFile, "\n" );	}	fprintf( testFile, "Number of records = %d\n", NumOfRecs );	fprintf( testFile, "Number of ItemSets = %d\n", NumOfItemSets );	fprintf( testFile, "Number of Items = %d\n", NumOfItems );	fprintf( testFile, "Average number of ItemSets in a recored = %d\n", NumOfItemSets / NumOfRecs );	fprintf( testFile, "Average number of Items in an itemset = %d\n", NumOfItems / NumOfItemSets );  fclose( testFile );	exit( 0 );}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -