📄 projdb.cpp
字号:
proj_db[nCount].m_nPatLen = (pDB->m_nPatLen+1); proj_db[nCount].m_pnPat = (int*)memalloc(j); if (pDB->m_nPatLen == 1) { proj_db[nCount].m_pnPat[0] = (int) pDB->m_pnPat; } else { memcpy (proj_db[nCount].m_pnPat, pDB->m_pnPat, j-sizeof(int)); } proj_db[nCount].m_pnPat[pDB->m_nPatLen] = i;#if defined( _USE_OUTPUT_BUFFER ) proj_db[nCount].OutputBuf = OutputPattern(proj_db[nCount].m_pnPat, pDB->m_nPatLen+1, intra[i].count ); (*proj_db[nCount].OutputBuf).Parent = (*pDB).OutputBuf; (*(*pDB).OutputBuf).ReferenceCount++;#else OutputPattern(proj_db[nCount].m_pnPat, pDB->m_nPatLen+1, intra[i].count );#endif proj_db[nCount].m_nMaxSup = intra[i].count; n_total_mem+=(nSize=intra[i].count*sizeof(struct PROJ_SEQ)); proj_db[nCount].m_pProjSeq = (struct PROJ_SEQ*)memalloc(nSize); memset (proj_db[nCount].m_pProjSeq, 0, nSize); proj_db[nCount].m_nVer = -1; proj_db[nCount].m_nSup = 0; #if defined( _CALC_I_NUM_OF_ITEMS ) proj_db[nCount].NumOfItems = 0; proj_db[nCount].ItemIsIntra=true; proj_db[nCount].Item=i; #endif intra_freq_idx[i] = nCount; nCount++; } if (inter[i].count>=gSUP) { proj_db[nCount].m_nPatLen = (pDB->m_nPatLen+2); proj_db[nCount].m_pnPat = (int*)memalloc(sizeof(int)+j); if (pDB->m_nPatLen == 1) { proj_db[nCount].m_pnPat[0] = (int) pDB->m_pnPat; } else { memcpy (proj_db[nCount].m_pnPat, pDB->m_pnPat, j-sizeof(int)); } proj_db[nCount].m_pnPat[pDB->m_nPatLen] = -1; proj_db[nCount].m_pnPat[pDB->m_nPatLen+1] = i;#if defined( _USE_OUTPUT_BUFFER ) proj_db[nCount].OutputBuf = OutputPattern(proj_db[nCount].m_pnPat, pDB->m_nPatLen+2, inter[i].count ); (*proj_db[nCount].OutputBuf).Parent = (*pDB).OutputBuf; (*(*pDB).OutputBuf).ReferenceCount++;#else OutputPattern(proj_db[nCount].m_pnPat, pDB->m_nPatLen+2, inter[i].count );#endif proj_db[nCount].m_nMaxSup = inter[i].count; n_total_mem+=(nSize=inter[i].count*sizeof(struct PROJ_SEQ)); proj_db[nCount].m_pProjSeq = (struct PROJ_SEQ*)memalloc(nSize); memset (proj_db[nCount].m_pProjSeq, 0, nSize); proj_db[nCount].m_nVer = -1; proj_db[nCount].m_nSup = 0; #if defined( _CALC_I_NUM_OF_ITEMS ) proj_db[nCount].NumOfItems = 0; proj_db[nCount].ItemIsIntra=false; proj_db[nCount].Item=i; #endif inter_freq_idx[i] = nCount; nCount++; } } #if defined( _USE_STRING_ELEMINATION ) bool CheckStrings = false; if( (*MainSeqTree).IsContained( new Sequence(pDB) ) ) { CheckStrings = true; } #endif // defined( _USE_STRING_ELEMINATION ) // scan database again, do projection for (nCount=0; nCount<pDB->m_nSup;) { nCount++; nProjCnt=pDB->m_pProjSeq[nCount-1].m_nProjCount; for (i=0; i<nProjCnt; i++) {#ifdef DISK_BASED if (nProjCnt==1) dataset = (int*) pDB->m_pProjSeq[nCount-1].m_ppSeq; else dataset = pDB->m_pProjSeq[nCount-1].m_ppSeq[i];#else dataset = pDB->m_pProjSeq[nCount-1].m_ppSeq[i];#endif #if defined( _USE_STRING_ELEMINATION ) if( CheckStrings ) { if( (*MainSeqTree).IsContained( pDB, dataset ) ) { //Sequence * aS = new Sequence( pDB, dataset ); //(*aS).Print(); //printf( " Sequence Eleminated.\n" ); continue; } } #endif // defined( _USE_STRING_ELEMINATION ) // counting intra-element items. for (; *dataset>=0; dataset++) { // eat up consecutive identical numbers. while (dataset[0]==dataset[1]) dataset++; j=intra_freq_idx[*dataset]; if (dataset[2]==-2 || j<0 || proj_db[j].m_nVer>=nCount) continue; tempDB = proj_db + j; tempSeq = tempDB->m_pProjSeq + tempDB->m_nSup; tempDB->m_nVer = nCount; #if defined( _CALC_I_NUM_OF_ITEMS ) (*tempDB).NumOfItems += (lastAddr - dataset ); /*if ((*tempDB).NumOfItems == 1) { printf("INTRAERROR\n"); printf("%d\n", lastAddr-dataset); exit(1); }*/ #endif for (buf_idx[0]=int(dataset+1), l=1, j=i; j<nProjCnt; j++) { if (j==i) d=dataset+1; else d=pDB->m_pProjSeq[nCount-1].m_ppSeq[j]; while (*d!=-1 && d[2]!=-2 && *d!=*dataset) d++; if (d[2]!=-2 && *d==*dataset && d[1]!=-1) buf_idx[l++]=int(d+1); }#ifdef DISK_BASED tempDB->m_nSup++; tempSeq->m_nProjCount = l; if (l==1) { tempSeq->m_ppSeq = (int**) buf_idx[0]; } else { n_total_mem+=(nSize=sizeof(int*)*l); tempSeq->m_ppSeq = (int**)memalloc(nSize); memcpy (tempSeq->m_ppSeq, buf_idx, nSize); }#else buf_idx[0]=nSeqLen=0; bIntra=true; d=dataset+1; for (k=l, j=l=1; *d!=-2; d++) { for (bIndx=false; *d!=-1; d++) { if (j<k && (int*)(buf_idx[j])==d) { bIntra=bIndx=true; j++; buf_idx[l++]=nSeqLen; } if (bIntra) { if (intra_freq_idx[*d]<0 && inter_freq_idx[*d]<0) continue; } else if (inter_freq_idx[*d]<0) continue; bufseq[nSeqLen++]=*d; bIndx=false; } bIntra=false; if (bIndx) l--; if (nSeqLen==0 || bufseq[nSeqLen-1]!=-1) bufseq[nSeqLen++]=-1; } if (nSeqLen<2) continue; tempDB->m_nSup++; bufseq[nSeqLen++]=-2; tempSeq->m_nProjCount = l; n_total_mem+=(nSize=sizeof(int*)*l); tempSeq->m_ppSeq=(int**)memalloc(nSize); n_total_mem+=(nSize=sizeof(int*)*nSeqLen); tempDB->m_nSeqSize=nSize; tempSeq->m_ppSeq[0]=(int*)memalloc(nSize); memcpy(tempSeq->m_ppSeq[0], bufseq, nSize); for (j=1; j<l; j++) tempSeq->m_ppSeq[j]=tempSeq->m_ppSeq[0]+buf_idx[j];#endif } // end for counting intra-element items. // for inter we only need to work with the longest instance // of a projected sequence. ie. the first one (i==0). if (i!=0) continue; for (dataset++; *dataset!=-2; dataset++) { // counting inter-element items. for (; *dataset!=-1; dataset++) { // eat up consecutive identical numbers. while (dataset[0]==dataset[1]) dataset++; j=inter_freq_idx[*dataset]; if (dataset[2]==-2 || j<0 || proj_db[j].m_nVer>=nCount) continue; tempDB = proj_db + j; tempSeq = tempDB->m_pProjSeq + tempDB->m_nSup; tempDB->m_nVer = nCount; #if defined( _CALC_I_NUM_OF_ITEMS ) (*tempDB).NumOfItems += (lastAddr - dataset); /*if ((*tempDB).NumOfItems == 1) { printf("INTERERROR\n"); printf("%d\n", lastAddr-dataset); exit(1); }*/ #endif for (d=dataset+1, buf_idx[0]=int(d), l=1; d[2]!=-2; d++) { if (*d==*dataset && d[1]!=-1) buf_idx[l++] = int(d+1); }#ifdef DISK_BASED tempDB->m_nSup++; tempSeq->m_nProjCount = l; if (l==1) { tempSeq->m_ppSeq = (int**) buf_idx[0]; } else { n_total_mem+=(nSize=sizeof(int*)*l); tempSeq->m_ppSeq = (int**)memalloc(nSize); memcpy (tempSeq->m_ppSeq, buf_idx, nSize); }#else buf_idx[0]=nSeqLen=0; d=dataset+1; for (k=l, j=l=1; *d!=-2; d++) { for (bIndx=false; *d!=-1; d++) { if (j<k && (int*)(buf_idx[j])==d) { bIndx=true; j++; buf_idx[l++]=nSeqLen; } if (inter_freq_idx[*d]<0) continue; bufseq[nSeqLen++]=*d; bIndx=false; } if (bIndx) l--; if (nSeqLen==0 || bufseq[nSeqLen-1]!=-1) bufseq[nSeqLen++]=-1; } if (nSeqLen<2) continue; tempDB->m_nSup++; bufseq[nSeqLen++]=-2; tempSeq->m_nProjCount = l; n_total_mem+=(nSize=sizeof(int*)*l); tempSeq->m_ppSeq=(int**)memalloc(nSize); n_total_mem+=(nSize=sizeof(int*)*nSeqLen); tempDB->m_nSeqSize=nSize; tempSeq->m_ppSeq[0]=(int*)memalloc(nSize); memcpy(tempSeq->m_ppSeq[0], bufseq, nSize); for (j=1; j<l; j++) tempSeq->m_ppSeq[j]=tempSeq->m_ppSeq[0]+buf_idx[j];#endif } } // end for counting inter-element items. } // end of all projection instances of a sequence. } // end of all sequences. } // enf if number of projections is greater than 0.#if defined( _PERFORM_COMMON_PREFIX_CHECKING )DONE:#endif // defined( _PERFORM_COMMON_PREFIX_CHECKING ) if (n_max_mem<n_total_mem) n_max_mem = n_total_mem; if (pDB->m_nPatLen > 1) { n_total_mem-=(pDB->m_nPatLen*sizeof(int)); freemem ((void**) &(pDB->m_pnPat)); } for (i=0; i<pDB->m_nSup; i++) {#ifndef DISK_BASED n_total_mem-=pDB->m_nSeqSize; freemem ((void**) &(pDB->m_pProjSeq[i].m_ppSeq[0]));#endif if (pDB->m_pProjSeq[i].m_nProjCount > 1) { n_total_mem-=(pDB->m_pProjSeq[i].m_nProjCount*sizeof(int*)); freemem ((void**) &(pDB->m_pProjSeq[i].m_ppSeq)); } } n_total_mem-=(pDB->m_nMaxSup*sizeof(struct PROJ_SEQ)); freemem ((void**) &(pDB->m_pProjSeq)); // PrintProjDBs(proj_db, *pnFreqCount); return proj_db;}//this part is copied from make_projdb_from_projected_db's tail part.#if defined (_ANOTHER_CLOSED_APPROACH)void clean_projcted_db(const struct PROJ_DB* pDB, int* pnFreqCount){ int i; if (n_max_mem<n_total_mem) n_max_mem = n_total_mem; if (pDB->m_nPatLen > 1) { n_total_mem-=(pDB->m_nPatLen*sizeof(int)); freemem ((void**) &(pDB->m_pnPat)); } for (i=0; i<pDB->m_nSup; i++) {#ifndef DISK_BASED n_total_mem-=pDB->m_nSeqSize; freemem ((void**) &(pDB->m_pProjSeq[i].m_ppSeq[0]));#endif if (pDB->m_pProjSeq[i].m_nProjCount > 1) { n_total_mem-=(pDB->m_pProjSeq[i].m_nProjCount*sizeof(int*)); freemem ((void**) &(pDB->m_pProjSeq[i].m_ppSeq)); } } n_total_mem-=(pDB->m_nMaxSup*sizeof(struct PROJ_SEQ)); freemem ((void**) &(pDB->m_pProjSeq));}#endif//////////////////////////////////////////////////////////////////////// Local functions.//////////////////////////////////////////////////////////////////////void PrintProjDBs(const struct PROJ_DB *proj_db, const int nCount){ int i=0, j=0, k=0, l=0, nProjCnt=0, *dataset=0; printf("\nProjected databases:\n"); for (i=0; i<nCount; i++){ printf("Proj. DB. for ("); if (proj_db[i].m_nPatLen == 1) printf("%d", int(proj_db[i].m_pnPat)); else for (j=0; j<proj_db[i].m_nPatLen; j++) { if (proj_db[i].m_pnPat[j]==-1) printf(")"); else if (j>0 && proj_db[i].m_pnPat[j-1]==-1) printf(" (%d", proj_db[i].m_pnPat[j]); else if (j>0) printf(" %d", proj_db[i].m_pnPat[j]); else printf("%d", proj_db[i].m_pnPat[j]); } printf("\n "); #if defined( _CALC_I_NUM_OF_ITEMS ) printf( "NumOfItems = %d m_nSupport= %d maxSupport= %d totalmem= %d maxmem= %d\n", proj_db[i].NumOfItems, proj_db[i].m_nSup, proj_db[i].m_nMaxSup, n_total_mem, n_max_mem ); /*if (proj_db[i].NumOfItems == 1) { printf("WRONGPRINT\n"); exit(1); }*/ #endif return; for (j=0; j<proj_db[i].m_nSup; j++){ nProjCnt = proj_db[i].m_pProjSeq[j].m_nProjCount; for (k=0; k<nProjCnt; k++){#ifdef DISK_BASED if (nProjCnt==1) dataset = (int*) proj_db[i].m_pProjSeq[j].m_ppSeq; else dataset = proj_db[i].m_pProjSeq[j].m_ppSeq[k];#else dataset = proj_db[i].m_pProjSeq[j].m_ppSeq[k];#endif for(l=0; dataset[l]!=-2; l++){ if (dataset[l]==-1) printf(")"); else if (l>0 && dataset[l-1] == -1) printf(" (%d", dataset[l]); else if (l>0) printf(" %d", dataset[l]); else printf("%d", dataset[l]); } // end of an instance for a projected sequence. printf("\n "); } // end of all instances for a projected sequence. printf("\n "); } // end of all projected sequences. }}#if defined( _USE_OUTPUT_BUFFER )//inline Sequence * OutputPattern( const int *const pat, const int nPatLen, const int nSup )Sequence * OutputPattern( const int *const pat, const int nPatLen, const int nSup ){ int i=0, l=0; Sequence * aSeq = new Sequence( (int *) pat, (int) nPatLen, (int) nSup ); /////////////////#if defined( _WRITE_FREQUENT_FILE ) for (; i<nPatLen; i++){ fprintf(gpFreqFile, "(%d", pat[i]); for (i++, l++; (i<nPatLen) && (pat[i]!=-1); i++){ l++; fprintf (gpFreqFile, " %d", pat[i]); } fprintf(gpFreqFile, ") "); } fprintf(gpFreqFile, ": %f\n", double(nSup)/double(gnCustCount));#else for( ; i<nPatLen; i++ ) if( pat[i] != -1 ) l++;#endif gnArrLargeCount[l]++; return aSeq;}#else // defined( _USE_OUTPUT_BUFFER )//inline bool OutputPattern( const int *const pat, const int nPatLen, const int nSup )bool OutputPattern( const int *const pat, const int nPatLen, const int nSup ){ bool Result = true; int i=0, l=0; // Added by Ramin#if defined( _FIND_CLOSED_SEQS ) Sequence * aSeq = new Sequence( (int *) pat, (int) nPatLen, (int) nSup ); Result = (*aSeqList).AddIfClosedSeq( aSeq ); if( !Result ) delete aSeq;#elif defined( _FIND_MAX_SEQS ) && defined( _DO_NAIVE_APPROACH ) //Sequence * aSeq = new Sequence( proj_db ); Sequence * aSeq = new Sequence( (int *)pat, (int) nPatLen, (int) nSup ); (*MainSeqTree).AddSeq( aSeq );#endif /////////////////#if defined( _WRITE_FREQUENT_FILE ) for (; i<nPatLen; i++){ fprintf(gpFreqFile, "(%d", pat[i]); for (i++, l++; (i<nPatLen) && (pat[i]!=-1); i++){ l++; fprintf (gpFreqFile, " %d", pat[i]); } fprintf(gpFreqFile, ") "); } fprintf(gpFreqFile, ": %f %d\n", double(nSup)/double(gnCustCount), nSup);#else for( ; i<nPatLen; i++ ) if( pat[i] != -1 ) l++;#endif gnArrLargeCount[l]++; return Result;}#endif // defined( _USE_OUTPUT_BUFFER )//////////////////////////////////////////////////////////////////////// END// Added by Raminvoid Test(){ int * dataset = (int*) GetStartOfMap (pDatasetMemMap); int * lastAddr = (int*) GetLastAddrOfMap (pDatasetMemMap); int * RecStart; FILE *testFile = NULL; testFile = file_open( "Test.txt", "w" ); long NumOfRecs = 0; long NumOfItems = 0; long NumOfItemSets = 0; for( int nCount=0; dataset<lastAddr; dataset++, nCount++ ) { RecStart = dataset; NumOfRecs++; // For each sequence for (; *dataset!=-2; dataset++) { NumOfItemSets++; // For each Itemset for (; *dataset!=-1; dataset++) { NumOfItems++; fprintf( testFile, " %d ", *dataset ); } fprintf( testFile, " | " ); } fprintf( testFile, "\n" ); } fprintf( testFile, "Number of records = %d\n", NumOfRecs ); fprintf( testFile, "Number of ItemSets = %d\n", NumOfItemSets ); fprintf( testFile, "Number of Items = %d\n", NumOfItems ); fprintf( testFile, "Average number of ItemSets in a recored = %d\n", NumOfItemSets / NumOfRecs ); fprintf( testFile, "Average number of Items in an itemset = %d\n", NumOfItems / NumOfItemSets ); fclose( testFile ); exit( 0 );}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -