📄 genecluster.cpp

📁 用分类算法挖掘数据表达模式中的平移变化和放缩变化
💻 CPP
字号:
// GeneCluster.cpp: implementation of the CGeneCluster class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "RegCluster.h"
#include "GeneCluster.h"

#include <cmath>
#include <iostream.h>

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

CGeneCluster::CGeneCluster()
{
	SetUp();
}

CGeneCluster::~CGeneCluster()
{
	Destruct();
}

void CGeneCluster::SetUp()
{
	m_nGene = 0;
	m_nCond = 0;
	m_nMinC = 0;
	m_nMinG = 0;
	m_dEpsilon = 0;
	m_nCandGeneSize = 0;

	m_pGene = NULL;
	m_pCluster = NULL;
	m_pCandGene = NULL;
}

void CGeneCluster::Destruct()
{
	if(m_pCluster != NULL)
	{
		delete m_pCluster;
		m_pCluster = NULL;
	}
	if(m_pCandGene != NULL)
	{
		delete [] m_pCandGene;
		m_pCandGene = NULL;
	}
	if(m_pGene != NULL)
	{
		delete [] m_pGene;
		m_pGene = NULL;
	}
	
}

void CGeneCluster::initialize(int nGene, int nCond, double **pValue, 
							  double dGama, double dEpsilon, int nMinG, int nMinC)
{
	m_nGene = nGene;
	m_nCond = nCond;
	m_dEpsilon = dEpsilon;
	m_nMinG = nMinG;
	m_nMinC = nMinC;

	Destruct();
	m_pGene = new CGene[m_nGene];
	m_pCluster = new CCluster;
	//m_pCandGene = new int[m_nGene];
	
	m_pCluster->Initialize(nGene, nCond);
	for(int i = 0; i < m_nGene; ++i)
	{
		m_pGene[i].SetCondSize(nCond);
		m_pGene[i].SetGama(dGama);
		m_pGene[i].InitializeGene(pValue[i]);
		m_pGene[i].SortCondVal();
		m_pGene[i].ChainCond();

		m_pCluster->AppendGene(i, true);
		m_pCluster->AppendGene(i, false);

//		m_pCandGene[i] = -1;
	}

}

BOOL CGeneCluster::PruneOne(CCluster *&pC)
{
	if((pC->m_nPGene + pC->m_nNGene) < m_nMinG)
	{
		if(pC != NULL)
		{
			delete pC;
			pC = NULL;
		}
		return true;
	}
	return false;
}

BOOL CGeneCluster::PruneTwo(CCluster *pC)
{
	EstChainLen(pC);
	ScanCandCond(pC);
	return false;
}

// estimate the length of the chain
void CGeneCluster::EstChainLen(CCluster *pC)
{
	if(pC->m_nChainLen == 0)
	{
		return ;
	}
	///////////////rewrite
	int nLastCond = pC->m_pCondChain[pC->m_nChainLen-1];
	//------------------------------------
	for(int i = 0; i < m_nGene; ++i)
	{
		if(pC->m_pPGene[i] == 1)	// the ith gene is in the current gene set
		{
			int j = m_pGene[i].GetCondIndex(nLastCond);
			int nCount = 0;
			for(; j < m_nCond; ++j)
			{
				if(m_pGene[i].m_pChain[j] > -1)
				{
					++nCount;
				}
			}

			if((pC->m_nChainLen + nCount) < m_nMinC)	// chain length samll than the threshold MinC
			{
				pC->m_pPGene[i] = -1;
			}
		}
	}
}

// scan candidate condition
void CGeneCluster::ScanCandCond(CCluster *pC)
{
	if(pC->m_nChainLen == 0)
	{
		for(int i = 0; i < m_nGene; ++i)
		{
			// from begining to end
			int nCond = m_pGene[i].GetCondID(0);
			pC->m_pCandCond[nCond] = 1;
			for(int j = 1; j < m_nCond; ++j)
			{
				if(m_pGene[i].m_pCondVal[0] == m_pGene[i].m_pCondVal[j])
				{
					nCond = m_pGene[i].GetCondID(j);
					pC->m_pCandCond[nCond] = 1;					
				}
				else
				{
					break;
				}
			}
			// from end to beginning
			nCond = m_pGene[i].GetCondID(m_nCond-1);
			pC->m_pCandCond[nCond] = 1;
			for(j = m_nCond - 2; j >= 0; --j)
			{
				if(m_pGene[i].m_pCondVal[m_nCond-1] == m_pGene[i].m_pCondVal[j])
				{
					nCond = m_pGene[i].GetCondID(j);
					pC->m_pCandCond[nCond] = 1;					
				}
				else
				{
					break;
				}
			}
		}
		/*
		for(i = 0; i < pC->m_nChainLen; ++i)
		{
			pC->m_pCandCond[pC->m_pCondChain[i]] = -1;
		}
		*/
	}
	else
	{
		int nLastCond = pC->m_pCondChain[pC->m_nChainLen-1];	// get the last condition ID
		for(int i = 0; i < m_nGene; ++i)
		{
			int nIndex = m_pGene[i].GetCondIndex(nLastCond);	// get the index in pID
			for(int k = nIndex -1; k > 0; --k)
			{
				if(m_pGene[i].m_pChain[nIndex] == m_pGene[i].m_pChain[k])
				{
					--nIndex;
				}
				else
				{
					break;
				}
			}
			for(int j = nIndex + 1; j < m_nCond; ++j)
			{

				if(m_pGene[i].m_pChain[j] == nIndex)			// there is a chain point to the condition
				{
					int nCond = m_pGene[i].GetCondID(j);
					pC->m_pCandCond[nCond] = 1;

					// eliminate duplicate condition
					if(nCond == nLastCond)
					{
						pC->m_pCandCond[nCond] = -1;
					}

					/*
					for(int k = nIndex; k < m_nCond; ++k)
					{
						if(m_pGene[i].m_pCondVal[nIndex] == m_pGene[i].m_pCondVal[k])
						{
							int nCond = m_pGene[i].GetCondID(k);//nIndex	
							pC->m_pCandCond[nCond] = 1;						// set it in candidate list
						}
						else
						{
							break;
						}
					}
					*/
				}
			}
		}
	}
	for(int i = 0; i < pC->m_nChainLen; ++i)
	{
		pC->m_pCandCond[pC->m_pCondChain[i]] = -1;
	}
	// eliminate the same condition
}

BOOL CGeneCluster::PruneThr(CCluster *&pC)
{
	if(pC->m_nPGene < (m_nMinG / 2))
	{
		if(pC != NULL)
		{
			delete pC;
			pC = NULL;
		}
		return true;
	}
	//--------------test---------------------------
//	OutputClusterSet(pC);
	//--------------test---------------------------
	
	if( (pC->m_nChainLen >= m_nMinC) &&
		((pC->m_nPGene + pC->m_nNGene) >= m_nMinG) &&
		((pC->m_nPGene > pC->m_nNGene) ||
		((pC->m_nPGene == pC->m_nNGene) &&
		(pC->m_pCondChain[0] < pC->m_pCondChain[1]))))
	{
		BOOL bExsit = CheckExist(pC);	// true represent already exist
		if(bExsit)
		{
			if(pC != NULL)
			{
				delete pC;
				pC = NULL;
			}

			return true;
		}
		else
		{
			OutputClusterSet(pC);
			return false;
		}
	}
	/**/
	return false;
}

// check whether the cluster is already in the result cluster set
BOOL CGeneCluster::CheckExist(CCluster *pC)
{
	BOOL bExist = true;
	CCluster *p = m_pCluster;
	while(p != NULL)
	{
		// check the order********************************
		for(int i = 0; i < m_nGene; ++i)
		{
			if(p->m_pPGene[i] != pC->m_pPGene[i])
			{
				bExist = false;
				break;
			}
			if(p->m_pNGene[i] != pC->m_pNGene[i])
			{
				bExist = false;
				break;
			}

		}
		for(i = 0; i < m_nCond; ++i)
		{
			if(p->m_pCondChain[i] != pC->m_pCondChain[i])
			{
				bExist = false;
			}
		}

		p = p->m_pNext;
	}
	return bExist;
}

void CGeneCluster::OutputClusterSet(CCluster *pC)
{
	CCluster *p = m_pCluster;
	if(p == pC)
	{
		return;
	}
	while(p->m_pNext != NULL)
	{
		p = p->m_pNext;
	}
	p->m_pNext = pC;
}

BOOL CGeneCluster::PruneFur(CCluster *pC)
{
	for(int i = 0; i < m_nCond; ++i)
	{
		if(pC->m_pCandCond[i] == 1)
		{
			int *pCandGene = new int[m_nGene];
			int nCandGeneSize = 0;

			ExplrSubsetGene(pC, i, pCandGene, nCandGeneSize);
			SlideOverGene(pC, i, pCandGene, nCandGeneSize);

			if(pCandGene != NULL)
			{
				delete [] pCandGene;
				pCandGene = NULL;
			}
		}

	}
	return false;
}

void CGeneCluster::ExplrSubsetGene(CCluster *pC, int nCond, int *&pCandGene, int &nCandGeneSize)
{
	for(int i = 0; i < m_nGene; ++i)
	{
		//m_pCandGene[i] = -1;
		pCandGene[i] = -1;
	}
	int nCount = 0;	// record the size of candidate gene

	//int nLastCond = pC->m_pCandCond[pC->m_nChainLen-1];
	int nLastCond = 0;
	if(pC->m_nChainLen == 0)
	{
		for(int i = 0; i < m_nGene; ++i)
		{
			//m_pCandGene[i] = -1;
			pCandGene[i] = i;
			m_pGene[i].m_dCohert = 0;
		}
		nCandGeneSize = m_nGene;
	}
	else
	{
		nLastCond = pC->m_pCondChain[pC->m_nChainLen-1];
		// find the candidate gene
		for(i = 0; i < m_nGene; ++i)
		{
			int nPreIndex = m_pGene[i].GetCondIndex(nLastCond);
			int nOrigionIndex = nPreIndex;

			for(int k = nPreIndex -1; k > 0; --k)
			{
				if(m_pGene[i].m_pChain[nPreIndex] == m_pGene[i].m_pChain[k])
				{
					--nPreIndex;
				}
				else
				{
					break;
				}
			}
			int nSucIndex = m_pGene[i].GetCondIndex(nCond);
			if((m_pGene[i].m_pChain[nSucIndex] == nPreIndex))// ||	
			  /* (m_pGene[i].m_pChain[nPreIndex] == nSucIndex))*/
			{
				//m_pCandGene[nCount] = i;
				pCandGene[nCount] = i;
				++nCount;
				//m_pGene[i].m_dCohert = m_pGene[i].CalculateCohert(pC->m_pCandCond[0], pC->m_pCandCond[1], pC->m_pCandCond[pC->m_nChainLen-1], nCond);
				m_pGene[i].m_dCohert = m_pGene[i].CalculateCohert(pC->m_pCondChain[0], 
																  pC->m_pCondChain[1], 
																  pC->m_pCondChain[pC->m_nChainLen-1], 
																  nCond);
			}
			else 
			{
				for(int k = nSucIndex -1; k > 0; --k)
				{
					if(m_pGene[i].m_pChain[nSucIndex] == m_pGene[i].m_pChain[k])
					{
						--nSucIndex;
					}
					else
					{
						break;
					}
				}
				if(m_pGene[i].m_pChain[nOrigionIndex] == nSucIndex)
				{
					//m_pCandGene[nCount] = i;
					pCandGene[nCount] = i;
					++nCount;
					//m_pGene[i].m_dCohert = m_pGene[i].CalculateCohert(pC->m_pCandCond[0], pC->m_pCandCond[1], pC->m_pCandCond[pC->m_nChainLen-1], nCond);
					m_pGene[i].m_dCohert = m_pGene[i].CalculateCohert(pC->m_pCondChain[0], 
																	  pC->m_pCondChain[1], 
																	  pC->m_pCondChain[pC->m_nChainLen-1], 
																	  nCond);
				}
			}
		}
		nCandGeneSize = nCount;
	}
	

	// insertsort the candidate gene by coherence in nondescending order
	for(int j = 1; j < nCandGeneSize; ++j)
	{
		double val = m_pGene[pCandGene[j]].m_dCohert;
		int id = pCandGene[j];
		int i = j - 1;

		while((i >= 0) && (m_pGene[pCandGene[i]].m_dCohert > val))
		{
			pCandGene[i+1] = pCandGene[i];
			--i;
		}
		pCandGene[i+1] = id;
	}

}

void CGeneCluster::SlideOverGene(CCluster *pC, int nCond, int *&pCandGene, int &nCandGeneSize)
{
	for(int i = m_nMinG; i <= nCandGeneSize; ++i)
	{
		for(int j = 0; j <= nCandGeneSize - i; ++j)
		{
			BOOL bValid = true;
		 // -----------keep--------
		/**/	for(int m = 0; m < i; ++m)
			{
				for(int n = m + 1; n < i; ++n)
				{
					bValid = ValidateCoher(&m_pGene[pCandGene[j+m]],
										   &m_pGene[pCandGene[j+n]],
									       pC->m_pCondChain);	
					if(bValid == false)
					{
						//return;
						break;
					}
				}
				if(bValid == false)
				{
					break;
				}
			}
		
			if(bValid == false)
			{
				continue;
			}
		
			// append new cluster to the result chain
			CCluster * p = new CCluster;
			p->Initialize(m_nGene, m_nCond);

			// distinguish positive and negative
			if(pC->m_nChainLen == 0)
			{
				for(int k = 0; k < i; ++k)
				{
					int nCondIndex = m_pGene[pCandGene[j+k]].GetCondIndex(nCond);
					if(nCondIndex < (m_nCond / 2))
					{
						p->AppendGene(pCandGene[j+k], true);
					}
					else
					{
						p->AppendGene(pCandGene[j+k], false);
					}
				}

			}
			else
			{
				if(pC->m_nChainLen == 3)
				{
					int stop = 1;
				}

				for(int k = 0; k < i; ++k)
				{
					//-------------rewrite------------------------------------------
					if( m_pGene[pCandGene[j+k]].m_pCondVal[m_pGene[pCandGene[j+k]].GetCondIndex(pC->m_pCondChain[0])] <= 
						m_pGene[pCandGene[j+k]].m_pCondVal[m_pGene[pCandGene[j+k]].GetCondIndex(pC->m_pCondChain[pC->m_nChainLen-1])])
					{
						if(pC->m_pPGene[pCandGene[j+k]] == 1)
						{
							p->AppendGene(pCandGene[j+k], true);
						}
						else if(pC->m_pNGene[pCandGene[j+k]] == 1)
						{
							p->AppendGene(pCandGene[j+k], false);
						}
					}
					else
					{
						p->AppendGene(pCandGene[j+k], false);
					}
				}
			}
			cout<<"condition:";
			for(int k = 0; k < pC->m_nChainLen; ++k)
			{
				p->AppendCond(pC->m_pCondChain[k]);
				cout<<pC->m_pCondChain[k]<<",";
			}
			p->AppendCond(nCond);
			cout<<nCond<<endl;
			// recursive invoke hereafter
			MineCluster(p);
		}
	}
}

BOOL CGeneCluster::ValidateCoher(CGene *p1, CGene *p2, int *pCond)
{
	if(pCond[0] == -1)
	{
		return true;
	}
	for(int i = 1; i < m_nCond; ++i)
	{
		if(pCond[i] == -1)
		{
			return true;
		}
		double dCoher1 = p1->CalculateCohert(pCond[0], pCond[1], pCond[i-1], pCond[i]);
		double dCoher2 = p2->CalculateCohert(pCond[0], pCond[1], pCond[i-1], pCond[i]);
		if(fabs(dCoher1 - dCoher2) > m_dEpsilon)
		{
			return false;
		}
	}
	return true;
}

void CGeneCluster::MineCluster(CCluster *p)
{
	if(PruneOne(p))
	{
		return;
	}
	if(PruneThr(p))
	{
		return;
	}
                                             
	PruneTwo(p);
	PruneFur(p);
}
💿 文件大小 83 K
👤 上传用户 gjq2000
📂 所属分类数值算法/人工智能
🏷️ 相关标签

#变化 #分类算法 #数据表 #模式
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -