📄 genecluster.cpp
字号:
// GeneCluster.cpp: implementation of the CGeneCluster class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "RegCluster.h"
#include "GeneCluster.h"
#include <cmath>
#include <iostream.h>
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
CGeneCluster::CGeneCluster()
{
SetUp();
}
CGeneCluster::~CGeneCluster()
{
Destruct();
}
void CGeneCluster::SetUp()
{
m_nGene = 0;
m_nCond = 0;
m_nMinC = 0;
m_nMinG = 0;
m_dEpsilon = 0;
m_nCandGeneSize = 0;
m_pGene = NULL;
m_pCluster = NULL;
m_pCandGene = NULL;
}
void CGeneCluster::Destruct()
{
if(m_pCluster != NULL)
{
delete m_pCluster;
m_pCluster = NULL;
}
if(m_pCandGene != NULL)
{
delete [] m_pCandGene;
m_pCandGene = NULL;
}
if(m_pGene != NULL)
{
delete [] m_pGene;
m_pGene = NULL;
}
}
void CGeneCluster::initialize(int nGene, int nCond, double **pValue,
double dGama, double dEpsilon, int nMinG, int nMinC)
{
m_nGene = nGene;
m_nCond = nCond;
m_dEpsilon = dEpsilon;
m_nMinG = nMinG;
m_nMinC = nMinC;
Destruct();
m_pGene = new CGene[m_nGene];
m_pCluster = new CCluster;
//m_pCandGene = new int[m_nGene];
m_pCluster->Initialize(nGene, nCond);
for(int i = 0; i < m_nGene; ++i)
{
m_pGene[i].SetCondSize(nCond);
m_pGene[i].SetGama(dGama);
m_pGene[i].InitializeGene(pValue[i]);
m_pGene[i].SortCondVal();
m_pGene[i].ChainCond();
m_pCluster->AppendGene(i, true);
m_pCluster->AppendGene(i, false);
// m_pCandGene[i] = -1;
}
}
BOOL CGeneCluster::PruneOne(CCluster *&pC)
{
if((pC->m_nPGene + pC->m_nNGene) < m_nMinG)
{
if(pC != NULL)
{
delete pC;
pC = NULL;
}
return true;
}
return false;
}
BOOL CGeneCluster::PruneTwo(CCluster *pC)
{
EstChainLen(pC);
ScanCandCond(pC);
return false;
}
// estimate the length of the chain
void CGeneCluster::EstChainLen(CCluster *pC)
{
if(pC->m_nChainLen == 0)
{
return ;
}
///////////////rewrite
int nLastCond = pC->m_pCondChain[pC->m_nChainLen-1];
//------------------------------------
for(int i = 0; i < m_nGene; ++i)
{
if(pC->m_pPGene[i] == 1) // the ith gene is in the current gene set
{
int j = m_pGene[i].GetCondIndex(nLastCond);
int nCount = 0;
for(; j < m_nCond; ++j)
{
if(m_pGene[i].m_pChain[j] > -1)
{
++nCount;
}
}
if((pC->m_nChainLen + nCount) < m_nMinC) // chain length samll than the threshold MinC
{
pC->m_pPGene[i] = -1;
}
}
}
}
// scan candidate condition
void CGeneCluster::ScanCandCond(CCluster *pC)
{
if(pC->m_nChainLen == 0)
{
for(int i = 0; i < m_nGene; ++i)
{
// from begining to end
int nCond = m_pGene[i].GetCondID(0);
pC->m_pCandCond[nCond] = 1;
for(int j = 1; j < m_nCond; ++j)
{
if(m_pGene[i].m_pCondVal[0] == m_pGene[i].m_pCondVal[j])
{
nCond = m_pGene[i].GetCondID(j);
pC->m_pCandCond[nCond] = 1;
}
else
{
break;
}
}
// from end to beginning
nCond = m_pGene[i].GetCondID(m_nCond-1);
pC->m_pCandCond[nCond] = 1;
for(j = m_nCond - 2; j >= 0; --j)
{
if(m_pGene[i].m_pCondVal[m_nCond-1] == m_pGene[i].m_pCondVal[j])
{
nCond = m_pGene[i].GetCondID(j);
pC->m_pCandCond[nCond] = 1;
}
else
{
break;
}
}
}
/*
for(i = 0; i < pC->m_nChainLen; ++i)
{
pC->m_pCandCond[pC->m_pCondChain[i]] = -1;
}
*/
}
else
{
int nLastCond = pC->m_pCondChain[pC->m_nChainLen-1]; // get the last condition ID
for(int i = 0; i < m_nGene; ++i)
{
int nIndex = m_pGene[i].GetCondIndex(nLastCond); // get the index in pID
for(int k = nIndex -1; k > 0; --k)
{
if(m_pGene[i].m_pChain[nIndex] == m_pGene[i].m_pChain[k])
{
--nIndex;
}
else
{
break;
}
}
for(int j = nIndex + 1; j < m_nCond; ++j)
{
if(m_pGene[i].m_pChain[j] == nIndex) // there is a chain point to the condition
{
int nCond = m_pGene[i].GetCondID(j);
pC->m_pCandCond[nCond] = 1;
// eliminate duplicate condition
if(nCond == nLastCond)
{
pC->m_pCandCond[nCond] = -1;
}
/*
for(int k = nIndex; k < m_nCond; ++k)
{
if(m_pGene[i].m_pCondVal[nIndex] == m_pGene[i].m_pCondVal[k])
{
int nCond = m_pGene[i].GetCondID(k);//nIndex
pC->m_pCandCond[nCond] = 1; // set it in candidate list
}
else
{
break;
}
}
*/
}
}
}
}
for(int i = 0; i < pC->m_nChainLen; ++i)
{
pC->m_pCandCond[pC->m_pCondChain[i]] = -1;
}
// eliminate the same condition
}
BOOL CGeneCluster::PruneThr(CCluster *&pC)
{
if(pC->m_nPGene < (m_nMinG / 2))
{
if(pC != NULL)
{
delete pC;
pC = NULL;
}
return true;
}
//--------------test---------------------------
// OutputClusterSet(pC);
//--------------test---------------------------
if( (pC->m_nChainLen >= m_nMinC) &&
((pC->m_nPGene + pC->m_nNGene) >= m_nMinG) &&
((pC->m_nPGene > pC->m_nNGene) ||
((pC->m_nPGene == pC->m_nNGene) &&
(pC->m_pCondChain[0] < pC->m_pCondChain[1]))))
{
BOOL bExsit = CheckExist(pC); // true represent already exist
if(bExsit)
{
if(pC != NULL)
{
delete pC;
pC = NULL;
}
return true;
}
else
{
OutputClusterSet(pC);
return false;
}
}
/**/
return false;
}
// check whether the cluster is already in the result cluster set
BOOL CGeneCluster::CheckExist(CCluster *pC)
{
BOOL bExist = true;
CCluster *p = m_pCluster;
while(p != NULL)
{
// check the order********************************
for(int i = 0; i < m_nGene; ++i)
{
if(p->m_pPGene[i] != pC->m_pPGene[i])
{
bExist = false;
break;
}
if(p->m_pNGene[i] != pC->m_pNGene[i])
{
bExist = false;
break;
}
}
for(i = 0; i < m_nCond; ++i)
{
if(p->m_pCondChain[i] != pC->m_pCondChain[i])
{
bExist = false;
}
}
p = p->m_pNext;
}
return bExist;
}
void CGeneCluster::OutputClusterSet(CCluster *pC)
{
CCluster *p = m_pCluster;
if(p == pC)
{
return;
}
while(p->m_pNext != NULL)
{
p = p->m_pNext;
}
p->m_pNext = pC;
}
BOOL CGeneCluster::PruneFur(CCluster *pC)
{
for(int i = 0; i < m_nCond; ++i)
{
if(pC->m_pCandCond[i] == 1)
{
int *pCandGene = new int[m_nGene];
int nCandGeneSize = 0;
ExplrSubsetGene(pC, i, pCandGene, nCandGeneSize);
SlideOverGene(pC, i, pCandGene, nCandGeneSize);
if(pCandGene != NULL)
{
delete [] pCandGene;
pCandGene = NULL;
}
}
}
return false;
}
void CGeneCluster::ExplrSubsetGene(CCluster *pC, int nCond, int *&pCandGene, int &nCandGeneSize)
{
for(int i = 0; i < m_nGene; ++i)
{
//m_pCandGene[i] = -1;
pCandGene[i] = -1;
}
int nCount = 0; // record the size of candidate gene
//int nLastCond = pC->m_pCandCond[pC->m_nChainLen-1];
int nLastCond = 0;
if(pC->m_nChainLen == 0)
{
for(int i = 0; i < m_nGene; ++i)
{
//m_pCandGene[i] = -1;
pCandGene[i] = i;
m_pGene[i].m_dCohert = 0;
}
nCandGeneSize = m_nGene;
}
else
{
nLastCond = pC->m_pCondChain[pC->m_nChainLen-1];
// find the candidate gene
for(i = 0; i < m_nGene; ++i)
{
int nPreIndex = m_pGene[i].GetCondIndex(nLastCond);
int nOrigionIndex = nPreIndex;
for(int k = nPreIndex -1; k > 0; --k)
{
if(m_pGene[i].m_pChain[nPreIndex] == m_pGene[i].m_pChain[k])
{
--nPreIndex;
}
else
{
break;
}
}
int nSucIndex = m_pGene[i].GetCondIndex(nCond);
if((m_pGene[i].m_pChain[nSucIndex] == nPreIndex))// ||
/* (m_pGene[i].m_pChain[nPreIndex] == nSucIndex))*/
{
//m_pCandGene[nCount] = i;
pCandGene[nCount] = i;
++nCount;
//m_pGene[i].m_dCohert = m_pGene[i].CalculateCohert(pC->m_pCandCond[0], pC->m_pCandCond[1], pC->m_pCandCond[pC->m_nChainLen-1], nCond);
m_pGene[i].m_dCohert = m_pGene[i].CalculateCohert(pC->m_pCondChain[0],
pC->m_pCondChain[1],
pC->m_pCondChain[pC->m_nChainLen-1],
nCond);
}
else
{
for(int k = nSucIndex -1; k > 0; --k)
{
if(m_pGene[i].m_pChain[nSucIndex] == m_pGene[i].m_pChain[k])
{
--nSucIndex;
}
else
{
break;
}
}
if(m_pGene[i].m_pChain[nOrigionIndex] == nSucIndex)
{
//m_pCandGene[nCount] = i;
pCandGene[nCount] = i;
++nCount;
//m_pGene[i].m_dCohert = m_pGene[i].CalculateCohert(pC->m_pCandCond[0], pC->m_pCandCond[1], pC->m_pCandCond[pC->m_nChainLen-1], nCond);
m_pGene[i].m_dCohert = m_pGene[i].CalculateCohert(pC->m_pCondChain[0],
pC->m_pCondChain[1],
pC->m_pCondChain[pC->m_nChainLen-1],
nCond);
}
}
}
nCandGeneSize = nCount;
}
// insertsort the candidate gene by coherence in nondescending order
for(int j = 1; j < nCandGeneSize; ++j)
{
double val = m_pGene[pCandGene[j]].m_dCohert;
int id = pCandGene[j];
int i = j - 1;
while((i >= 0) && (m_pGene[pCandGene[i]].m_dCohert > val))
{
pCandGene[i+1] = pCandGene[i];
--i;
}
pCandGene[i+1] = id;
}
}
void CGeneCluster::SlideOverGene(CCluster *pC, int nCond, int *&pCandGene, int &nCandGeneSize)
{
for(int i = m_nMinG; i <= nCandGeneSize; ++i)
{
for(int j = 0; j <= nCandGeneSize - i; ++j)
{
BOOL bValid = true;
// -----------keep--------
/**/ for(int m = 0; m < i; ++m)
{
for(int n = m + 1; n < i; ++n)
{
bValid = ValidateCoher(&m_pGene[pCandGene[j+m]],
&m_pGene[pCandGene[j+n]],
pC->m_pCondChain);
if(bValid == false)
{
//return;
break;
}
}
if(bValid == false)
{
break;
}
}
if(bValid == false)
{
continue;
}
// append new cluster to the result chain
CCluster * p = new CCluster;
p->Initialize(m_nGene, m_nCond);
// distinguish positive and negative
if(pC->m_nChainLen == 0)
{
for(int k = 0; k < i; ++k)
{
int nCondIndex = m_pGene[pCandGene[j+k]].GetCondIndex(nCond);
if(nCondIndex < (m_nCond / 2))
{
p->AppendGene(pCandGene[j+k], true);
}
else
{
p->AppendGene(pCandGene[j+k], false);
}
}
}
else
{
if(pC->m_nChainLen == 3)
{
int stop = 1;
}
for(int k = 0; k < i; ++k)
{
//-------------rewrite------------------------------------------
if( m_pGene[pCandGene[j+k]].m_pCondVal[m_pGene[pCandGene[j+k]].GetCondIndex(pC->m_pCondChain[0])] <=
m_pGene[pCandGene[j+k]].m_pCondVal[m_pGene[pCandGene[j+k]].GetCondIndex(pC->m_pCondChain[pC->m_nChainLen-1])])
{
if(pC->m_pPGene[pCandGene[j+k]] == 1)
{
p->AppendGene(pCandGene[j+k], true);
}
else if(pC->m_pNGene[pCandGene[j+k]] == 1)
{
p->AppendGene(pCandGene[j+k], false);
}
}
else
{
p->AppendGene(pCandGene[j+k], false);
}
}
}
cout<<"condition:";
for(int k = 0; k < pC->m_nChainLen; ++k)
{
p->AppendCond(pC->m_pCondChain[k]);
cout<<pC->m_pCondChain[k]<<",";
}
p->AppendCond(nCond);
cout<<nCond<<endl;
// recursive invoke hereafter
MineCluster(p);
}
}
}
BOOL CGeneCluster::ValidateCoher(CGene *p1, CGene *p2, int *pCond)
{
if(pCond[0] == -1)
{
return true;
}
for(int i = 1; i < m_nCond; ++i)
{
if(pCond[i] == -1)
{
return true;
}
double dCoher1 = p1->CalculateCohert(pCond[0], pCond[1], pCond[i-1], pCond[i]);
double dCoher2 = p2->CalculateCohert(pCond[0], pCond[1], pCond[i-1], pCond[i]);
if(fabs(dCoher1 - dCoher2) > m_dEpsilon)
{
return false;
}
}
return true;
}
void CGeneCluster::MineCluster(CCluster *p)
{
if(PruneOne(p))
{
return;
}
if(PruneThr(p))
{
return;
}
PruneTwo(p);
PruneFur(p);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -