📄 ltkhierarchicalclustering.h
字号:
m_output<<"("<<v<<")<br>";
for(int w=0;w<clusterSize;w++)
{
if(m_hyperlinksVec.size()>0)
{
m_output<<"<a href='"<<m_hyperlinksVec[m_intermediateCG[v][w]]
<<"'>"<<m_intermediateCG[v][w]<<"</a> ";
}
else
{
m_output<<m_intermediateCG[v][w]<<" ";
}
if(!m_imageFileExtn.empty())
{
m_output<<"<img src=\""<<m_intermediateCG[v][w]<<"."
<<m_imageFileExtn
<<"\" border=\"0\"/> ";
}
}
}
m_output<<"<td>";
m_output<<"("<<m_intermediateCG.size()
<<") <b>"<<interClustDist<<"</b>";
m_output<<"</td>";
m_output<<"</tr>\n";
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
<<"LTKHierarchicalClustering::writeClustersAsHTML()"<<endl;
}
/**********************************************************************************
* AUTHOR : Bharath A
* DATE : 22-FEB-2005
* NAME : determineNumOfClusters
* DESCRIPTION : Determines the number of clusters to be formed using
* iterative refinement of L-method.
* REFERENCE:S. Salvador and P. Chan. Determining the number of clusters/
* segments in hierarchical clustering/segmentation algorithms.
* Proceedings of 16th IEEE International Conference
* on Tools with Artificial Intelligence, 3:1852-1857, 2004.
* ARGUMENTS :
* RETURNS : number of clusters
* NOTES :
* CHANGE HISTROY
* Author Date Description of change
*************************************************************************************/
int determineNumOfClusters() const
{
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
<<"LTKHierarchicalClustering::determineNumOfClusters()"<<endl;
int cutOff=(int)m_mergingDist.capacity()-1;
int lastKnee=0;
int currentKnee=0;
lastKnee=cutOff;
currentKnee=lastKnee;
bool trueCutOff=false;
/*Iterative refinement of the L-method result*/
while(true)
{
lastKnee=currentKnee;
currentKnee=determineKnee(cutOff);
if(trueCutOff)
{
if(currentKnee >= lastKnee)
{
break;
}
}
int temp=currentKnee*2;
if(temp>cutOff)
{
--cutOff;
trueCutOff=false;
}
else
{
cutOff=temp;
trueCutOff=true;
}
if(cutOff < MIN_CUTOFF)
{
break;
}
}
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)
<<"Number of clusters determined by iterative refinement:"
<<currentKnee<<endl;
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
<<"LTKHierarchicalClustering::determineNumOfClusters()"<<endl;
return currentKnee;
}
/**********************************************************************************
* AUTHOR : Bharath A
* DATE : 22-FEB-2005
* NAME : determineKnee
* DESCRIPTION : Determines the knee of the given number of clusters vs. merging distance curve
* ARGUMENTS : maxNumClusters - maximum number of clusters
* RETURNS : knee point of the curve
* NOTES :
* CHANGE HISTROY
* Author Date Description of change
*************************************************************************************/
// TODO: handle errors
int determineKnee(int maxNumClusters) const
{
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
<<"LTKHierarchicalClustering::determineKnee()"<<endl;
int result=0;
float minRMSE = FLT_MAX;
//atleast two points are required to fit lines, hence c ranges
//from 3 to maxNumClusters-2
for(int c=3;c<maxNumClusters-2;c++)
{
float lRMSE=0,rRMSE=0;
findRMSE(c,maxNumClusters,lRMSE,rRMSE);
float cRMSE=(((float)(c-1)/(float)(maxNumClusters-1))*lRMSE)
+(((float)(maxNumClusters-c)/(float)(maxNumClusters-1))*rRMSE);
if(cRMSE<minRMSE)
{
minRMSE=cRMSE;
result=c;
}
}
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
<<"LTKHierarchicalClustering::determineKnee()"<<endl;
return result+1;
}
/**********************************************************************************
* AUTHOR : Bharath A
* DATE : 22-FEB-2005
* NAME : determineKnee
* DESCRIPTION : Determines left and right RMSE values of the given point
* on the no. of clusters vs. merging distance curve.
* It fits two regression lines on either side of the point to find RMSEs
* ARGUMENTS : candidateKnee - candidata knee point
* maxNumClusters - upper bound on number of clusters
* lRMSE - output RMSE on the left of c
* rRMSE - output RMSE on the right of c
* RETURNS :
* NOTES :
* CHANGE HISTROY
* Author Date Description of change
*************************************************************************************/
// TODO: prototype change
void findRMSE(int candidateKnee,int maxNumClusters,float& outLRMSE,float& outRRMSE) const
{
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
<<"LTKHierarchicalClustering::findRMSE()"<<endl;
float avgXLeft = 0;
float avgYLeft = 0;
float avgXRight = 0;
float avgYRight = 0;
/*Regression line coefficients*/
float beta0Left = 0;
float beta1Left = 0;
float beta0Right = 0;
float beta1Right = 0;
int i = 0;
int j = 0;
for(i=2;i<=candidateKnee;i++)
{
avgYLeft+=m_mergingDist[i];
avgXLeft+=i;
}
avgYLeft /= (candidateKnee-1);
avgXLeft /= (candidateKnee-1);
for(j=candidateKnee+1;j<=maxNumClusters;j++)
{
avgYRight += m_mergingDist[j];
avgXRight += j;
}
avgYRight /= (maxNumClusters-candidateKnee);
avgXRight /= (maxNumClusters-candidateKnee);
float numer=0;
float denom=0;
for(i=2;i<=candidateKnee;i++)
{
numer += ((i-avgXLeft)*(m_mergingDist[i]-avgYLeft));
denom += ((i-avgXLeft)*(i-avgXLeft));
}
beta1Left = numer/denom;
beta0Left = avgYLeft-(beta1Left*avgXLeft);
numer=0;denom=0;
for(j=candidateKnee+1;j<=maxNumClusters;j++)
{
numer += ((j-avgXRight)*(m_mergingDist[j]-avgYRight));
denom += ((j-avgXRight)*(j-avgXRight));
}
if(denom > EPS)
{
beta1Right = numer / denom;
}
else
{
beta1Right = 0;
}
beta0Right = avgYRight-(beta1Right*avgXRight);
float errorSOS=0;
for(i=2;i<=candidateKnee;i++)
{
float yCap = (beta0Left+(beta1Left*i));
errorSOS += ((m_mergingDist[i]-yCap)*(m_mergingDist[i]-yCap));
}
outLRMSE=sqrt(errorSOS/(candidateKnee-2));
errorSOS=0;
for(j=candidateKnee+1;j<=maxNumClusters;j++)
{
float yCap = beta0Right + (beta1Right*j);
errorSOS += (m_mergingDist[j]-yCap)*(m_mergingDist[j]-yCap);
}
outRRMSE=sqrt(errorSOS/(maxNumClusters-candidateKnee-1));
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
<<"LTKHierarchicalClustering::findRMSE()"<<endl;
}
/**********************************************************************************
* AUTHOR : Bharath A
* DATE : 22-FEB-2005
* NAME : computeAvgSil
* DESCRIPTION : Funtion that determines the change in the ratio of
* intra and inter cluster similarity before and after merging
* ARGUMENTS : clust1Index - index of cluster one
* clust2Index - index of cluster two
* RETURNS : average silhouette computed
* NOTES :
* CHANGE HISTROY
* Author Date Description of change
*************************************************************************************/
// TODO: prototype change, error handling
float computeAvgSil(int clust1Index,int clust2Index) const
{
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
<<"LTKHierarchicalClustering::computeAvgSil()"<<endl;
const vector<int>& clust1=m_intermediateCG[clust1Index];
const vector<int>& clust2=m_intermediateCG[clust2Index];
vector<int> combinedClust;
combinedClust.insert(combinedClust.end(),clust1.begin(),clust1.end());
combinedClust.insert(combinedClust.end(),clust2.begin(),clust2.end());
float clust1TotalSils=0.0f,clust2TotalSils=0.0f,combinedClustTotalSils=0.0f;
for(int i=0;i<clust1.size();i++)
{
int dataObj=clust1[i];
float avgIntraDist=0.0;
if(clust1.size()>1)
{
for(int j=0;j<clust1.size();j++)
{
if(clust1[j]!=dataObj)
{
avgIntraDist+=getInterObjectDistance(dataObj,clust1[j]);
}
}
avgIntraDist/=((float)(clust1.size()-1));
}
float minInterDist= FLT_MAX;
for(int r=0;r<m_intermediateCG.size();r++)
{
float avgInterDist=0.0;
if(r!=clust1Index)
{
for(int c=0;c<m_intermediateCG[r].size();c++)
{
avgInterDist+=getInterObjectDistance(dataObj,m_intermediateCG[r][c]);
}
avgInterDist/=((float)m_intermediateCG[r].size());
if(avgInterDist<minInterDist)
{
minInterDist=avgInterDist;
}
}
}
float dataObjSil=0.0;
if(minInterDist > avgIntraDist && minInterDist > EPS)
{
dataObjSil=(minInterDist-avgIntraDist)/minInterDist;
}
else if(avgIntraDist > EPS)
{
dataObjSil=(minInterDist-avgIntraDist)/avgIntraDist;
}
clust1TotalSils+=dataObjSil;
}
for(int ii=0;ii<clust2.size();ii++)
{
int dataObj=clust2[ii];
float avgIntraDist=0.0;
if(clust2.size()>1)
{
for(int j=0;j<clust2.size();j++)
{
if(clust2[j]!=dataObj)
{
avgIntraDist+=getInterObjectDistance(dataObj,clust2[j]);
}
}
avgIntraDist/=((float)(clust2.size()-1));
}
float minInterDist= FLT_MAX;
for(int r=0;r<m_intermediateCG.size();r++)
{
float avgInterDist=0.0;
if(r!=clust2Index)
{
for(int c=0;c<m_intermediateCG[r].size();c++)
{
avgInterDist+=getInterObjectDistance(dataObj,
m_intermediateCG[r][c]);
}
avgInterDist/=((float)m_intermediateCG[r].size());
if(avgInterDist < minInterDist)
{
minInterDist=avgInterDist;
}
}
}
float dataObjSil=0.0;
if(minInterDist > avgIntraDist && minInterDist > EPS)
{
dataObjSil=(minInterDist-avgIntraDist)/minInterDist;
}
else if(avgIntraDist > EPS)
{
dataObjSil=(minInterDist-avgIntraDist)/avgIntraDist;
}
clust2TotalSils+=dataObjSil;
}
for(int iii=0;iii<combinedClust.size();iii++)
{
int dataObj=combinedClust[iii];
float avgIntraDist=0.0;
if(combinedClust.size()>1)
{
for(int j=0;j<combinedClust.size();j++)
{
if(combinedClust[j]!=dataObj)
{
avgIntraDist+=getInterObjectDistance(dataObj,combinedClust[j]);
}
}
avgIntraDist/=((float)(combinedClust.size()-1));
}
float minInterDist=FLT_MAX;
for(int r=0;r<m_intermediateCG.size();r++)
{
if(r!=clust1Index && r!=clust2Index)
{
float avgInterDist=0.0f;
for(int c=0;c<m_intermediateCG[r].size();c++)
{
avgInterDist+=getInterObjectDistance(dataObj,m_intermediateCG[r][c]);
}
avgInterDist = (float)avgInterDist/ ((float)m_intermediateCG[r].size());
if(avgInterDist<minInterDist)
{
minInterDist=avgInterDist;
}
}
}
float dataObjSil=0.0;
if(minInterDist>avgIntraDist && minInterDist > EPS)
{
dataObjSil=(minInterDist-avgIntraDist)/minInterDist;
}
else if(avgIntraDist > EPS)
{
dataObjSil=(minInterDist-avgIntraDist)/avgIntraDist;
}
combinedClustTotalSils+=dataObjSil;
}
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
<<"LTKHierarchicalClustering::computeAvgSil()"<<endl;
return (combinedClustTotalSils-clust1TotalSils-clust2TotalSils);
}
};
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -