📄 ltkhierarchicalclustering.h
字号:
* CHANGE HISTROY
* Author Date Description of change
*************************************************************************************/
void getClusterResult(vector<vector<int> >& outClusterResult) const
{
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
<<"LTKHierarchicalClustering::getClusterResult()"<<endl;
for(int v=0;v<m_intermediateCG.size();v++)
{
outClusterResult.push_back(m_intermediateCG[v]);
}
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
<<"LTKHierarchicalClustering::getClusterResult()"<<endl;
}
/**********************************************************************************
* AUTHOR : Bharath A
* DATE : 22-FEB-2005
* NAME : computeProximityMatrix
* DESCRIPTION : Populates the argument (vector of vectors) with data objects indices.
* Each inner vector corresponds to a cluster.
* ARGUMENTS : distanceClassPtr - pointer to the class that has the distance definition
* distFuncPtr - function pointer to the distance function
* RETURNS : error code
* NOTES :
* CHANGE HISTROY
* Author Date Description of change
*************************************************************************************/
//void computeProximityMatrix(ComputeDistanceFunc computeDistFuncObj)
int computeProximityMatrix(DistanceClass* distanceClassPtr,
FN_PTR_DISTANCE distFuncPtr)
{
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
<<"LTKHierarchicalClustering::computeProximityMatrix()"<<endl;
m_distancePtr=distFuncPtr;
m_distClassPtr=distanceClassPtr;
int errorCode;
if((errorCode=computeDistances())!=SUCCESS)
{
LOG(LTKLogger::LTK_LOGLEVEL_ERR)
<<"Error: LTKHierarchicalClustering::computeProximityMatrix()"<<endl;
LTKReturnError(errorCode);
}
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
<<"LTKHierarchicalClustering::computeProximityMatrix()"<<endl;
return SUCCESS;
}
private:
/**********************************************************************************
* AUTHOR : Bharath A
* DATE : 22-FEB-2005
* NAME : computeDistances
* DESCRIPTION : Computes inter-object distances and puts them in the distance matrix
* ARGUMENTS :
* RETURNS : error code
* NOTES :
* CHANGE HISTROY
* Author Date Description of change
*************************************************************************************/
int computeDistances()
{
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
<<"LTKHierarchicalClustering::computeDistances()"<<endl;
for(int i=0;i<(m_data.size()-1);++i)
{
vector<float> eachRow((m_data.size()-i)-1);//added -1 at the end
int c=0;
for(int j=i+1;j<m_data.size();++j)
{
//external distance function called
int errorCode = (m_distClassPtr->*m_distancePtr)(m_data[i],m_data[j], eachRow[c]);
if (errorCode != SUCCESS )
{
LOG(LTKLogger::LTK_LOGLEVEL_ERR)
<<"Error while calling distance function"<<endl;
LOG(LTKLogger::LTK_LOGLEVEL_ERR)
<<"Error: LTKHierarchicalClustering::computeDistances()"<<endl;
LTKReturnError(errorCode);
}
++c;
}
m_proximityMatrix.push_back(eachRow);
}
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
<<"LTKHierarchicalClustering::computeDistances()"<<endl;
return SUCCESS;
}
/**********************************************************************************
* AUTHOR : Bharath A
* DATE : 22-FEB-2005
* NAME : clusterToFindNumClusters
* DESCRIPTION : Clusters the data objects hierarchically (agglomerative)
* till the desired number of
* clusters and also evaluates the stopping criterion selected.
* ARGUMENTS :
* RETURNS : error code
* NOTES :
* CHANGE HISTROY
* Author Date Description of change
*************************************************************************************/
int clusterToFindNumClusters()
{
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
<<"LTKHierarchicalClustering::clusterToFindNumClusters()"<<endl;
if(m_stoppingCriterion==LMETHOD)
{
//Number of clusters needs to be determined
if(m_determineClusters)
{
//map for number of clusters vs. merging distance
m_mergingDist.reserve(m_data.size());
}
}
else if(m_stoppingCriterion==AVG_SIL)
{
if(m_writeHTML==false && m_cachedResult.size()>0)
{
m_intermediateCG=m_cachedResult;
return SUCCESS;
}
}
for(int i=0;i<m_data.size();i++)
{
vector<int> v;
v.push_back(i);
//To begin with, each data object is cluster by itself
m_intermediateCG.push_back(v);
}
if(m_writeHTML) //If output is needed as html
{
string outputFilePath=m_outputDir+"/"+OUTPUT_HTML_FILE_NAME;
m_output.open(outputFilePath.c_str()); //Cluster output file is created
if(m_output.fail())
{
LOG(LTKLogger::LTK_LOGLEVEL_ERR)
<<"Unable to create file:"
<<outputFilePath<<endl;
LOG(LTKLogger::LTK_LOGLEVEL_ERR)
<<"Error : "<< EFILE_CREATION_FAILED
<<":"<< getErrorMessage(EFILE_CREATION_FAILED)
<<" LTKHierarchicalClustering::clusterToFindNumClusters()" <<endl;
LTKReturnError(EFILE_CREATION_FAILED);
}
/*Html tags are written*/
m_output<<"<html>\n";
m_output<<"<body>\n";
m_output<<"<table border='1' bordercolor='black'>\n";
m_output<<"<tr>\n";
for(int v=0;v<m_intermediateCG.size();v++)
{
int clusterSize=m_intermediateCG[v].size();
m_output<<"<td colspan=\""<<clusterSize<<"\">";
for(int w=0;w<clusterSize;w++)
{
if(m_hyperlinksVec.size()>0)
{
m_output<<"<a href='"
<<m_hyperlinksVec[m_intermediateCG[v][w]]
<<"'>"<<m_intermediateCG[v][w]<<"</a> ";
}
else
{
m_output<<m_intermediateCG[v][w]<<" ";
}
//if there is an image file corresponding to each data object
if(!m_imageFileExtn.empty())
{
m_output<<"<img src=\""
<<m_intermediateCG[v][w]<<"."<<m_imageFileExtn
<<"\" border=\"0\"/> ";
}
}
}
m_output<<"<td><b>";
m_output<<"Inter-cluster Dist";
m_output<<"</b></td>";
m_output<<"</tr>\n";
}
if(m_numOfClusters<m_data.size() || m_determineClusters==true)
{
int currNumOfClusters = m_data.size();
//this local variable is used only for Average Silhouette method
float minSil=FLT_MAX;
for(int it=0;it<(m_data.size()-m_numOfClusters);++it)
{
vector<int> toCluster;
//to find the clusters that need to be merged
float interClusterDistance=findGroup(toCluster);
currNumOfClusters=m_data.size()-it-1;
if(m_stoppingCriterion==AVG_SIL)
{
float silDiff=computeAvgSil(toCluster[0],toCluster[1]);
if(silDiff<minSil)
{
minSil=silDiff;
if(currNumOfClusters > 2)
{
m_numBySil=currNumOfClusters+1;
m_cachedResult=m_intermediateCG;
}
}
}
else if(m_stoppingCriterion==LMETHOD && m_determineClusters==true)
{
m_mergingDist[currNumOfClusters]=interClusterDistance;
}
//clusters are merged
m_intermediateCG[toCluster[0]].insert(m_intermediateCG[toCluster[0]].end(),
m_intermediateCG[toCluster[1]].begin(),
m_intermediateCG[toCluster[1]].end());
//old cluster deleted
m_intermediateCG.erase(m_intermediateCG.begin()+ toCluster[1]);
if(m_writeHTML)
{
if(!m_showAllLevels)
{
if(currNumOfClusters==m_numOfClusters)
{
writeClustersAsHTML(interClusterDistance);
}
}
else
{
writeClustersAsHTML(interClusterDistance);
}
}
}
}
//closing the html tags
if(m_writeHTML)
{
m_output<<"</table>\n";
m_output<<"</body>\n";
m_output<<"</html>";
m_output.close();
}
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
<<"LTKHierarchicalClustering::clusterToFindNumClusters()"<<endl;
return SUCCESS;
}
/**********************************************************************************
* AUTHOR : Bharath A
* DATE : 22-FEB-2005
* NAME : getInterObjectDistance
* DESCRIPTION : Returns the distance between two data objects from the distance matrix
* ARGUMENTS : firstObjIndex - index of the first data object
* secondObjIndex - index of the second data object
* RETURNS : distance (float)
* NOTES :
* CHANGE HISTROY
* Author Date Description of change
*************************************************************************************/
float getInterObjectDistance(int firstObjIndex,int secondObjIndex) const
{
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
<<"LTKHierarchicalClustering::getInterObjectDistance()"<<endl;
int row = 0;
int col = 0;
if(firstObjIndex < secondObjIndex)
{
row=firstObjIndex;
col=secondObjIndex;
}
else
{
row=secondObjIndex;
col=firstObjIndex;
} //lesser index is made as row and the other as column
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
<<"LTKHierarchicalClustering::getInterObjectDistance()"<<endl;
return m_proximityMatrix[row][col-row-1];
}
/**********************************************************************************
* AUTHOR : Bharath A
* DATE : 22-FEB-2005
* NAME : findGroup
* DESCRIPTION : Finds the indices in the m_intermediateCG (clusters) that
* need to be merged
* ARGUMENTS : pairToCombine - vector for storing the cluster indices
* RETURNS : inter cluster distance (float)
* NOTES :
* CHANGE HISTROY
* Author Date Description of change
*************************************************************************************/
//double findGroup(vector<int> pairToCombine)
float findGroup(vector<int>& pairToCombine) const
{
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
<<"LTKHierarchicalClustering::findGroup()"<<endl;
float minDistance=FLT_MAX;
pairToCombine.clear();
pairToCombine.resize(2);
for(int i=0;i<m_intermediateCG.size();i++)
{
for(int j=i+1;j<m_intermediateCG.size();j++)
{
float tempDist=findInterClusterDistance(m_intermediateCG[i],
m_intermediateCG[j]);
if(tempDist<minDistance)
{
minDistance=tempDist;
pairToCombine[0]=i;
pairToCombine[1]=j;
}
}
}
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
<<"LTKHierarchicalClustering::findGroup()"<<endl;
return minDistance;
}
/**********************************************************************************
* AUTHOR : Bharath A
* DATE : 22-FEB-2005
* NAME : findInterClusterDistance
* DESCRIPTION : Finds the inter-cluster distance.
* The contents of each cluster are in the vectors.
* ARGUMENTS : v1 cluster one
* v2 cluster two
* RETURNS : inter cluster distance
* NOTES :
* CHANGE HISTROY
* Author Date Description of change
*************************************************************************************/
//double findInterClusterDistance(const vector<int>& v1,const vector<int>& v2)
float findInterClusterDistance(const vector<int>& cluster1, const vector<int>& cluster2) const
{
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
<<"LTKHierarchicalClustering::findInterClusterDistance()"<<endl;
float groupDistance=0.0;
/*For single-linkage algorithm*/
if(m_method==SINGLE_LINKAGE)
{
groupDistance= FLT_MAX;
for(int i=0;i<cluster1.size();i++)
{
for(int j=0;j<cluster2.size();j++)
{
float temp=getInterObjectDistance(cluster1[i],cluster2[j]);
if(temp < groupDistance)
{
groupDistance=temp;
}
}
}
}
/*For average-linkage algorithm*/
if(m_method==AVERAGE_LINKAGE)
{
groupDistance=0.0;
for(int i=0;i<cluster1.size();i++)
{
for(int j=0;j<cluster2.size();j++)
{
groupDistance+=getInterObjectDistance(cluster1[i],cluster2[j]);
}
}
groupDistance/=((float)(cluster1.size()*cluster2.size()));
}
/*For complete-linkage algorithm*/
if(m_method==COMPLETE_LINKAGE)
{
groupDistance=0.0;
for(int i=0;i<cluster1.size();i++)
{
for(int j=0;j<cluster2.size();j++)
{
float temp=getInterObjectDistance(cluster1[i],cluster2[j]);
if(temp > groupDistance)
{
groupDistance=temp;
}
}
}
}
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
<<"LTKHierarchicalClustering::findInterClusterDistance()"<<endl;
return groupDistance;
}
/**********************************************************************************
* AUTHOR : Bharath A
* DATE : 22-FEB-2005
* NAME : writeClustersAsHTML
* DESCRIPTION : Writes the cluster results as html with data objects' and clusters' ids.
* If hyperlinks vector is set, provides links to actual files.
* ARGUMENTS : interClustDist - merging distance of the new cluster formed
* RETURNS :
* NOTES :
* CHANGE HISTROY
* Author Date Description of change
*************************************************************************************/
void writeClustersAsHTML(float interClustDist)
{
LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
<<"LTKHierarchicalClustering::writeClustersAsHTML()"<<endl;
m_output<<"<tr>\n";
for(int v=0;v<m_intermediateCG.size();v++)
{
int clusterSize=m_intermediateCG[v].size();
m_output<<"<td colspan=\""<<clusterSize<<"\">";
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -