⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ltkhierarchicalclustering.h

📁 An open source handwriting recongnition package!!!
💻 H
📖 第 1 页 / 共 3 页
字号:
* CHANGE HISTROY
* Author			Date				Description of change
*************************************************************************************/
void getClusterResult(vector<vector<int> >& outClusterResult) const
{
	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
		<<"LTKHierarchicalClustering::getClusterResult()"<<endl;

	for(int v=0;v<m_intermediateCG.size();v++)
	{

		outClusterResult.push_back(m_intermediateCG[v]);
	}

	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
		<<"LTKHierarchicalClustering::getClusterResult()"<<endl;
}

/**********************************************************************************
* AUTHOR		: Bharath A
* DATE			: 22-FEB-2005
* NAME			: computeProximityMatrix
* DESCRIPTION	: Populates the argument (vector of vectors) with data objects indices.
*				  Each inner vector corresponds to a cluster.
* ARGUMENTS		: distanceClassPtr - pointer to the class that has the distance definition
*				  distFuncPtr - function pointer to the distance function
* RETURNS		: error code
* NOTES			:
* CHANGE HISTROY
* Author			Date				Description of change
*************************************************************************************/

//void computeProximityMatrix(ComputeDistanceFunc computeDistFuncObj)
int computeProximityMatrix(DistanceClass* distanceClassPtr,
						   FN_PTR_DISTANCE distFuncPtr)
{
	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
		<<"LTKHierarchicalClustering::computeProximityMatrix()"<<endl;

	m_distancePtr=distFuncPtr;
	m_distClassPtr=distanceClassPtr;

	int errorCode;

	if((errorCode=computeDistances())!=SUCCESS)
	{
		 LOG(LTKLogger::LTK_LOGLEVEL_ERR)
			<<"Error: LTKHierarchicalClustering::computeProximityMatrix()"<<endl;

		LTKReturnError(errorCode);
	}

	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
		<<"LTKHierarchicalClustering::computeProximityMatrix()"<<endl;

	return SUCCESS;
}



private:

/**********************************************************************************
* AUTHOR		: Bharath A
* DATE			: 22-FEB-2005
* NAME			: computeDistances
* DESCRIPTION	: Computes inter-object distances and puts them in the distance matrix
* ARGUMENTS		:
* RETURNS		: error code
* NOTES			:
* CHANGE HISTROY
* Author			Date				Description of change
*************************************************************************************/

int computeDistances()
{

	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
		<<"LTKHierarchicalClustering::computeDistances()"<<endl;

	for(int i=0;i<(m_data.size()-1);++i)
	{
		vector<float> eachRow((m_data.size()-i)-1);//added -1 at the end

		int c=0;

		for(int j=i+1;j<m_data.size();++j)
		{
			//external distance function called
			int errorCode = (m_distClassPtr->*m_distancePtr)(m_data[i],m_data[j], eachRow[c]);

			if (errorCode != SUCCESS )
			{
				LOG(LTKLogger::LTK_LOGLEVEL_ERR)
					<<"Error while calling distance function"<<endl;

				LOG(LTKLogger::LTK_LOGLEVEL_ERR)
					<<"Error: LTKHierarchicalClustering::computeDistances()"<<endl;

				LTKReturnError(errorCode);
			}

			++c;

		}

		m_proximityMatrix.push_back(eachRow);

	}

	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
		<<"LTKHierarchicalClustering::computeDistances()"<<endl;

    return SUCCESS;

}



/**********************************************************************************
* AUTHOR		: Bharath A
* DATE			: 22-FEB-2005
* NAME			: clusterToFindNumClusters
* DESCRIPTION	: Clusters the data objects hierarchically (agglomerative)
*				  till the desired number of
*				  clusters and also evaluates the stopping criterion selected.
* ARGUMENTS		:
* RETURNS		: error code
* NOTES			:
* CHANGE HISTROY
* Author			Date				Description of change
*************************************************************************************/

int clusterToFindNumClusters()
{

	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
		<<"LTKHierarchicalClustering::clusterToFindNumClusters()"<<endl;

   if(m_stoppingCriterion==LMETHOD)
   {
	   	//Number of clusters needs to be determined
		if(m_determineClusters)
		{
			//map for number of clusters vs. merging distance
			m_mergingDist.reserve(m_data.size());
		}
   }
   else if(m_stoppingCriterion==AVG_SIL)
   {
		if(m_writeHTML==false && m_cachedResult.size()>0)
		{
			m_intermediateCG=m_cachedResult;

			return SUCCESS;
		}
   }

	for(int i=0;i<m_data.size();i++)
	{
		vector<int> v;
		v.push_back(i);

		//To begin with, each data object is cluster by itself
		m_intermediateCG.push_back(v);
	}

	if(m_writeHTML)  //If output is needed as html
	{
		string outputFilePath=m_outputDir+"/"+OUTPUT_HTML_FILE_NAME;

		m_output.open(outputFilePath.c_str()); //Cluster output file is created

		if(m_output.fail())
		{
			LOG(LTKLogger::LTK_LOGLEVEL_ERR)
				<<"Unable to create file:"
				<<outputFilePath<<endl;

			LOG(LTKLogger::LTK_LOGLEVEL_ERR)
				<<"Error : "<< EFILE_CREATION_FAILED
				<<":"<< getErrorMessage(EFILE_CREATION_FAILED)
                <<" LTKHierarchicalClustering::clusterToFindNumClusters()" <<endl;

			LTKReturnError(EFILE_CREATION_FAILED);
		}

		/*Html tags are written*/
		m_output<<"<html>\n";
		m_output<<"<body>\n";
		m_output<<"<table border='1' bordercolor='black'>\n";


		m_output<<"<tr>\n";


		for(int v=0;v<m_intermediateCG.size();v++)
		{

			int clusterSize=m_intermediateCG[v].size();

			m_output<<"<td colspan=\""<<clusterSize<<"\">";

			for(int w=0;w<clusterSize;w++)
			{
				if(m_hyperlinksVec.size()>0)
				{
					m_output<<"<a href='"
							<<m_hyperlinksVec[m_intermediateCG[v][w]]
							<<"'>"<<m_intermediateCG[v][w]<<"</a>&nbsp;";
				}
				else
				{
					m_output<<m_intermediateCG[v][w]<<"&nbsp;";
				}

				//if there is an image file corresponding to each data object
				if(!m_imageFileExtn.empty())
				{
					m_output<<"<img src=\""
							<<m_intermediateCG[v][w]<<"."<<m_imageFileExtn
							<<"\" border=\"0\"/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;";
				}


			}
		}

		m_output<<"<td><b>";
		m_output<<"Inter-cluster Dist";
		m_output<<"</b></td>";

		m_output<<"</tr>\n";

	}

	if(m_numOfClusters<m_data.size() || m_determineClusters==true)
	{

		int currNumOfClusters = m_data.size();

		//this local variable is used only for Average Silhouette method
		float minSil=FLT_MAX;


		for(int it=0;it<(m_data.size()-m_numOfClusters);++it)
		{
			vector<int> toCluster;

			//to find the clusters that need to be merged
			float interClusterDistance=findGroup(toCluster);

			currNumOfClusters=m_data.size()-it-1;

			if(m_stoppingCriterion==AVG_SIL)
			{
				float silDiff=computeAvgSil(toCluster[0],toCluster[1]);

				if(silDiff<minSil)
				{
					minSil=silDiff;
					if(currNumOfClusters > 2)
					{
						m_numBySil=currNumOfClusters+1;
						m_cachedResult=m_intermediateCG;
					}

				}
			}
			else if(m_stoppingCriterion==LMETHOD && m_determineClusters==true)
			{
					m_mergingDist[currNumOfClusters]=interClusterDistance;
			}


			//clusters are merged
			m_intermediateCG[toCluster[0]].insert(m_intermediateCG[toCluster[0]].end(),
												  m_intermediateCG[toCluster[1]].begin(),
												  m_intermediateCG[toCluster[1]].end());

			//old cluster deleted
			m_intermediateCG.erase(m_intermediateCG.begin()+ toCluster[1]);



				if(m_writeHTML)
				{
					if(!m_showAllLevels)
					{
						if(currNumOfClusters==m_numOfClusters)
						{
							writeClustersAsHTML(interClusterDistance);
						}
					}
					else
					{
						writeClustersAsHTML(interClusterDistance);
					}

				}


		}


	}


	//closing the html tags
	if(m_writeHTML)
	{
		m_output<<"</table>\n";
		m_output<<"</body>\n";
		m_output<<"</html>";

		m_output.close();
	}

	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
		<<"LTKHierarchicalClustering::clusterToFindNumClusters()"<<endl;

	return SUCCESS;

}







/**********************************************************************************
* AUTHOR		: Bharath A
* DATE			: 22-FEB-2005
* NAME			: getInterObjectDistance
* DESCRIPTION	: Returns the distance between two data objects from the distance matrix
* ARGUMENTS		: firstObjIndex - index of the first data object
*				  secondObjIndex -  index of the second data object
* RETURNS		: distance (float)
* NOTES			:
* CHANGE HISTROY
* Author			Date				Description of change
*************************************************************************************/
float getInterObjectDistance(int firstObjIndex,int secondObjIndex) const
{
	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
		<<"LTKHierarchicalClustering::getInterObjectDistance()"<<endl;

	int row = 0;
	int col = 0;

	if(firstObjIndex < secondObjIndex)
	{
		row=firstObjIndex;
		col=secondObjIndex;
	}
	else
	{
		row=secondObjIndex;
		col=firstObjIndex;
	} //lesser index is made as row and the other as column

	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
		<<"LTKHierarchicalClustering::getInterObjectDistance()"<<endl;

	return m_proximityMatrix[row][col-row-1];
}

/**********************************************************************************
* AUTHOR		: Bharath A
* DATE			: 22-FEB-2005
* NAME			: findGroup
* DESCRIPTION	: Finds the indices in the m_intermediateCG (clusters) that
*				  need to be merged
* ARGUMENTS		: pairToCombine - vector for storing the cluster indices
* RETURNS		: inter cluster distance (float)
* NOTES			:
* CHANGE HISTROY
* Author			Date				Description of change
*************************************************************************************/
//double findGroup(vector<int> pairToCombine)
float findGroup(vector<int>& pairToCombine) const
{
	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
		<<"LTKHierarchicalClustering::findGroup()"<<endl;

	float minDistance=FLT_MAX;

	pairToCombine.clear();

	pairToCombine.resize(2);

	for(int i=0;i<m_intermediateCG.size();i++)
	{
		for(int j=i+1;j<m_intermediateCG.size();j++)
		{
			float tempDist=findInterClusterDistance(m_intermediateCG[i],
													 m_intermediateCG[j]);

			if(tempDist<minDistance)
			{

				minDistance=tempDist;
				pairToCombine[0]=i;
				pairToCombine[1]=j;
			}
		}

	}

	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
		<<"LTKHierarchicalClustering::findGroup()"<<endl;


	return minDistance;

}


/**********************************************************************************
* AUTHOR		: Bharath A
* DATE			: 22-FEB-2005
* NAME			: findInterClusterDistance
* DESCRIPTION	: Finds the inter-cluster distance.
*				  The contents of each cluster are in the vectors.
* ARGUMENTS		: v1 cluster one
*				  v2 cluster two
* RETURNS		: inter cluster distance
* NOTES			:
* CHANGE HISTROY
* Author			Date				Description of change
*************************************************************************************/

//double findInterClusterDistance(const vector<int>& v1,const vector<int>& v2)
float findInterClusterDistance(const vector<int>& cluster1, const vector<int>& cluster2) const
{
	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
		<<"LTKHierarchicalClustering::findInterClusterDistance()"<<endl;

	float groupDistance=0.0;

	/*For single-linkage algorithm*/
	if(m_method==SINGLE_LINKAGE)
	{
		groupDistance= FLT_MAX;

		for(int i=0;i<cluster1.size();i++)
		{
			for(int j=0;j<cluster2.size();j++)
			{
				float temp=getInterObjectDistance(cluster1[i],cluster2[j]);

				if(temp < groupDistance)
				{
					groupDistance=temp;
				}
			}
		}
	}


	/*For average-linkage algorithm*/
	if(m_method==AVERAGE_LINKAGE)
	{
		groupDistance=0.0;

		for(int i=0;i<cluster1.size();i++)
		{
			for(int j=0;j<cluster2.size();j++)
			{

				groupDistance+=getInterObjectDistance(cluster1[i],cluster2[j]);
			}
		}

		groupDistance/=((float)(cluster1.size()*cluster2.size()));
	}


	/*For complete-linkage algorithm*/
	if(m_method==COMPLETE_LINKAGE)
	{
		groupDistance=0.0;

		for(int i=0;i<cluster1.size();i++)
		{
			for(int j=0;j<cluster2.size();j++)
			{
				float temp=getInterObjectDistance(cluster1[i],cluster2[j]);

				if(temp > groupDistance)
				{
					groupDistance=temp;
				}
			}
		}
	}

	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
		<<"LTKHierarchicalClustering::findInterClusterDistance()"<<endl;


	return groupDistance;


}

/**********************************************************************************
* AUTHOR		: Bharath A
* DATE			: 22-FEB-2005
* NAME			: writeClustersAsHTML
* DESCRIPTION	: Writes the cluster results as html with data objects' and clusters' ids.
*				  If hyperlinks vector is set, provides links to actual files.
* ARGUMENTS		: interClustDist - merging distance of the new cluster formed
* RETURNS		:
* NOTES			:
* CHANGE HISTROY
* Author			Date				Description of change
*************************************************************************************/

void writeClustersAsHTML(float interClustDist)
{

	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
		<<"LTKHierarchicalClustering::writeClustersAsHTML()"<<endl;

	m_output<<"<tr>\n";

	for(int v=0;v<m_intermediateCG.size();v++)
	{
		int clusterSize=m_intermediateCG[v].size();

		m_output<<"<td colspan=\""<<clusterSize<<"\">";

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -