⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ltkhierarchicalclustering.h

📁 An open source handwriting recongnition package!!!
💻 H
📖 第 1 页 / 共 3 页
字号:

		m_output<<"("<<v<<")<br>";

		for(int w=0;w<clusterSize;w++)
		{
			if(m_hyperlinksVec.size()>0)
			{
				m_output<<"<a href='"<<m_hyperlinksVec[m_intermediateCG[v][w]]
				        <<"'>"<<m_intermediateCG[v][w]<<"</a>&nbsp;";
			}
			else
			{
				m_output<<m_intermediateCG[v][w]<<"&nbsp;";
			}
			if(!m_imageFileExtn.empty())
			{
				m_output<<"<img src=\""<<m_intermediateCG[v][w]<<"."
				        <<m_imageFileExtn
				        <<"\" border=\"0\"/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;";
			}

		}
	}

	m_output<<"<td>";

	m_output<<"("<<m_intermediateCG.size()
			<<")&nbsp;&nbsp;&nbsp;<b>"<<interClustDist<<"</b>";

	m_output<<"</td>";

	m_output<<"</tr>\n";

	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
		<<"LTKHierarchicalClustering::writeClustersAsHTML()"<<endl;

}

/**********************************************************************************
* AUTHOR		: Bharath A
* DATE			: 22-FEB-2005
* NAME			: determineNumOfClusters
* DESCRIPTION	: Determines the number of clusters to be formed using
*				  iterative refinement of L-method.
*				  REFERENCE:S. Salvador and P. Chan. Determining the number of clusters/
*				  segments in hierarchical clustering/segmentation algorithms.
*				  Proceedings of 16th IEEE International Conference
*				  on Tools with Artificial Intelligence, 3:1852-1857, 2004.
* ARGUMENTS		:
* RETURNS		: number of clusters
* NOTES			:
* CHANGE HISTROY
* Author			Date				Description of change
*************************************************************************************/

int determineNumOfClusters() const
{
	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
		<<"LTKHierarchicalClustering::determineNumOfClusters()"<<endl;

	int cutOff=(int)m_mergingDist.capacity()-1;

	int lastKnee=0;
	int currentKnee=0;

	lastKnee=cutOff;

	currentKnee=lastKnee;

	bool trueCutOff=false;

	/*Iterative refinement of the L-method result*/

	while(true)
	{

		lastKnee=currentKnee;

		currentKnee=determineKnee(cutOff);

		if(trueCutOff)
		{

			if(currentKnee >= lastKnee)
			{

				break;
			}
		}

		int temp=currentKnee*2;

		if(temp>cutOff)
		{
			--cutOff;

			trueCutOff=false;
		}
		else
		{
			cutOff=temp;

			trueCutOff=true;
		}


		if(cutOff < MIN_CUTOFF)
		{

			break;
		}


	}

	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)
		<<"Number of clusters determined by iterative refinement:"
		<<currentKnee<<endl;

	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
		<<"LTKHierarchicalClustering::determineNumOfClusters()"<<endl;

	return currentKnee;
}




/**********************************************************************************
* AUTHOR		: Bharath A
* DATE			: 22-FEB-2005
* NAME			: determineKnee
* DESCRIPTION	: Determines the knee of the given number of clusters vs. merging distance curve
* ARGUMENTS		: maxNumClusters - maximum number of clusters
* RETURNS		: knee point of the curve
* NOTES			:
* CHANGE HISTROY
* Author			Date				Description of change
*************************************************************************************/
// TODO: handle errors
int determineKnee(int maxNumClusters) const
{
	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
		<<"LTKHierarchicalClustering::determineKnee()"<<endl;

	int result=0;

	float minRMSE = FLT_MAX;

	//atleast two points are required to fit lines, hence c ranges
	//from 3 to maxNumClusters-2
	for(int c=3;c<maxNumClusters-2;c++)
	{

		float lRMSE=0,rRMSE=0;

		findRMSE(c,maxNumClusters,lRMSE,rRMSE);

		float	cRMSE=(((float)(c-1)/(float)(maxNumClusters-1))*lRMSE)
					  +(((float)(maxNumClusters-c)/(float)(maxNumClusters-1))*rRMSE);

		if(cRMSE<minRMSE)
		{

			minRMSE=cRMSE;

			result=c;
		}
	}

	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
		<<"LTKHierarchicalClustering::determineKnee()"<<endl;

	return result+1;
}




/**********************************************************************************
* AUTHOR		: Bharath A
* DATE			: 22-FEB-2005
* NAME			: determineKnee
* DESCRIPTION	: Determines left and right RMSE values of the given point
*				  on the no. of clusters vs. merging distance curve.
*				  It fits two regression lines on either side of the point to find RMSEs
* ARGUMENTS		: candidateKnee - candidata knee point
*				  maxNumClusters - upper bound on number of clusters
*				  lRMSE - output RMSE on the left of c
*				  rRMSE - output RMSE on the right of c
* RETURNS		:
* NOTES			:
* CHANGE HISTROY
* Author			Date				Description of change
*************************************************************************************/
// TODO: prototype change
void findRMSE(int candidateKnee,int maxNumClusters,float& outLRMSE,float& outRRMSE) const
{
	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
		<<"LTKHierarchicalClustering::findRMSE()"<<endl;

	float avgXLeft = 0;
	float avgYLeft = 0;
	float avgXRight = 0;
	float avgYRight = 0;

	/*Regression line coefficients*/
	float beta0Left = 0;
	float beta1Left = 0;
	float beta0Right = 0;
	float beta1Right = 0;

	int i = 0;
	int j = 0;

	for(i=2;i<=candidateKnee;i++)
	{
		avgYLeft+=m_mergingDist[i];

		avgXLeft+=i;
	}


	avgYLeft /= (candidateKnee-1);
	avgXLeft /= (candidateKnee-1);


	for(j=candidateKnee+1;j<=maxNumClusters;j++)
	{

		avgYRight += m_mergingDist[j];
		avgXRight += j;
	}

	avgYRight /= (maxNumClusters-candidateKnee);
	avgXRight /= (maxNumClusters-candidateKnee);

	float numer=0;
	float denom=0;

	for(i=2;i<=candidateKnee;i++)
	{

		numer += ((i-avgXLeft)*(m_mergingDist[i]-avgYLeft));
		denom += ((i-avgXLeft)*(i-avgXLeft));
	}

	beta1Left = numer/denom;
	beta0Left = avgYLeft-(beta1Left*avgXLeft);

	numer=0;denom=0;

	for(j=candidateKnee+1;j<=maxNumClusters;j++)
	{
		numer += ((j-avgXRight)*(m_mergingDist[j]-avgYRight));
		denom += ((j-avgXRight)*(j-avgXRight));
	}

    if(denom > EPS)
    {
		beta1Right = numer / denom;
	}
	else
	{
		beta1Right = 0;
	}

	beta0Right = avgYRight-(beta1Right*avgXRight);

	float errorSOS=0;

	for(i=2;i<=candidateKnee;i++)
	{

		float yCap = (beta0Left+(beta1Left*i));

		errorSOS += ((m_mergingDist[i]-yCap)*(m_mergingDist[i]-yCap));

	}

	outLRMSE=sqrt(errorSOS/(candidateKnee-2));

	errorSOS=0;

	for(j=candidateKnee+1;j<=maxNumClusters;j++)
	{

		float yCap = beta0Right + (beta1Right*j);

		errorSOS += (m_mergingDist[j]-yCap)*(m_mergingDist[j]-yCap);

	}

	outRRMSE=sqrt(errorSOS/(maxNumClusters-candidateKnee-1));

	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
		<<"LTKHierarchicalClustering::findRMSE()"<<endl;

}




/**********************************************************************************
* AUTHOR		: Bharath A
* DATE			: 22-FEB-2005
* NAME			: computeAvgSil
* DESCRIPTION	: Funtion that determines the change in the ratio of
*				  intra and inter cluster similarity before and after merging
* ARGUMENTS		: clust1Index - index of cluster one
*				  clust2Index - index of cluster two
* RETURNS		: average silhouette computed
* NOTES			:
* CHANGE HISTROY
* Author			Date				Description of change
*************************************************************************************/
// TODO: prototype change, error handling
float computeAvgSil(int clust1Index,int clust2Index) const
{
	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Entering: "
		<<"LTKHierarchicalClustering::computeAvgSil()"<<endl;

	const vector<int>& clust1=m_intermediateCG[clust1Index];

	const vector<int>& clust2=m_intermediateCG[clust2Index];

	vector<int> combinedClust;

	combinedClust.insert(combinedClust.end(),clust1.begin(),clust1.end());

	combinedClust.insert(combinedClust.end(),clust2.begin(),clust2.end());

	float clust1TotalSils=0.0f,clust2TotalSils=0.0f,combinedClustTotalSils=0.0f;

	for(int i=0;i<clust1.size();i++)
	{
		int dataObj=clust1[i];

		float avgIntraDist=0.0;

		if(clust1.size()>1)
		{
			for(int j=0;j<clust1.size();j++)
			{
				if(clust1[j]!=dataObj)
				{
					avgIntraDist+=getInterObjectDistance(dataObj,clust1[j]);

				}
			}

			avgIntraDist/=((float)(clust1.size()-1));
		}

		float minInterDist= FLT_MAX;

		for(int r=0;r<m_intermediateCG.size();r++)
		{
			float avgInterDist=0.0;

			if(r!=clust1Index)
			{
				for(int c=0;c<m_intermediateCG[r].size();c++)
				{
					avgInterDist+=getInterObjectDistance(dataObj,m_intermediateCG[r][c]);
				}

				avgInterDist/=((float)m_intermediateCG[r].size());

				if(avgInterDist<minInterDist)
				{
					minInterDist=avgInterDist;
				}
			}

		}

		float dataObjSil=0.0;

		if(minInterDist > avgIntraDist && minInterDist > EPS)
		{

			dataObjSil=(minInterDist-avgIntraDist)/minInterDist;

		}
		else if(avgIntraDist > EPS)
		{

			dataObjSil=(minInterDist-avgIntraDist)/avgIntraDist;

		}

		clust1TotalSils+=dataObjSil;
	}

	for(int ii=0;ii<clust2.size();ii++)
	{
		int dataObj=clust2[ii];

		float avgIntraDist=0.0;

		if(clust2.size()>1)
		{
			for(int j=0;j<clust2.size();j++)
			{
				if(clust2[j]!=dataObj)
				{

					avgIntraDist+=getInterObjectDistance(dataObj,clust2[j]);

				}
			}

			avgIntraDist/=((float)(clust2.size()-1));
		}

		float minInterDist= FLT_MAX;

		for(int r=0;r<m_intermediateCG.size();r++)
		{
			float avgInterDist=0.0;

			if(r!=clust2Index)
			{
				for(int c=0;c<m_intermediateCG[r].size();c++)
				{
					avgInterDist+=getInterObjectDistance(dataObj,
														 m_intermediateCG[r][c]);
				}

				avgInterDist/=((float)m_intermediateCG[r].size());

				if(avgInterDist < minInterDist)
				{
					minInterDist=avgInterDist;
				}
			}

		}
		float dataObjSil=0.0;

		if(minInterDist > avgIntraDist && minInterDist > EPS)
		{
			dataObjSil=(minInterDist-avgIntraDist)/minInterDist;
		}
		else if(avgIntraDist > EPS)
		{
			dataObjSil=(minInterDist-avgIntraDist)/avgIntraDist;
		}

		clust2TotalSils+=dataObjSil;
	}


	for(int iii=0;iii<combinedClust.size();iii++)
	{
		int dataObj=combinedClust[iii];

		float avgIntraDist=0.0;

		if(combinedClust.size()>1)
		{
			for(int j=0;j<combinedClust.size();j++)
			{
				if(combinedClust[j]!=dataObj)
				{
					avgIntraDist+=getInterObjectDistance(dataObj,combinedClust[j]);

				}
			}
			avgIntraDist/=((float)(combinedClust.size()-1));
		}

		float minInterDist=FLT_MAX;

		for(int r=0;r<m_intermediateCG.size();r++)
		{

			if(r!=clust1Index && r!=clust2Index)
			{
				float avgInterDist=0.0f;

				for(int c=0;c<m_intermediateCG[r].size();c++)
				{
					avgInterDist+=getInterObjectDistance(dataObj,m_intermediateCG[r][c]);
				}

				avgInterDist = (float)avgInterDist/ ((float)m_intermediateCG[r].size());

				if(avgInterDist<minInterDist)
				{
					minInterDist=avgInterDist;

				}
			}

		}

		float dataObjSil=0.0;

		if(minInterDist>avgIntraDist && minInterDist > EPS)
		{

			dataObjSil=(minInterDist-avgIntraDist)/minInterDist;

		}
		else if(avgIntraDist > EPS)
		{

			dataObjSil=(minInterDist-avgIntraDist)/avgIntraDist;

		}

		combinedClustTotalSils+=dataObjSil;
	}


	LOG(LTKLogger::LTK_LOGLEVEL_DEBUG)<<"Exiting: "
		<<"LTKHierarchicalClustering::computeAvgSil()"<<endl;


	return (combinedClustTotalSils-clust1TotalSils-clust2TotalSils);

}



};
#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -