📄 subclust.cpp
字号:
// subclust.cpp: implementation of the CSubclust class.
// Author:
// Date: 2005/05/07
// Reference: the subclust function in Matlab 6.5
#include "stdafx.h"
#include "real_mat.h"
#include "math.h"
#include "memory.h"
#include "Subclust.h"
#include "GlobeDef.h"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
#define DIST_THRESHOLD 10 // not used for now
// Function-like max/min helpers. Being macros, they may evaluate an argument
// more than once, so do not pass expressions with side effects.
#define max(a, b) (((a) > (b)) ? (a) : (b))
#define min(a, b) (((a) < (b)) ? (a) : (b))
// Default constructor.
// Sets the clustering tuning parameters and resets the cluster count. It also
// clears every pointer and size member, so that the destructor (which releases
// m_pCent and m_pClusterCenterLabel) never operates on an indeterminate
// pointer when Subclust() was not run on this object.
CSubclust::CSubclust()
{
    // Tuning parameters of the clustering algorithm.
    sqshFactor = 1.25;
    acceptRatio = 0.5;
    rejectRatio = 0.15;
    verbose = 0;

    // No clusters have been found yet.
    m_nClusters = 0;

    // Buffers allocated by Subclust() and released by the destructor.
    m_pCent = NULL;
    m_pClusterCenterLabel = NULL;

    // Caller-supplied data; filled in by SetData() / SetAvgCenter().
    m_pX = NULL;
    m_pLabel = NULL;
    m_pAvgCenter = NULL;
    m_nPoints = 0;
    m_nDimension = 0;
    m_radii = 0;
}
// Destructor.
// Hands the two arrays that Subclust() allocates to SAFE_DELETE_ARRAY (a macro
// defined outside this file; presumably delete[] followed by nulling the
// pointer -- confirm against its definition).
CSubclust::~CSubclust()
{
    SAFE_DELETE_ARRAY(m_pClusterCenterLabel);
    SAFE_DELETE_ARRAY(m_pCent);
}
/////////////////////////////////////////////////////////////////////////
//
// Function: SetData()
// Parameter:
// [I]double *px -- 样本点集
// [I]int* pLabel -- 样本点类别标记表
// [I]int nPoints -- 样本点的个数
// [I]int nDimension -- 样本点集维数
// [I]double radii -- 相减聚类的半径参数
// Return:
//
// Description: 初始化样本集
// Modification:
// 05/24/2005 jg.yuan add nDimension
// 06/02/2005 jg.yuan change function name with "SetData" &
// add radii & pLabel
////////////////////////////////////////////////////////////////////////
void CSubclust::SetData(double *pX,int *pLabel,int nPoints,int nDimension, double radii)
{
m_nPoints = nPoints;
m_pLabel = pLabel;
m_nDimension = nDimension;
m_pX = pX;
m_radii = radii;
}
/////////////////////////////////////////////////////////////////////////
//
// Function: SetAvgCenter()
// Parameter:
// [I]double *pAvgCenter -- 外部用于存储平均中心的内存指针
//
// Return:
//
// Description: 设置存储平均中心的内存地址
//
////////////////////////////////////////////////////////////////////////
void CSubclust::SetAvgCenter(double *pAvgCenter)
{
m_pAvgCenter = pAvgCenter;
}
/////////////////////////////////////////////////////////////////////////
//
// Function: FindMax()
// Parameter:
// [I]double *pX -- array of values; its first m_nPoints entries are scanned
// [O]double& maxVal -- largest value found in the array
//
// Return: index of the largest value (the first one when there are ties)
//
// Description: Finds the maximum value of an array and its position.
// The search is seeded with the first element, so the result
// is a real array entry even when no value is positive.
// When m_nPoints is not positive, maxVal is set to 0 and
// index 0 is returned.
// Modification:
// 05/24/2005 jg.yuan Move the known para "int nNum"
////////////////////////////////////////////////////////////////////////
int CSubclust::FindMax(double *pX, double& maxVal)
{
    // Nothing to scan: keep the historical "index 0, value 0" answer.
    if (m_nPoints <= 0)
    {
        maxVal = 0;
        return 0;
    }

    // Seed with the first element instead of 0 so that an array without
    // positive entries still reports its true maximum.
    int maxIndex = 0;
    maxVal = pX[0];
    for (int j = 1; j < m_nPoints; j++)
    {
        // Strict comparison keeps the first occurrence on ties.
        if (pX[j] > maxVal)
        {
            maxVal = pX[j];
            maxIndex = j;
        }
    }
    return maxIndex;
}
/////////////////////////////////////////////////////////////////////////
//
// Function: Normalize()
// Parameter:
// [I/O]double *pX -- sample points, indexed as pX[m_nDimension*i + j];
//                    overwritten with the normalized values
// [I/O]double* pMin -- per-dimension minimum; must be seeded by the caller
//                      and is lowered to the smallest sample value
// [I/O]double* pMax -- per-dimension maximum; must be seeded by the caller
//                      and is raised to the largest sample value
//
// Return:
//
// Description: Normalizes the sample set. Each coordinate is mapped to
// (x - pMin[j]) / (pMax[j] - pMin[j]) and clamped to [0, 1].
// The values already in pMin/pMax take part in the min/max
// search, so the resulting bounds can be wider than the data
// if the seeds are.
// A dimension whose bounds coincide is mapped to 0 rather
// than divided by zero. (The MATLAB reference instead widens
// such bounds by 0.0001*(1 + abs(bound)).)
// Modification:
// 05/24/2005 jg.yuan add to m_nDimension
////////////////////////////////////////////////////////////////////////
void CSubclust::Normalize(double *pX, double* pMin, double* pMax)
{
    int i, j;

    // Pass 1: fold every sample into the per-dimension bounds.
    for (i = 0; i < m_nPoints; i++)
    {
        const double* row = pX + m_nDimension * i;
        for (j = 0; j < m_nDimension; j++)
        {
            if (row[j] < pMin[j])
            {
                pMin[j] = row[j];
            }
            if (row[j] > pMax[j])
            {
                pMax[j] = row[j];
            }
        }
    }

    // Pass 2: rescale every coordinate into [0, 1].
    for (i = 0; i < m_nPoints; i++)
    {
        double* row = pX + m_nDimension * i;
        for (j = 0; j < m_nDimension; j++)
        {
            const double range = pMax[j] - pMin[j];
            if (range == 0)
            {
                // Constant dimension: avoid 0/0 and use the value the
                // clamped quotient previously collapsed to.
                row[j] = 0;
                continue;
            }

            double scaled = (row[j] - pMin[j]) / range;
            // Clamp to [0, 1].
            scaled = (scaled > 0) ? scaled : 0;
            scaled = (scaled < 1) ? scaled : 1;
            row[j] = scaled;
        }
    }
}
/////////////////////////////////////////////////////////////////////////
//
// Function: ComputeInitPotVal()
// Parameter:
// [I/O]double* pPotVals -- one potential per sample point (m_nPoints
//                          entries). Values are accumulated with +=, so
//                          the caller must zero the array beforehand.
//
// Return:
//
// Description: Computes the initial potential of every point in the sample
// set: for point i, the sum over all points j of
// exp(-4 * sum_k(((x_ik - x_jk) * accumMultp[k])^2)),
// where accumMultp[k] is set here to 1.0 / m_radii.
// Modification:
// 05/24/2005 jg.yuan add to m_nDimension
////////////////////////////////////////////////////////////////////////
void CSubclust::ComputeInitPotVal(double* pPotVals)
{
    int i, j, k;

    // Per-dimension scaling factor applied to coordinate differences.
    for (k = 0; k < m_nDimension; k++)
    {
        accumMultp[k] = 1.0 / m_radii;
    }

    for (i = 0; i < m_nPoints; i++)
    {
        const double* pointI = m_pX + m_nDimension * i;

        // Accumulate the contribution of every point (including i itself).
        for (j = 0; j < m_nPoints; j++)
        {
            const double* pointJ = m_pX + m_nDimension * j;

            // Squared, radius-scaled distance between points i and j.
            double distSq = 0;
            for (k = 0; k < m_nDimension; k++)
            {
                const double diff = (pointI[k] - pointJ[k]) * accumMultp[k];
                distSq += diff * diff;
            }
            pPotVals[i] += exp(-4 * distSq);
        }
    }
}
/////////////////////////////////////////////////////////////////////////
//
// Function: Subclust()
// Parameter:
//
// Return:
//
// Description: Runs subtractive clustering on the sample set. The cluster
// centers are stored in m_pCent and their count in m_nClusters.
// Modification:
// 05/24/2005 jg.yuan add to m_nDimension
////////////////////////////////////////////////////////////////////////
void CSubclust::Subclust()
{
int i,j,k;
int nParams = 2;
// double minX[2] = {MAXWORD,MAXWORD};
// double maxX[2] = {-MAXWORD,-MAXWORD};
double* minX = new double[m_nDimension];
double* maxX = new double[m_nDimension];
for(i=0; i<m_nDimension; i++)
{
minX[i] = MAXWORD;
maxX[i] = -MAXWORD;
accumMultp[i] = 1.0/m_radii;
sqshMultp[i] = 1.0/(sqshFactor * m_radii);
}
// accumMultp[0] = 1.0/m_radii;
// accumMultp[1] = accumMultp[0];
// sqshMultp[0] = 1.0/(sqshFactor * m_radii);
// sqshMultp[1] = sqshMultp[0];
//对pX规格化处理
// Normalize(m_pX,m_nPoints,minX,maxX);
Normalize(m_pX, minX, maxX);
int maxPotIndex = 0;
double refPotVal = 0;
double* dx = NULL;
// 为dx分配临时存储区
try{
dx = new double[m_nDimension];
}
catch (CMemoryException* pME) {
pME->ReportError();
}
ASSERT(dx);
// 为势函数计算结果分配存储区
double* pPotVals = NULL;
try{
pPotVals = new double[m_nPoints];
}
catch (CMemoryException* pME) {
pME->ReportError();
}
if( pPotVals == NULL)
{
delete[] dx;
dx = NULL;
return;
}
memset(pPotVals,0,sizeof(double)*m_nPoints);
// 计算所有样本点的初始势
ComputeInitPotVal(pPotVals);
// 找出第一个类中心
maxPotIndex = FindMax(pPotVals,refPotVal);
int findMore = 1;
double maxPotVal = 0;
// double maxPoint[2] = {0,0};
double* maxPoint = new double[m_nDimension];
memset(maxPoint, 0, sizeof(double)*m_nDimension);
double maxPotRatio = 0;
// double dxElse[2] = {0,0};
double* dxElse = new double[m_nDimension];
memset(dxElse, 0, sizeof(double)*m_nDimension);
double dxSq = 0;
double minDistSq = -1;
double minDist = 0;
// 为聚类中心分配空间
try{
m_pCent = new double[m_nDimension*m_nPoints];
}
catch (CMemoryException* pME) {
pME->ReportError();
}
if(m_pCent == NULL)
{
delete[] dx;
delete[] pPotVals;
dx = NULL;
pPotVals = NULL;
return;
}
memset(m_pCent,0,sizeof(double)*m_nDimension*m_nPoints);
// pClusterCenterLabel标记中心在原始样本集中的位置
try{
m_pClusterCenterLabel = new int[m_nPoints];
}
catch (CMemoryException* pME) {
pME->ReportError();
}
if(m_pClusterCenterLabel == NULL)
{
delete[] dx;
delete[] pPotVals;
delete[] m_pCent;
m_pCent = NULL;
dx = NULL;
pPotVals = NULL;
return;
}
memset(m_pClusterCenterLabel,0,sizeof(int)*m_nPoints);
// Start iteratively finding cluster centers and subtracting potential
// from neighboring data points. maxPotVal is the current highest
// potential value and maxPotIndex is the associated data point's index.
maxPotVal = refPotVal;
// 初始化计数器
m_nClusters = 0;
double tmpsum = 0;
while(findMore==1 && maxPotVal>0)
{
findMore = 0;
//从样本集中找出maxPotIndex对应的点
for(i=0; i<m_nDimension; i++)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -