⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cftree.c

📁 Solaris环境下的数据挖掘算法:birch聚类算法。该算法适用于对大量数据的挖掘。
💻 C
📖 第 1 页 / 共 2 页
字号:
/****************************************************************File Name:   cftree.CAuthor: Tian Zhang, CS Dept., Univ. of Wisconsin-Madison, 1995               Copyright(c) 1995 by Tian Zhang                   All Rights ReservedPermission to use, copy and modify this software must be grantedby the author and provided that the above copyright notice appear in all relevant copies and that both that copyright notice and this permission notice appear in all relevant supporting documentations. Comments and additions may be sent the author at zhang@cs.wisc.edu.******************************************************************/#include "global.h"#include "util.h"#include "vector.h"#include "rectangle.h"#include "cfentry.h"#include "cutil.h"#include "status.h"#include "contree.h"#include "cftree.h"#include "path.h"int Node::N() const {	int tmp = 0;	for (int i=0; i<actsize; i++) 		tmp+=entry[i].n;	return tmp;}	void Node::SX(Vector& tmpsx) const {	tmpsx.Reset();	for (int i=0; i<actsize; i++)		tmpsx+=entry[i].sx;	}double Node::SXX() const {	double tmp=0;	for (int i=0; i<actsize; i++) 		tmp+=entry[i].sxx;	return tmp;}void Node::CF(Entry& tmpcf) const {	tmpcf.Reset();	for (int i=0; i<actsize; i++)		tmpcf+=entry[i];	}double Node::Radius() const {	Entry tmpent;	tmpent.Init(entry[0].sx.dim);	this->CF(tmpent);	return tmpent.Radius();	}double Node::Diameter() const {	Entry tmpent;	tmpent.Init(entry[0].sx.dim);	this->CF(tmpent);	return tmpent.Diameter();	}double Node::Fitness(short ftype) const {	Entry tmpent;	tmpent.Init(entry[0].sx.dim);	this->CF(tmpent);	return tmpent.Fitness(ftype);	}#ifdef RECTANGLEvoid Node::Rect(Rectangle& tmprect) const {	tmprect.Reset();	for (int i=0; i<actsize; i++)		tmprect+=entry[i].rect;	}#endif RECTANGLEdouble Leaf::AbsVofLevel(int i, short ftype, short dim) const {if (i>0) print_error("Leaf::AbsVofLevel","can not go further");if (i==0) return pow(sqrt(this->Fitness(ftype)),dim);}double Nonleaf::AbsVofLevel(int i, short ftype, short dim) const {double AbsV=0.0;if (i>Depth()-1) print_error("Nonleaf::AbsVofLevel","can not go further"); if (i==0) return pow(sqrt(this->Fitness(ftype)),dim);else { for (int j=0; j<actsize; j++) 	AbsV+=child[j]->AbsVofLevel(i-1,ftype,dim);       return AbsV;       }}int Leaf::Size() const { return 1; }int Nonleaf::Size() const {	int size=1;	for (int i=0; i<actsize; i++) 		size+=child[i]->Size();	return size;}int Leaf::Depth() const { return 1; }int Nonleaf::Depth() const {	if (actsize==0) return 1;         else return 1+child[0]->Depth();	}int Leaf::LeafNum() const { return 1; }int Nonleaf::LeafNum() const {	int num=0;	for (int i=0; i<actsize; i++) 		num+=child[i]->LeafNum();	return num;}int Leaf::NonleafNum() const { return 0; }int Nonleaf::NonleafNum() const {	int num=1;	for (int i=0; i<actsize; i++)		num+=child[i]->NonleafNum();	return num;}int Leaf::NumLeafEntry() const { return actsize; }int Leaf::NumNonleafEntry() const { return 0; }int Nonleaf::NumLeafEntry() const {	int tmpn = 0;	for (int i=0; i<actsize; i++)		tmpn+=child[i]->NumLeafEntry();	return tmpn;	}int Nonleaf::NumNonleafEntry() const {	int tmpn = actsize;	for (int i=0; i<actsize; i++)		tmpn+=child[i]->NumNonleafEntry();	return tmpn;	}int Leaf::NumEntry() const{ return actsize; }int Nonleaf::NumEntry() const {	int tmpn = actsize;	for (int i=0; i<actsize; i++) 		tmpn+=child[i]->NumEntry();	return tmpn;}double Leaf::Occupancy(Stat *Stats) const {	return actsize/(1.0*MaxSize(Stats));}double Nonleaf::Occupancy(Stat *Stats) const {	int leafsize, nonleafsize;#ifdef RECTANGLE	leafsize=(Stats->PageSize-2*sizeof(int))/		 (sizeof(int)+sizeof(double)*(3*Stats->Dimension+1));#else 	leafsize=(Stats->PageSize-2*sizeof(int))/		 (sizeof(int)+sizeof(double)*(Stats->Dimension+1));#endif	nonleafsize=MaxSize(Stats);	return NumEntry()/(1.0*nonleafsize*NonleafNum()+1.0*leafsize*LeafNum());}void Node::Print_Summary(Stat *Stats, ostream &fo) const {Entry tmpent;tmpent.Init(entry[0].sx.dim);CF(tmpent);fo<<"Root CF\t"<<tmpent<<endl;fo<<"FootPrint\t"<<sqrt(tmpent.Radius())<<endl;#ifdef RECTANGLERectangle tmprect;tmprect.Init(entry[0].sx.dim);Rect(tmprect);fo<<"Root Rectangle\t"<<tmprect<<endl;#endif RECTANGLEfo<<"Leaf Nodes\t"<<LeafNum()<<endl;fo<<"Nonleaf Nodes\t"<<NonleafNum()<<endl;fo<<"Tree Size\t"<<Size()<<endl;fo<<"Tree Depth\t"<<Depth()<<endl;fo<<"Leaf Entries\t"<<NumLeafEntry()<<endl;fo<<"Nonleaf Entries\t"<<NumNonleafEntry()<< endl;fo<<"Occupancy\t"<<Occupancy(Stats)<<endl;}void Node::Print_Summary(Stat *Stats, ofstream &fo) const {Entry tmpent;tmpent.Init(entry[0].sx.dim);CF(tmpent);fo<<"Root CF\t"<<tmpent<<endl;fo<<"FootPrint\t"<<sqrt(tmpent.Radius())<<endl;#ifdef RECTANGLERectangle tmprect;tmprect.Init(entry[0].sx.dim);Rect(tmprect);fo<<"Root Rectangle\t"<<tmprect<<endl;#endif RECTANGLEfo<<"Leaf Nodes\t"<<LeafNum()<<endl;fo<<"Nonleaf Nodes\t"<<NonleafNum()<<endl;fo<<"Tree Size\t"<<Size()<<endl;fo<<"Tree Depth\t"<<Depth()<<endl;fo<<"Leaf Entries\t"<<NumLeafEntry()<<endl;fo<<"Nonleaf Entries\t"<<NumNonleafEntry()<< endl;fo<<"Occupancy\t"<<Occupancy(Stats)<<endl;}Node* Leaf::DenseNode(){if (actsize>1) return this;else return NULL;}short Leaf::FreeEmptyNode(Stat *Stats){short flag=TRUE;for (int i=0; i<actsize; i++) 	if (entry[i].n>0) {flag=FALSE; break;}if (flag==TRUE) {	this->AssignNextPrev(Stats); 	delete this; 	Stats->MemUsed--; 	Stats->TreeSize--;	}return flag;}// fit without expanding or splittingshort Leaf::BestFitPath1(Stat *Stats, const Entry &ent, Path& BestPath){int EntryI=ClosestOne(Stats,ent);if (EntryI>=0) {	BestPath.Push(EntryI,this);	return TRUE;	}else return FALSE;}// fit with expanding but without splittingshort Leaf::BestFitPath2(Stat *Stats, const Entry &ent, Path& BestPath){int EntryI=ClosestOne(Stats,ent);if (EntryI>=0) {	BestPath.Push(EntryI,this);	return TRUE;	}else if (actsize<MaxSize(Stats)) {	BestPath.Push(actsize,this);	return TRUE;	}else return FALSE;}// absorb without expanding and splittingshort Leaf::AbsorbEntry1(Stat *Stats, const Entry &ent){int EntryI;EntryI=ClosestOne(Stats,ent);if (EntryI>=0) {	entry[EntryI]+=ent;	return TRUE;	}else return FALSE;}// absorb with expanding but without splittingshort Leaf::AbsorbEntry2(Stat *Stats, const Entry &ent){int EntryI;EntryI=ClosestOne(Stats,ent);if (EntryI>=0) {	entry[EntryI]+=ent;	return TRUE;	}else if (actsize<MaxSize(Stats)) {	entry[actsize]=ent;	actsize++; 	Stats->CurrEntryCnt++;	return TRUE;	}     else return FALSE;}ConNode* Leaf::Copy(Stat *Stats) const{ConNode* node=new ConNode(actsize,Stats);for (int i=0; i<actsize; i++) node->entry[i]=entry[i];return node;}ConNode* Nonleaf::Copy(Stat *Stats) const{ConNode* node=new ConNode(actsize,Stats);for (int i=0; i<actsize; i++) {	node->entry[i]=entry[i];	node->child[i]=child[i]->Copy(Stats);	}return node;}Node* Leaf::AdjustTree(Stat *Stats, const Entry &ent) {int  EntryI;Node *NewNode;if (actsize==0) {	entry[actsize]=ent;	actsize++; 	Stats->CurrEntryCnt++;	return NULL;	}EntryI=ClosestOne(Stats,ent);if (EntryI>=0) {	entry[EntryI]+=ent;	return NULL;	}else {	NewNode = InsertMightSplit(Stats,ent,NULL);	if (NewNode==NULL) return NULL;	else {		if (this!=Stats->OldRoot)			return NewNode;		else {  Stats->CreateNewRoot(this,NewNode);			return NULL;	             }             }	}}Node* Nonleaf::DenseNode(){Node *tmp;// less than 2 entries: this->N()<=1||this->actsize<=1 if (this->N()<=1) return NULL;if (this->actsize<=1) {	tmp = child[0]->DenseNode();	if (tmp==NULL) return NULL;	else return tmp;	}// more than 2 entries: this->N()>1&&this->actsize>1int EntryI = DensestEntry();tmp=child[EntryI]->DenseNode();if (tmp==NULL) return this;else return tmp;}short Nonleaf::FreeEmptyNode(Stat *Stats){short flag=TRUE;int i=0;while (i<actsize) {	if (child[i]->FreeEmptyNode(Stats)==TRUE) {		actsize--;		entry[i]=entry[actsize];		child[i]=child[actsize];		}	else {  flag=FALSE;		i++;		}	}if (flag==TRUE) {	delete this; 	Stats->MemUsed--; 	Stats->TreeSize--;	}return flag;}		// fit without expanding and splittingshort Nonleaf::BestFitPath1(Stat *Stats, const Entry &ent, Path& BestPath){int EntryI=ClosestOne(Stats,ent);if (EntryI>=0) {	BestPath.Push(EntryI,this);		if (child[EntryI]->BestFitPath1(Stats,ent,BestPath)==TRUE) 		return TRUE;	else 	return FALSE;	}else return FALSE;}// fit with expanding but without splittingshort Nonleaf::BestFitPath2(Stat *Stats, const Entry &ent, Path& BestPath){int EntryI=ClosestOne(Stats,ent);if (EntryI>=0) {	BestPath.Push(EntryI,this);		if (child[EntryI]->BestFitPath2(Stats,ent,BestPath)==TRUE) 		return TRUE;	else 	return FALSE;	}else return FALSE;}// absorb without expanding and splittingshort Nonleaf::AbsorbEntry1(Stat *Stats, const Entry &ent){int EntryI;EntryI = ClosestOne(Stats,ent);if (child[EntryI]->AbsorbEntry1(Stats,ent)==TRUE) {	entry[EntryI]+=ent;	return TRUE;	}else return FALSE;}// absorb with expanding but without splittingshort Nonleaf::AbsorbEntry2(Stat *Stats, const Entry &ent){int EntryI;EntryI=ClosestOne(Stats,ent);if (child[EntryI]->AbsorbEntry2(Stats,ent)==TRUE) {	entry[EntryI]+=ent;	return TRUE;	}else return FALSE;}Node* Nonleaf::AdjustTree(Stat *Stats, const Entry &ent){int EntryI;int i,j;double d;Node *ResNode, *NewNode;Entry ResEnt;ResEnt.Init(Stats->Dimension);EntryI=ClosestOne(Stats,ent);ResNode=child[EntryI]->AdjustTree(Stats,ent);if (ResNode!=NULL) { // Split Propagate	child[EntryI]->CF(entry[EntryI]);	ResNode->CF(ResEnt);	NewNode=InsertMightSplit(Stats,ResEnt,ResNode); 	if (NewNode==NULL) { // Split Propagate Stops	   if (actsize>2) {		d=ClosestTwo(Stats, i, j);		if (!(i==EntryI&&j==actsize-1))	       		MergeMightResplit(Stats, i, j);		}	  return NULL;	  }	 else { if (this!=Stats->OldRoot) 			return NewNode;	        else { // Create New Root	  		Stats->CreateNewRoot(this,NewNode);	  		return NULL;	        	}              }	}else { // No Split Coming Up	entry[EntryI]+=ent; 	return NULL;	}}int Nonleaf::DensestEntry() const{int i, imax, nmax;imax = 0;nmax = entry[0].n;for (i=1; i<actsize; i++) {	if (entry[i].n>nmax) {		imax = i;		nmax = entry[i].n;		}	}return imax;}int Nonleaf::ClosestOne(Stat *Stats, const Entry& ent) const {int i, imin;double d, dmin;// empty nodeif (actsize<=1) return 0;// nonemptry nodeimin = 0;dmin = HUGE_DOUBLE;for (i=0; i<actsize; i++) {    if (entry[i].n>0) {	d = distance(Stats->BDtype,entry[i],ent);	if (d<dmin) {dmin=d;imin=i;}	}    }if (dmin<HUGE_DOUBLE) return imin;else return -1;}int Leaf::DensestEntry() const{// do nothing and never be called.}int Leaf::ClosestOne(Stat *Stats, const Entry& ent) const {int i, imin;double d, dmin;// empty nodeif (actsize<1) return 0;// nonempty nodeimin = 0;dmin = HUGE_DOUBLE;/***************************************************// option1: min average D: tend to cause overlapping for (i=0; i<actsize; i++) {    if (entry[i].n>0) {	d = fitness(Stats->Ftype,entry[i],ent);	if (d<dmin) {		dmin=d;		imin=i;		}	}    }if (dmin<=Stats->CurFt+PRECISION_ERROR) return imin;else return -1;***************************************************/// option2: min average Di: less overlappingfor (i=0; i<actsize; i++) {    if (entry[i].n>0) {	d = distance(Stats->BDtype,entry[i],ent);	if (d<dmin) {		dmin=d;		imin=i;		}	}    }if (dmin<HUGE_DOUBLE) {	dmin = fitness(Stats->Ftype,entry[imin],ent);        if (dmin<=Stats->CurFt+PRECISION_ERROR) return imin;        else return -1;	}else return -1;}double Leaf::ClosestTwo(Stat *Stats, int &i, int &j) const {int i1,j1,imin,jmin;double d, dmin;if (actsize<2) 	print_error("Leaf::ClosestTwo","Less than 2 entries");if (actsize==2) {	i=0; j=1; 	return distance(Stats->BDtype,entry[0],entry[1]);	}imin = 0; jmin = 1;dmin = distance(Stats->BDtype,entry[0],entry[1]);for (i1=0;i1<actsize-1;i1++)   for (j1=i1+1;j1<actsize;j1++) {		d = distance(Stats->BDtype,entry[i1],entry[j1]);		if (d<dmin) { 			imin = i1; 			jmin = j1; 			dmin = d;			}	}i=imin; j=jmin;return dmin;}// only relevant to phase 3 and hierarchical clusteringdouble Leaf::ClosestDiffTwo(Stat *Stats, int &i, int &j) const {Entry tmpent;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -