⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cftree.c

📁 数据挖掘经典的hierarchial clustering algorithm
💻 C
📖 第 1 页 / 共 2 页
字号:
/****************************************************************
File Name:   cftree.C
Author: Tian Zhang, CS Dept., Univ. of Wisconsin-Madison, 1995

               Copyright(c) 1995 by Tian Zhang

                   All Rights Reserved

Permission to use, copy and modify this software must be granted
by the author and provided that the above copyright notice appear
in all relevant copies and that both that copyright notice and this
permission notice appear in all relevant supporting documentations.

Comments and additions may be sent the author at zhang@cs.wisc.edu.

******************************************************************/

#include "global.h"
#include "util.h"
#include "vector.h"
#include "rectangle.h"
#include "cfentry.h"
#include "cutil.h"
#include "status.h"
#include "contree.h"
#include "cftree.h"
#include "path.h"

int Node::N() const {
        int tmp = 0;
        for (int i=0; i<actsize; i++)
                tmp+=entry[i].n;
        return tmp;
}

void Node::SX(Vector& tmpsx) const {
        tmpsx.Reset();
        for (int i=0; i<actsize; i++)
                tmpsx+=entry[i].sx;
        }

double Node::SXX() const {
        double tmp=0;
        for (int i=0; i<actsize; i++)
                tmp+=entry[i].sxx;
        return tmp;
}

void Node::CF(Entry& tmpcf) const {
        tmpcf.Reset();
        for (int i=0; i<actsize; i++)
                tmpcf+=entry[i];
        }

double Node::Radius() const {
        Entry tmpent;
        tmpent.Init(entry[0].sx.dim);
        this->CF(tmpent);
        return tmpent.Radius();
        }

double Node::Diameter() const {
        Entry tmpent;
        tmpent.Init(entry[0].sx.dim);
        this->CF(tmpent);
        return tmpent.Diameter();
        }

double Node::Fitness(short ftype) const {
        Entry tmpent;
        tmpent.Init(entry[0].sx.dim);
        this->CF(tmpent);
        return tmpent.Fitness(ftype);
        }

#ifdef RECTANGLE
void Node::Rect(Rectangle& tmprect) const {
        tmprect.Reset();
        for (int i=0; i<actsize; i++)
                tmprect+=entry[i].rect;
        }
#endif RECTANGLE

double Leaf::AbsVofLevel(int i, short ftype, short dim) const {
if (i>0) print_error("Leaf::AbsVofLevel","can not go further");
if (i==0) return pow(sqrt(this->Fitness(ftype)),dim);
}

double Nonleaf::AbsVofLevel(int i, short ftype, short dim) const {
double AbsV=0.0;
if (i>Depth()-1) print_error("Nonleaf::AbsVofLevel","can not go further");
if (i==0) return pow(sqrt(this->Fitness(ftype)),dim);
else { for (int j=0; j<actsize; j++)
        AbsV+=child[j]->AbsVofLevel(i-1,ftype,dim);
       return AbsV;
       }
}

int Leaf::Size() const { return 1; }

int Nonleaf::Size() const {
        int size=1;
        for (int i=0; i<actsize; i++)
                size+=child[i]->Size();
        return size;
}

int Leaf::Depth() const { return 1; }

int Nonleaf::Depth() const {
        if (actsize==0) return 1;
        else return 1+child[0]->Depth();
        }

int Leaf::LeafNum() const { return 1; }

int Nonleaf::LeafNum() const {
        int num=0;
        for (int i=0; i<actsize; i++)
                num+=child[i]->LeafNum();
        return num;
}

int Leaf::NonleafNum() const { return 0; }

int Nonleaf::NonleafNum() const {
        int num=1;
        for (int i=0; i<actsize; i++)
                num+=child[i]->NonleafNum();
        return num;
}

int Leaf::NumLeafEntry() const { return actsize; }

int Leaf::NumNonleafEntry() const { return 0; }

int Nonleaf::NumLeafEntry() const {
        int tmpn = 0;
        for (int i=0; i<actsize; i++)
                tmpn+=child[i]->NumLeafEntry();
        return tmpn;
        }

int Nonleaf::NumNonleafEntry() const {
        int tmpn = actsize;
        for (int i=0; i<actsize; i++)
                tmpn+=child[i]->NumNonleafEntry();
        return tmpn;
        }

int Leaf::NumEntry() const{ return actsize; }

int Nonleaf::NumEntry() const {
        int tmpn = actsize;
        for (int i=0; i<actsize; i++)
                tmpn+=child[i]->NumEntry();
        return tmpn;
}

double Leaf::Occupancy(Stat *Stats) const {
        return actsize/(1.0*MaxSize(Stats));
}

double Nonleaf::Occupancy(Stat *Stats) const {
        int leafsize, nonleafsize;
#ifdef RECTANGLE
        leafsize=(Stats->PageSize-2*sizeof(int))/
                 (sizeof(int)+sizeof(double)*(3*Stats->Dimension+1));
#else
        leafsize=(Stats->PageSize-2*sizeof(int))/
                 (sizeof(int)+sizeof(double)*(Stats->Dimension+1));
#endif
        nonleafsize=MaxSize(Stats);
        return NumEntry()/(1.0*nonleafsize*NonleafNum()+1.0*leafsize*LeafNum());
}

void Node::Print_Summary(Stat *Stats, ostream &fo) const
{
Entry tmpent;
tmpent.Init(entry[0].sx.dim);
CF(tmpent);
fo<<"Root CF\t"<<tmpent<<endl;
fo<<"FootPrint\t"<<sqrt(tmpent.Radius())<<endl;

#ifdef RECTANGLE
Rectangle tmprect;
tmprect.Init(entry[0].sx.dim);
Rect(tmprect);
fo<<"Root Rectangle\t"<<tmprect<<endl;
#endif RECTANGLE

fo<<"Leaf Nodes\t"<<LeafNum()<<endl;
fo<<"Nonleaf Nodes\t"<<NonleafNum()<<endl;
fo<<"Tree Size\t"<<Size()<<endl;
fo<<"Tree Depth\t"<<Depth()<<endl;

fo<<"Leaf Entries\t"<<NumLeafEntry()<<endl;
fo<<"Nonleaf Entries\t"<<NumNonleafEntry()<< endl;
fo<<"Occupancy\t"<<Occupancy(Stats)<<endl;
}

void Node::Print_Summary(Stat *Stats, ofstream &fo) const
{
Entry tmpent;
tmpent.Init(entry[0].sx.dim);
CF(tmpent);
fo<<"Root CF\t"<<tmpent<<endl;
fo<<"FootPrint\t"<<sqrt(tmpent.Radius())<<endl;

#ifdef RECTANGLE
Rectangle tmprect;
tmprect.Init(entry[0].sx.dim);
Rect(tmprect);
fo<<"Root Rectangle\t"<<tmprect<<endl;
#endif RECTANGLE

fo<<"Leaf Nodes\t"<<LeafNum()<<endl;
fo<<"Nonleaf Nodes\t"<<NonleafNum()<<endl;
fo<<"Tree Size\t"<<Size()<<endl;
fo<<"Tree Depth\t"<<Depth()<<endl;

fo<<"Leaf Entries\t"<<NumLeafEntry()<<endl;
fo<<"Nonleaf Entries\t"<<NumNonleafEntry()<< endl;
fo<<"Occupancy\t"<<Occupancy(Stats)<<endl;
}

Node* Leaf::DenseNode()
{
if (actsize>1) return this;
else return NULL;
}

short Leaf::FreeEmptyNode(Stat *Stats)
{
short flag=TRUE;
for (int i=0; i<actsize; i++)
        if (entry[i].n>0) {flag=FALSE; break;}

if (flag==TRUE) {
        this->AssignNextPrev(Stats);
        delete this;
        Stats->MemUsed--;
        Stats->TreeSize--;
        }

return flag;
}

// fit without expanding or splitting
short Leaf::BestFitPath1(Stat *Stats, const Entry &ent, Path& BestPath)
{
int EntryI=ClosestOne(Stats,ent);
if (EntryI>=0) {
        BestPath.Push(EntryI,this);
        return TRUE;
        }
else return FALSE;
}

// fit with expanding but without splitting
short Leaf::BestFitPath2(Stat *Stats, const Entry &ent, Path& BestPath)
{
int EntryI=ClosestOne(Stats,ent);
if (EntryI>=0) {
        BestPath.Push(EntryI,this);
        return TRUE;
        }
else if (actsize<MaxSize(Stats)) {
        BestPath.Push(actsize,this);
        return TRUE;
        }
else return FALSE;
}

// absorb without expanding and splitting
short Leaf::AbsorbEntry1(Stat *Stats, const Entry &ent)
{
int EntryI;
EntryI=ClosestOne(Stats,ent);
if (EntryI>=0) {
        entry[EntryI]+=ent;
        return TRUE;
        }
else return FALSE;
}

// absorb with expanding but without splitting
short Leaf::AbsorbEntry2(Stat *Stats, const Entry &ent)
{
int EntryI;
EntryI=ClosestOne(Stats,ent);
if (EntryI>=0) {
        entry[EntryI]+=ent;
        return TRUE;
        }
else if (actsize<MaxSize(Stats)) {
        entry[actsize]=ent;
        actsize++;
        Stats->CurrEntryCnt++;
        return TRUE;
        }
     else return FALSE;
}

ConNode* Leaf::Copy(Stat *Stats) const
{
ConNode* node=new ConNode(actsize,Stats);
for (int i=0; i<actsize; i++) node->entry[i]=entry[i];
return node;
}

ConNode* Nonleaf::Copy(Stat *Stats) const
{
ConNode* node=new ConNode(actsize,Stats);
for (int i=0; i<actsize; i++) {
        node->entry[i]=entry[i];
        node->child[i]=child[i]->Copy(Stats);
        }
return node;
}

Node* Leaf::AdjustTree(Stat *Stats, const Entry &ent)
{
int  EntryI;
Node *NewNode;

if (actsize==0) {
        entry[actsize]=ent;
        actsize++;
        Stats->CurrEntryCnt++;
        return NULL;
        }

EntryI=ClosestOne(Stats,ent);
if (EntryI>=0) {
        entry[EntryI]+=ent;
        return NULL;
        }
else {
        NewNode = InsertMightSplit(Stats,ent,NULL);
        if (NewNode==NULL) return NULL;
        else {
                if (this!=Stats->OldRoot)
                        return NewNode;
                else {  Stats->CreateNewRoot(this,NewNode);
                        return NULL;
                     }
             }
        }
}

Node* Nonleaf::DenseNode()
{
Node *tmp;

// less than 2 entries: this->N()<=1||this->actsize<=1
if (this->N()<=1) return NULL;
if (this->actsize<=1) {
        tmp = child[0]->DenseNode();
        if (tmp==NULL) return NULL;
        else return tmp;
        }

// more than 2 entries: this->N()>1&&this->actsize>1
int EntryI = DensestEntry();
tmp=child[EntryI]->DenseNode();
if (tmp==NULL) return this;
else return tmp;
}

short Nonleaf::FreeEmptyNode(Stat *Stats)
{
short flag=TRUE;
int i=0;
while (i<actsize) {
        if (child[i]->FreeEmptyNode(Stats)==TRUE) {
                actsize--;
                entry[i]=entry[actsize];
                child[i]=child[actsize];
                }
        else {  flag=FALSE;
                i++;
                }
        }

if (flag==TRUE) {
        delete this;
        Stats->MemUsed--;
        Stats->TreeSize--;
        }
return flag;
}

// fit without expanding and splitting
short Nonleaf::BestFitPath1(Stat *Stats, const Entry &ent, Path& BestPath)
{
int EntryI=ClosestOne(Stats,ent);
if (EntryI>=0) {
        BestPath.Push(EntryI,this);
        if (child[EntryI]->BestFitPath1(Stats,ent,BestPath)==TRUE)
                return TRUE;
        else    return FALSE;
        }
else return FALSE;
}

// fit with expanding but without splitting
short Nonleaf::BestFitPath2(Stat *Stats, const Entry &ent, Path& BestPath)
{
int EntryI=ClosestOne(Stats,ent);
if (EntryI>=0) {
        BestPath.Push(EntryI,this);
        if (child[EntryI]->BestFitPath2(Stats,ent,BestPath)==TRUE)
                return TRUE;
        else    return FALSE;
        }
else return FALSE;
}

// absorb without expanding and splitting
short Nonleaf::AbsorbEntry1(Stat *Stats, const Entry &ent)
{
int EntryI;
EntryI = ClosestOne(Stats,ent);
if (child[EntryI]->AbsorbEntry1(Stats,ent)==TRUE) {
        entry[EntryI]+=ent;
        return TRUE;
        }
else return FALSE;
}

// absorb with expanding but without splitting
short Nonleaf::AbsorbEntry2(Stat *Stats, const Entry &ent)
{
int EntryI;
EntryI=ClosestOne(Stats,ent);
if (child[EntryI]->AbsorbEntry2(Stats,ent)==TRUE) {
        entry[EntryI]+=ent;
        return TRUE;
        }
else return FALSE;
}

Node* Nonleaf::AdjustTree(Stat *Stats, const Entry &ent)
{
int EntryI;
int i,j;
double d;

Node *ResNode, *NewNode;
Entry ResEnt;
ResEnt.Init(Stats->Dimension);

EntryI=ClosestOne(Stats,ent);
ResNode=child[EntryI]->AdjustTree(Stats,ent);

if (ResNode!=NULL) { // Split Propagate
        child[EntryI]->CF(entry[EntryI]);
        ResNode->CF(ResEnt);
        NewNode=InsertMightSplit(Stats,ResEnt,ResNode);
        if (NewNode==NULL) { // Split Propagate Stops
           if (actsize>2) {
                d=ClosestTwo(Stats, i, j);
                if (!(i==EntryI&&j==actsize-1))
                        MergeMightResplit(Stats, i, j);
                }
          return NULL;
          }
         else { if (this!=Stats->OldRoot)
                        return NewNode;
                else { // Create New Root
                        Stats->CreateNewRoot(this,NewNode);
                        return NULL;
                        }
              }
        }
else { // No Split Coming Up
        entry[EntryI]+=ent;
        return NULL;
        }
}

int Nonleaf::DensestEntry() const
{
int i, imax, nmax;
imax = 0;
nmax = entry[0].n;
for (i=1; i<actsize; i++) {
        if (entry[i].n>nmax) {
                imax = i;
                nmax = entry[i].n;
                }
        }
return imax;
}

int Nonleaf::ClosestOne(Stat *Stats, const Entry& ent) const
{
int i, imin;
double d, dmin;

// empty node
if (actsize<=1) return 0;

// nonemptry node
imin = 0;
dmin = HUGE_DOUBLE;

for (i=0; i<actsize; i++) {
    if (entry[i].n>0) {
        d = distance(Stats->BDtype,entry[i],ent);
        if (d<dmin) {dmin=d;imin=i;}
        }
    }
if (dmin<HUGE_DOUBLE) return imin;
else return -1;
}

int Leaf::DensestEntry() const
{
// do nothing and never be called.
}

int Leaf::ClosestOne(Stat *Stats, const Entry& ent) const
{
int i, imin;
double d, dmin;

// empty node
if (actsize<1) return 0;

// nonempty node
imin = 0;
dmin = HUGE_DOUBLE;

/***************************************************
// option1: min average D: tend to cause overlapping
for (i=0; i<actsize; i++) {
    if (entry[i].n>0) {
        d = fitness(Stats->Ftype,entry[i],ent);
        if (d<dmin) {
                dmin=d;
                imin=i;
                }
        }
    }
if (dmin<=Stats->CurFt+PRECISION_ERROR) return imin;
else return -1;

***************************************************/

// option2: min average Di: less overlapping
for (i=0; i<actsize; i++) {
    if (entry[i].n>0) {
        d = distance(Stats->BDtype,entry[i],ent);
        if (d<dmin) {
                dmin=d;
                imin=i;
                }
        }
    }
if (dmin<HUGE_DOUBLE) {
        dmin = fitness(Stats->Ftype,entry[imin],ent);
        if (dmin<=Stats->CurFt+PRECISION_ERROR) return imin;
        else return -1;
        }
else return -1;
}

double Leaf::ClosestTwo(Stat *Stats, int &i, int &j) const {
int i1,j1,imin,jmin;
double d, dmin;

if (actsize<2)
        print_error("Leaf::ClosestTwo","Less than 2 entries");

if (actsize==2) {
        i=0; j=1;
        return distance(Stats->BDtype,entry[0],entry[1]);
        }

imin = 0;
jmin = 1;
dmin = distance(Stats->BDtype,entry[0],entry[1]);

for (i1=0;i1<actsize-1;i1++)
   for (j1=i1+1;j1<actsize;j1++) {
                d = distance(Stats->BDtype,entry[i1],entry[j1]);
                if (d<dmin) {
                        imin = i1;
                        jmin = j1;
                        dmin = d;
                        }
        }
i=imin;
j=jmin;
return dmin;
}

// only relevant to phase 3 and hierarchical clustering

double Leaf::ClosestDiffTwo(Stat *Stats, int &i, int &j) const
{
Entry tmpent;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -