⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cftree.c

📁 数据挖掘经典的hierarchial clustering algorithm
💻 C
📖 第 1 页 / 共 2 页
字号:
tmpent.Init(Stats->Dimension);
int i1,j1,imin,jmin;
double d, dmin;

if (actsize<2)
        print_error("Leaf::ClosestDiffTwo","Less than 2 entries");

if (actsize==2)
        {d=distance(Stats->GDtype,entry[0],entry[1]);
         if (d<=0)
         print_error("Leaf::ClosestDiffTwo",
                "Same 2 entries in a leaf: should not happen");
         }

dmin = HUGE_DOUBLE;
imin=0;
jmin=1;
for (i1=0;i1<actsize-1;i1++)
   for (j1=i1+1;j1<actsize;j1++) {
                d = distance(Stats->GDtype,entry[i1],entry[j1]);
                if (d>0 && d<dmin) {
                        imin = i1;
                        jmin = j1;
                        dmin = d;
                        }
        }
i=imin;
j=jmin;
tmpent.Add(entry[i],entry[j]);
return tmpent.Fitness(Stats->Ftype);
}

void Leaf::FarthestTwo(Stat *Stats, int &i, int &j) const {
int i1,j1,imax,jmax;
double d, dmax;

if (actsize<2)
        print_error("Leaf::FarthestTwo","Less than 2 entries");

if (actsize==2)
        {i=0; j=1; return;}

imax = 0;
jmax = 1;
dmax = distance(Stats->BDtype,entry[0],entry[1]);

for (i1=0;i1<actsize-1;i1++)
   for (j1=i1+1;j1<actsize;j1++) {
                d = distance(Stats->BDtype,entry[i1],entry[j1]);
                if (d>dmax) {
                        imax = i1;
                        jmax = j1;
                        dmax = d;}
        }
i=imax; j=jmax;
}

double Nonleaf::ClosestTwo(Stat *Stats, int &i, int &j) const {
int i1,j1,imin,jmin;
double d, dmin;

if (actsize<2)
        print_error("Nonleaf::ClosestTwo","Less than 2 entries");

if (actsize==2) {
        i=0; j=1;
        return distance(Stats->BDtype,entry[0],entry[1]);
        }

imin = 0;
jmin = 1;
dmin = distance(Stats->BDtype,entry[0],entry[1]);

for (i1=0;i1<actsize-1;i1++)
   for (j1=i1+1;j1<actsize;j1++) {
                d = distance(Stats->BDtype,entry[i1],entry[j1]);
                if (d<dmin) {
                        imin = i1;
                        jmin = j1;
                        dmin = d;
                        }
        }
i=imin;
j=jmin;
return dmin;
}

double Nonleaf::ClosestDiffTwo(Stat *Stats, int &i, int &j) const
{
Entry tmpent;
tmpent.Init(Stats->Dimension);
int i1,j1,imin,jmin;
double d, dmin;

if (actsize<2)
        print_error("Nonleaf::ClosestDiffTwo","Less than 2 entries");

if (actsize==2) {
        d=distance(Stats->GDtype,entry[0],entry[1]);
        if (d==0)
        print_error("Nonleaf::ClosestDiffTwo",
                    "Same 2 entries in a nonleaf: should not happen");
        }

dmin=HUGE_DOUBLE;
imin=0;
jmin=1;
for (i1=0;i1<actsize-1;i1++)
   for (j1=i1+1;j1<actsize;j1++) {
                d = distance(Stats->GDtype,entry[i1],entry[j1]);
                if (d>0 && d<dmin) {
                        imin = i1;
                        jmin = j1;
                        dmin = d;}
        }
i=imin;
j=jmin;
tmpent.Add(entry[i],entry[j]);
return tmpent.Fitness(Stats->Ftype);
}

void Nonleaf::FarthestTwo(Stat *Stats, int &i, int &j) const {
int i1,j1,imax,jmax;
double d, dmax;

if (actsize<2)
        print_error("Nonleaf::FarthestTwo","Less than 2 entries");

if (actsize==2)
        {i=0; j=1; return;}

imax = 0;
jmax = 1;
dmax = distance(Stats->BDtype,entry[0],entry[1]);
for (i1=0;i1<actsize-1;i1++)
   for (j1=i1+1;j1<actsize;j1++) {
                d = distance(Stats->BDtype,entry[i1],entry[j1]);
                if (d>dmax) {
                        imax = i1;
                        jmax = j1;
                        dmax = d;}
        }
i=imax; j=jmax;
}

// follow chain of leaves
int Leaf::count_leaf_nodes() const {
int num=1;
if (this->next!=NULL) num+=this->next->count_leaf_nodes();
return num;
}

int Leaf::count_leaf_entries() const {
int num=actsize;
if (this->next!=NULL) num+=this->next->count_leaf_entries();
return num;
}

int Leaf::count_leaf_tuples() const {
int num=0;
for (int i=0; i<actsize; i++) num += entry[i].n;
if (this->next!=NULL) num+=this->next->count_leaf_tuples();
return num;
}

void Nonleaf::Print_Tree(short ind, ostream &fo) const {
for (int i=0; i<actsize; i++) {
        indent(ind,fo); fo<<entry[i]<<endl;
        child[i]->Print_Tree(ind+5,fo);
        }
}

void Nonleaf::Print_Tree(short ind, ofstream &fo) const {
for (int i=0; i<actsize; i++) {
        indent(ind,fo); fo<<entry[i]<<endl;
        child[i]->Print_Tree(ind+5,fo);
        }
}

void Leaf::Print_Tree(short ind, ostream &fo) const {
for (int i=0; i<actsize; i++) {
        indent(ind,fo); fo<<entry[i]<< endl;
        }
}
void Leaf::Print_Tree(short ind, ofstream &fo) const {
for (int i=0; i<actsize; i++) {
        indent(ind,fo); fo<<entry[i]<<endl;
        }
}

void Leaf::visualize_leaf_entries(Stat *Stats, ostream &fo) const {
if (Stats->Dimension>2)
print_error("Nonleaf::Visualize",
            "can't visualize higher than 2 dimensions");
int i;
Leaf *tmp;
tmp=this;
while (tmp!=NULL) {
    for (i=0; i<tmp->actsize; i++)
        tmp->entry[i].Visualize_Rectangle(fo);
    tmp=tmp->next;
    }
tmp=this;
while (tmp!=NULL) {
    for (i=0; i<tmp->actsize; i++)
        tmp->entry[i].Visualize_Circle(fo);
    tmp=tmp->next;
    }
}

void Leaf::visualize_leaf_entries(Stat *Stats, ofstream &fo) const {
if (Stats->Dimension>2)
print_error("Nonleaf::Visualize",
            "can't visualize higher than 2 dimensions");
int i;
Leaf *tmp;
tmp=this;
while (tmp!=NULL) {
    for (i=0; i<tmp->actsize; i++)
        tmp->entry[i].Visualize_Rectangle(fo);
    tmp=tmp->next;
    }
tmp=this;
while (tmp!=NULL) {
    for (i=0; i<tmp->actsize; i++)
        tmp->entry[i].Visualize_Circle(fo);
    tmp=tmp->next;
    }
}

// follow chain of leaves
void Leaf::print_leaf_entries_bychain(ofstream &fo) const {
fo<<"#"<<actsize<<endl;
for (int i=0; i<actsize; i++)
        fo<<entry[i]<<endl;
if (this->next!=NULL)
        this->next->print_leaf_entries_bychain(fo);
}

// follow chain of leaves
void Leaf::print_leaf_entries_bychain(ostream &fo) const {
for (int i=0; i<actsize; i++)
        fo<<entry[i]<<endl;
if (this->next!=NULL)
        this->next->print_leaf_entries_bychain(fo);
}

// topdown search for leaves
void Leaf::print_leaf_entries_topdown(ofstream &fo) const {
fo<<"#"<<actsize<<endl;
for (int i=0; i<actsize; i++)
        fo<<entry[i]<<endl;
}

// topdown search for leaves
void Nonleaf::print_leaf_entries_topdown(ofstream &fo) const {
for (int i=0; i<actsize; i++)
        child[i]->print_leaf_entries_topdown(fo);
}

Node* Leaf::InsertMightSplit(Stat *Stats, const Entry &Ent, Node *ptr)
{
short   samegroup;
Leaf    *NewNode;
int     i1,i2,n1,n2,head1,head2;
double  d1, d2;
Entry   ent1,ent2;
ent1.Init(Stats->Dimension);
ent2.Init(Stats->Dimension);

if (NotFull(Stats)) {
        entry[actsize]=Ent;
        actsize++;
        Stats->CurrEntryCnt++;
        return NULL;
        }

NewNode = new Leaf(Stats);
Stats->MemUsed++;
Stats->TreeSize++;
NewNode->entry[NewNode->actsize]=Ent;
NewNode->actsize++;
Stats->CurrEntryCnt++;

// chain leaf nodes
if (this->next!=NULL) {
        this->next->prev=NewNode;
        }
NewNode->next=this->next;
NewNode->prev=this;
this->next=NewNode;

this->FarthestTwoOut(Stats,this,NewNode,samegroup,i1,i2);
switch (samegroup) {
case 0: this->swap(this,0, this,i1);
        NewNode->swap(NewNode,0, NewNode,i2);
        break;
case 1: this->swap(this,0, this,i1);
        NewNode->swap(NewNode,0, this,i2);
        break;
default: print_error("Leaf::InsertMightSplit","Invalid group flag");
}

n1 = MaxSize(Stats);
n2 = 1;

head1 = 1; head2 = 1;
ent1 = this->entry[0];
ent2 = NewNode->entry[0];
while (head1<n1) {
        d1 = distance(Stats->BDtype,ent1,this->entry[head1]);
        d2 = distance(Stats->BDtype,ent2,this->entry[head1]);
        if (d1<d2) {
                ent1+=this->entry[head1];
                head1++;
                }
        else {  this->assign(NewNode,head2,this,head1);
                this->assign(this,head1,this,n1-1);
                ent2+=NewNode->entry[head2];
                head2++;
                n1--;
                n2++;
                }
        }
this->actsize=n1;
NewNode->actsize=n2;
return(NewNode);
}

Node* Nonleaf::InsertMightSplit(Stat *Stats, const Entry &Ent, Node *Ptr)
{
short   samegroup;
Nonleaf *NewNode;
int     i1,i2,n1,n2,head1,head2;
double  d1, d2;

Entry   ent1,ent2;
ent1.Init(Stats->Dimension);
ent2.Init(Stats->Dimension);

if (NotFull(Stats)) {
        entry[actsize]=Ent;
        child[actsize]=Ptr;
        actsize++;
        return NULL;
        }

NewNode=new Nonleaf(Stats);
Stats->MemUsed++;
Stats->TreeSize++;
NewNode->entry[NewNode->actsize]=Ent;
NewNode->child[NewNode->actsize]=Ptr;
NewNode->actsize++;

this->FarthestTwoOut(Stats,this,NewNode,samegroup,i1,i2);
switch (samegroup) {
case 0: this->swap(this,0, this,i1);
        NewNode->swap(NewNode,0, NewNode,i2);
        break;
case 1: this->swap(this,0, this,i1);
        NewNode->swap(NewNode,0, this,i2);
        break;
default: print_error("Nonleaf::InsertMightSplit","Invalid group flag");
}

n1=MaxSize(Stats);
n2=1;
head1=1; head2=1;
ent1 = this->entry[0];
ent2 = NewNode->entry[0];
while (head1<n1) {
        d1 = distance(Stats->BDtype,ent1,this->entry[head1]);
        d2 = distance(Stats->BDtype,ent2,this->entry[head1]);
        if (d1<d2) {
                ent1+=this->entry[head1];
                head1++;
                }
        else {  this->assign(NewNode,head2,this,head1);
                this->assign(this,head1,this,n1-1);
                ent2+=NewNode->entry[head2];
                head2++;
                n1--;
                n2++;
                }
        }
this->actsize=n1;
NewNode->actsize=n2;
return(NewNode);
}

void Nonleaf::MergeMightResplit(Stat *Stats, int i, int j)
{
int     head1,head2,k,j1,j2,n1,n2,total;
short   samegroup;
double  d1, d2;

Entry   ent1,ent2;
ent1.Init(Stats->Dimension);
ent2.Init(Stats->Dimension);

n1 = child[i]->ActSize();
n2 = child[j]->ActSize();
total = n1+n2;

// Merge: no overflow
if (total<child[i]->MaxSize(Stats)) {

        child[i]->AssignActSize(total);
        entry[i]+=entry[j];
        for (k=0;k<n2;k++)
                child[i]->assign(child[i],n1+k, child[j],k);
        child[j]->AssignNextPrev(Stats);
        delete child[j];
        Stats->MemUsed--;
        Stats->TreeSize--;
        this->assign(this,j, this,actsize-1);
        actsize--;
        }

// ReSplit: overflow
else {

//farthest pair in tmpentry
child[i]->FarthestTwoOut(Stats, child[i],child[j],samegroup,j1,j2);
switch (samegroup) {
case 0: child[i]->swap(child[i],0, child[i],j1);
        child[j]->swap(child[j],0, child[j],j2);
        break;
case 1: child[i]->swap(child[i],0, child[i],j1);
        child[j]->swap(child[j],0, child[i],j2);
        break;
case 2: child[i]->swap(child[i],0, child[j],j1);
        child[j]->swap(child[j],0, child[j],j2);
        break;
default: print_error("Nonleaf::MergeMightResplit","Invalid group flag");
}

head1=1; head2=1;
ent1 = *(child[i]->TheEntry(0));
ent2 = *(child[j]->TheEntry(0));
while (head1<n1 && head2<n2) {
   d1 = distance(Stats->BDtype,ent1,*(child[i]->TheEntry(head1)));
   d2 = distance(Stats->BDtype,ent2,*(child[i]->TheEntry(head1)));
   if (d1<d2) {
        ent1+=*(child[i]->TheEntry(head1));
        head1++;
        }
   else {
        child[i]->swap(child[i],head1,child[j],head2);
        ent2 += *(child[j]->TheEntry(head2));
        head2++;
        }
    }

// child[j] has left over
if (head1>=n1 && head2<n2) {
  while (head2<n2 && n1<child[i]->MaxSize(Stats)) {
   d1 = distance(Stats->BDtype,ent1,*(child[j]->TheEntry(head2)));
   d2 = distance(Stats->BDtype,ent2,*(child[j]->TheEntry(head2)));
   if (d2<d1) {
        ent2 += *(child[j]->TheEntry(head2));
        head2++;
        }
   else {
        child[i]->assign(child[i],head1,child[j],head2);
        child[j]->assign(child[j],head2,child[j],n2-1);
        ent1+=*(child[i]->TheEntry(head1));
        head1++;
        n1++;
        n2--;
        }
   }
}

// child[i] has left over
else if (head1<n1 && head2>=n2) {
   while (head1<n1 && n2<child[j]->MaxSize(Stats)) {
    d1 = distance(Stats->BDtype,ent1,*(child[i]->TheEntry(head1)));
    d2 = distance(Stats->BDtype,ent2,*(child[i]->TheEntry(head1)));
    if (d1<d2) {
        ent1 += *(child[i]->TheEntry(head1));
        head1++;
        }
    else {
        child[j]->assign(child[j],head2,child[i],head1);
        child[i]->assign(child[i],head1,child[i],n1-1);
        ent2+=*(child[j]->TheEntry(head2));
        head2++;
        n1--;
        n2++;
        }
    }
}
child[i]->AssignActSize(n1);
child[j]->AssignActSize(n2);
child[i]->CF(entry[i]);
child[j]->CF(entry[j]);
}
}

void Leaf::FarthestTwoOut(Stat *Stats, Node *node1, Node *node2,
                          short &samegroup, int &i, int &j) const
{
int     i1, i2, j1, j2, n1, n2;
double  d, dmax;
Leaf    *leaf1, *leaf2;

leaf1 = (Leaf *)node1;
leaf2 = (Leaf *)node2;
n1 = leaf1->ActSize();
n2 = leaf2->ActSize();

if (n1==0 || n2==0)
        print_error("Leaf::FarthestTwoOut","empty node");

samegroup = 0;
i=0; j=0;
dmax = distance(Stats->BDtype,leaf1->entry[i],leaf2->entry[j]);

for (i1=0; i1<n1; i1++)
        for (i2=0; i2<n2; i2++) {
           d = distance(Stats->BDtype,leaf1->entry[i1],leaf2->entry[i2]);
           if (d>dmax) {
                i=i1;
                j=i2;
                dmax=d;
                }
        }
for (i1=0; i1<n1-1; i1++)
        for (j1=i1+1; j1<n1; j1++) {
           d = distance(Stats->BDtype,leaf1->entry[i1],leaf1->entry[j1]);
           if (d>dmax) {
                i=i1;
                j=j1;
                dmax=d;
                samegroup=1;
                }
        }
for (i2=0; i2<n2-1; i2++)
        for (j2=i2+1; j2<n2; j2++) {
           d = distance(Stats->BDtype,leaf2->entry[i2],leaf2->entry[j2]);
           if (d>dmax) {
                i=i2;
                j=j2;
                dmax=d;
                samegroup=2;
                }
        }
}


void Nonleaf::FarthestTwoOut(Stat *Stats, Node *node1, Node *node2,
                             short &samegroup, int &i, int &j) const
{
int     i1, i2, j1, j2, n1, n2;
double  d, dmax;
Nonleaf *nleaf1, *nleaf2;

nleaf1 = (Nonleaf *)node1;
nleaf2 = (Nonleaf *)node2;
n1 = nleaf1->ActSize();
n2 = nleaf2->ActSize();

if (n1==0 || n2==0)
        print_error("Nonleaf::FarthestTwoOut","empty node");

samegroup = 0;
i=0; j=0;
dmax=distance(Stats->BDtype,nleaf1->entry[i],nleaf2->entry[j]);

for (i1=0; i1<n1; i1++)
   for (i2=0; i2<n2; i2++) {
        d = distance(Stats->BDtype,nleaf1->entry[i1],nleaf2->entry[i2]);
        if (d>dmax) {
                i=i1;
                j=i2;
                dmax=d;
                }
        }
for (i1=0; i1<n1-1; i1++)
    for (j1=i1+1; j1<n1; j1++) {
        d = distance(Stats->BDtype,nleaf1->entry[i1],nleaf1->entry[j1]);
        if (d>dmax) {
                i=i1;
                j=j1;
                dmax=d;
                samegroup=1;
                }
        }
for (i2=0; i2<n2-1; i2++)
    for (j2=i2+1; j2<n2; j2++) {
        d = distance(Stats->BDtype,nleaf2->entry[i2],nleaf2->entry[j2]);
        if (d>dmax) {
                i=i2;
                j=j2;
                dmax=d;
                samegroup=2;
                }
        }
}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -