⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 status.c

📁 数据挖掘经典的hierarchial clustering algorithm
💻 C
📖 第 1 页 / 共 2 页
字号:
        fo<<"Bars\t";
        for (short i=0;i<Stats->Dimension;i++)
                fo<<Stats->Bars[i]<<"\t";
        fo<<endl;
        }

fo<<"K\t"<<Stats->K<<endl;
fo<<"InitFt\t"<<Stats->InitFt<<endl;
fo<<"Ft\t"<<Stats->Ft<<endl;
fo<<"Gtype\t"<<Stats->Gtype<<endl;
fo<<"GDtype\t"<<Stats->GDtype<<endl;
fo<<"Qtype\t"<<Stats->Qtype<<endl;
fo<<"RefineAlg\t"<<Stats->RefineAlg<<endl;
fo<<"NoiseFlag\t"<<Stats->NoiseFlag<<endl;
fo<<"MaxRPass\t"<<Stats->MaxRPass<<endl;

fo<<"Phase\t"<<Stats->Phase<<endl;
fo<<"Pass\t"<<Stats->Passi<<endl;
fo<<"CurFt\t"<<sqrt(Stats->CurFt)<<endl;
fo<<"MemUsed \t"<<Stats->MemUsed<<endl;
fo<<"TreeSize\t"<<Stats->TreeSize<<endl;

fo<<"PrevEntryCnt\t"<<Stats->PrevEntryCnt<<endl;
fo<<"CurrEntryCnt\t"<<Stats->CurrEntryCnt<<endl;
fo<<"PrevDataCnt\t"<<Stats->PrevDataCnt<<endl;
fo<<"CurrDataCnt\t"<<Stats->CurrDataCnt<<endl;

fo<<"AvgDensity\t"<<Stats->AvgDensity<<endl;
fo<<"NoiseCnt\t"<<Stats->NoiseCnt<<endl;

fo<<"Ranges\t"<<Stats->Ranges<<endl;

if (Stats->Phase==1 || Stats->Phase==2)
        if (Stats->OldRoot) Stats->OldRoot->Print_Summary(Stats,fo);

if (Stats->SplitBuffer!=NULL)
fo<<"SplitBuffer\n"<<Stats->SplitBuffer<<endl;
if (Stats->OutlierQueue!=NULL)
fo<<"OutlierQueue\n"<<Stats->OutlierQueue<<endl;
if (Stats->OStats!=NULL)
fo<<"OutlierTree Status\n"<<Stats->OStats<<endl;

fo<<"OutlierEntryCnt\t"<<Stats->OutlierEntryCnt<<endl;
fo<<"OutlierTupleCnt\t"<<Stats->OutlierTupleCnt<<endl;

return fo;
}

ofstream& operator<<(ofstream &fo,Stat* Stats) {
fo<<"***************Status of "<<Stats->name<<endl;
if (strcmp(Stats->name,"outlier")!=0) {
fo<<"WMflag\t"<<Stats->WMflag<<endl;
fo<<"W\t"<<Stats->W<<endl;
fo<<"M\t"<<Stats->M<<endl;
}
fo<<"Dimension\t"<<Stats->Dimension<<endl;
fo<<"PageSize\t"<<Stats->PageSize<<endl;
fo<<"MemSize\t"<<Stats->MemSize<<endl;
fo<<"BufferSize\t"<<Stats->BufferSize<<endl;
fo<<"QueueSize\t"<<Stats->QueueSize<<endl;
fo<<"OutlierTreeSize\t"<<Stats->OutlierTreeSize<<endl;

fo<<"BDtype\t"<<Stats->BDtype<<endl;
fo<<"Ftype\t"<<Stats->Ftype<<endl;
fo<<"Phase1Scheme\t"<<Stats->Phase1Scheme<<endl;
fo<<"RebuiltAlg\t"<<Stats->RebuiltAlg<<endl;
fo<<"StayTimes\t"<<Stats->StayTimes<<endl;

fo<<"NoiseRate\t"<<Stats->NoiseRate<<endl;

fo<<"Range\t"<<Stats->Range<<endl;

fo<<"CFDistr\t"<<Stats->CFDistr<<endl;
fo<<"H\t"<<Stats->H<<endl;

if (Stats->Bars!=NULL) {
        fo<<"Bars\t";
        for (short i=0;i<Stats->Dimension;i++)
                fo<<Stats->Bars[i]<<"\t";
        fo<<endl;
        }

fo<<"K\t"<<Stats->K<<endl;
fo<<"InitFt\t"<<Stats->InitFt<<endl;
fo<<"Ft\t"<<Stats->Ft<<endl;
fo<<"Gtype\t"<<Stats->Gtype<<endl;
fo<<"GDtype\t"<<Stats->GDtype<<endl;
fo<<"Qtype\t"<<Stats->Qtype<<endl;
fo<<"RefineAlg\t"<<Stats->RefineAlg<<endl;
fo<<"NoiseFlag\t"<<Stats->NoiseFlag<<endl;
fo<<"MaxRPass\t"<<Stats->MaxRPass<<endl;

fo<<"Phase\t"<<Stats->Phase<<endl;
fo<<"Pass\t"<<Stats->Passi<<endl;
fo<<"CurFt\t"<<sqrt(Stats->CurFt)<<endl;
fo<<"MemUsed \t"<<Stats->MemUsed<<endl;
fo<<"TreeSize\t"<<Stats->TreeSize<<endl;

fo<<"PrevEntryCnt\t"<<Stats->PrevEntryCnt<<endl;
fo<<"CurrEntryCnt\t"<<Stats->CurrEntryCnt<<endl;
fo<<"PrevDataCnt\t"<<Stats->PrevDataCnt<<endl;
fo<<"CurrDataCnt\t"<<Stats->CurrDataCnt<<endl;

fo<<"AvgDensity\t"<<Stats->AvgDensity<<endl;
fo<<"NoiseCnt\t"<<Stats->NoiseCnt<<endl;

fo<<"Ranges\t"<<Stats->Ranges<<endl;

if (Stats->Phase==1 || Stats->Phase==2)
        if (Stats->OldRoot) Stats->OldRoot->Print_Summary(Stats,fo);

if (Stats->SplitBuffer!=NULL)
fo<<"SplitBuffer\n"<<Stats->SplitBuffer<<endl;
if (Stats->OutlierQueue!=NULL)
fo<<"OutlierQueue\n"<<Stats->OutlierQueue<<endl;
if (Stats->OStats!=NULL)
fo<<"OutlierTree Status\n"<<Stats->OStats<<endl;

fo<<"OutlierEntryCnt\t"<<Stats->OutlierEntryCnt<<endl;
fo<<"OutlierTupleCnt\t"<<Stats->OutlierTupleCnt<<endl;

return fo;
}

// sqrt_ed:

double Stat::AvgRDScanRoot() {
Entry tmpent;
tmpent.Init(Dimension);
OldRoot->CF(tmpent);
return sqrt(tmpent.Fitness(Ftype));
}

double Stat::AbsVScanLeafEntry() {
int    i;
Leaf*  tmp=NewLeafHead;
double abs_v=0.0;
while (tmp!=NULL) {
        for (i=0;i<tmp->actsize;i++)
           abs_v+=pow(sqrt(tmp->entry[i].Fitness(Ftype)),Dimension);
        tmp=tmp->next;
        }
return abs_v;
}

double Stat::AbsVScanLeafNode() {
Leaf*   tmp=NewLeafHead;
double  abs_v=0.0;
while (tmp!=NULL) {
        abs_v+=pow(sqrt(tmp->Fitness(Ftype)),Dimension);
        tmp=tmp->next;
        }
return abs_v;
}

double Stat::AvgDNNScanLeafEntry(short dtype) {
Leaf*   tmp=NewLeafHead;
int     i,j,total_n=0;
double  *dmin;
double  d,total_d=0.0;

while (tmp!=NULL) {
  if (tmp->actsize>1) {
    dmin=new double[tmp->actsize];
    for (i=0; i<tmp->actsize; i++) dmin[i]=HUGE_DOUBLE;
    total_n+=tmp->actsize;
    for (i=0; i<tmp->actsize-1; i++)
      for (j=i+1; j<tmp->actsize; j++) {
           d=distance(dtype,tmp->entry[i],tmp->entry[j]);
           if (d>=0) d=sqrt(d); else d=0.0;
           if (d<dmin[i]) dmin[i]=d;
           if (d<dmin[j]) dmin[j]=d;
           }
    for (i=0; i<tmp->actsize; i++) total_d+=dmin[i];
    delete [] dmin;
    }
  tmp=tmp->next;
  }
return total_d/total_n;
}

double Stat::FtSurvey1()
{
int i,j;

// approximate crowdest leaf node
Node *node = OldRoot->DenseNode();

if (node==NULL)
   print_error("FtSurvey1","crowded place wrong");

// merge the closest pair
return sqrt(node->ClosestDiffTwo(this,i,j));
}

double Stat::FtSurvey2()
{
Entry tmpent;
tmpent.Init(Dimension);

// approximate crowdest leaf node
Node *node = OldRoot->DenseNode();

if (node==NULL)
   print_error("FtSurvey2","crowded place wrong");

// merge the whole leaf node
node->CF(tmpent);
return sqrt(tmpent.Fitness(Ftype));
}

// more general in terms of distortion:
// distortpercent=0% ==> FtSurvey1
// distortpercent=100% ==> FtSurvey2
double Stat::FtSurvey3(double distortpercent)
{
int i,j;
double oldV, newV, origin;

// approximate crowdest leaf node
Node *node = OldRoot->DenseNode();

if (node==NULL)
   print_error("FtSurvey3","crowded place is wrong");

if (node->actsize==1)
   print_error("FtSurvey3","only one leaf entry exists in crowded place");

origin=0;
for (i=0;i<node->actsize;i++)
        origin+=node->entry[i].n*node->entry[i].Radius();

// merge to form a natural cluster hierarchy in the leaf node
Hierarchy *h = new Hierarchy(node->actsize-1, Dimension);
h->MergeHierarchy(GDtype, node->actsize,node->Entries());

// split on the hierarchy to reach distortpercent
oldV = h->DistortionEdge(node->Entries());
do {  newV = h->DistortionEdge(node->Entries());
      if ((newV-origin)/(oldV-origin)<=distortpercent||
          h->chainptr==node->actsize-2)
          break;
     } while (h->SplitHierarchy(BY_KCLUSTERS,Ftype,Ft)==TRUE);

// choose the cluster with MinFt along the split Edge
double FutFt=h->MinFtMerged(Ftype);

// replace entries in the node by newentries after physically merging the cluster
int leafsize = node->MaxSize(this);
Entry *newentry = new Entry[leafsize];
for (i=0; i<leafsize; i++) newentry[i].Init(Dimension);

j=0; newentry[0]=h->cf[-h->chain[0]-1];
// find the leaf entries forming that cluster and label them
while (h->SplitHierarchy(BY_KCLUSTERS,Ftype,Ft)==TRUE);
for (i=0; i<=h->chainptr; i++) {
        if (h->chain[i]>0) {
                node->entry[h->chain[i]-1].n=0;
                }
        }

for (i=0; i<node->actsize; i++) {
        if (node->entry[i].n>0) {
                newentry[++j]=node->entry[i];
                }
        }
delete [] node->entry;
node->actsize = j+1;
node->entry=newentry;
return sqrt(FutFt);
}

// Scan Leaf Nodes:
// AvgDensity

void Stat::MakeNewTree() {

PrevEntryCnt=CurrEntryCnt;
PrevDataCnt=CurrDataCnt;

CurrEntryCnt=0;

// CurrDataCnt keep going up unless specified

TreeSize=OldRoot->Depth();
MemUsed+=TreeSize;
Node *tmpnode;

// initialize new tree
if (TreeSize==1) {
   tmpnode=new Leaf(this);
   tmpnode->actsize=0;
   NewRoot=tmpnode;
   NewLeafHead=(Leaf*)tmpnode;
   OldLeafHead=(Leaf*)tmpnode;
}
else {
   tmpnode=new Nonleaf(this);
   tmpnode->actsize=1;
   NewRoot=tmpnode;
   for (int i=0; i<TreeSize-2; i++) {
        tmpnode->NewNonleafChildI(this,0);
        tmpnode=tmpnode->TheChild(0);
        tmpnode->actsize=1;
        }
   tmpnode->NewLeafChildI(this,0);
   tmpnode=tmpnode->TheChild(0);
   tmpnode->actsize= 0;
   NewLeafHead=(Leaf*)tmpnode;
   OldLeafHead=(Leaf*)tmpnode;
   }
}

void Stat::MarkNewTree() {

PrevEntryCnt=CurrEntryCnt;
PrevDataCnt=CurrDataCnt;

CurrEntryCnt = 0;
// CurrDataCnt keep going up unless specified

OldLeafHead = NewLeafHead;
}

void Stat::StartNewTree() {

PrevEntryCnt=CurrEntryCnt;
PrevDataCnt=CurrDataCnt;

CurrEntryCnt = 0;
// CurrDataCnt keep going up unless specified

if (OldRoot) OldRoot->free_nonleaf(this);
OldRoot = new Leaf(this);
NewRoot = OldRoot;
MemUsed++;
TreeSize = 1;
OldLeafHead = NewLeafHead;
NewLeafHead = (Leaf *)OldRoot;
}

void Stat::CreateNewRoot(Node *child1, Node *child2) {
NewRoot=new Nonleaf(this); MemUsed++; TreeSize++;
NewRoot->AssignActSize(2);
NewRoot->AssignChild(0,child1);
child1->CF(*(NewRoot->TheEntry(0)));
NewRoot->AssignChild(1,child2);
child2->CF(*(NewRoot->TheEntry(1)));
}

int Stat::EntrySize() const {
#ifdef RECTANGLE
return sizeof(int)+sizeof(double)*(3*Dimension+1);
#else
return sizeof(int)+sizeof(double)*(Dimension+1);
#endif
}

// shift the tree:
void Stat::ShiftTree2()
{
int i;

Entry ent;
ent.Init(Dimension);

Node *tmpnode;

MakeNewTree();

int height = OldRoot->Depth();

Path CurrPath(height), BestPath(height);

// initialize CurrPath to the leftmost path (leaf entry) in old tree
tmpnode=OldRoot;
for (i=0; i<height; i++) {
        CurrPath.Push(0,tmpnode);
        tmpnode=tmpnode->TheChild(0);
        }

tmpnode=CurrPath.TopLeaf();

while (tmpnode!=NULL) {

  // Process all entries in the leaf node
  for (i=0; i<tmpnode->actsize; i++) {
      ent=tmpnode->entry[i];
      // find BestPath for current entry in new tree
      BestPath.Reset();
      if (NewRoot->BestFitPath2(this,ent,BestPath)==TRUE)
                BestPath.AddonPath(this,ent,NewRoot);
      else  CurrPath.AddonLeaf(this,ent,NewRoot);
      }

   // Process next leaf node
   tmpnode=CurrPath.NextRightLeafFreeSpace(this);
   if (tmpnode!=NULL) CurrPath.InsertLeaf(this,NewRoot);
   }

OldRoot=NewRoot;
OldLeafHead=NewLeafHead;
NewRoot->FreeEmptyNode(this);
}

// compact the tree:
void Stat::CompactTree2()
{
int i;

Entry ent;
ent.Init(Dimension);

MarkNewTree();

int height = OldRoot->Depth();

Path CurrPath(height), BestPath(height);

// initialize to the leftmost path (or leaf entry) in the tree
Node *tmpnode=OldRoot;
for (i=0; i<height; i++) {
        CurrPath.Push(0,tmpnode);
        tmpnode=tmpnode->TheChild(0);
        }

while (CurrPath.Exists()) {

        // takeoff current path (or leaf entry) from the tree
        ent=*(CurrPath.TopLeafEntry());
        CurrPath.TakeoffPath(ent);

        // find bestpath for current leaf entry in tree and put back
        BestPath.Reset();
        if (OldRoot->BestFitPath2(this,ent,BestPath)==TRUE
            && BestPath<CurrPath) {
                BestPath.AddonPath(this,ent,OldRoot);
                CurrPath.CollectSpace(this);
                }
        else {  CurrPath.AddonPath(this,ent,OldRoot);
                CurrEntryCnt++;
                CurrPath.NextRightPath();
                }
        }
}

// responsible for old leaves
void Stat::ScanLeaf2()
{
int k = 0;

Entry ent;
ent.Init(Dimension);

short res=TRUE;

StartNewTree();

while (res!=FALSE) {
     res = NextEntryFreeOldLeafHead(k,ent);
     if (res==TRUE) {
                OldRoot->AdjustTree(this,ent);
                OldRoot = NewRoot;
                }
      }
}

void Stat::Accept2(const Entry &ent)
{
// keep trying until accepted anyway

while (1) {

  // 1: within range, accepted
  if (CurrEntryCnt<=Range) {
        CurrDataCnt+=ent.n;
        OldRoot->AdjustTree(this,ent);
        OldRoot = NewRoot;
        return;
        }

  // 2: out of range, increase threshold, rebuild tree
cout<<"#"<<name<<" "<<Phase<<" "<<Passi<<" "<<MemUsed<<" "
    <<CurrDataCnt<<" "<<CurrEntryCnt<<" "<<sqrt(CurFt)<<endl;

  RebuiltTree2(1);

cout<<"#"<<name<<" "<<Phase<<" "<<Passi<<" "<<MemUsed<<" "
    <<CurrDataCnt<<" "<<CurrEntryCnt<<" "<<sqrt(CurFt)<<endl;
  }
}

void Stat::RebuiltTree2(short inc_flag)
{
if (inc_flag==1 && (Passi+1)%StayTimes==0) SelectFtA();

Passi++;
switch (RebuiltAlg) {
 case 0: ScanLeaf2(); break;
 case 1: CompactTree2(); break;
 case 2: ShiftTree2(); break;
 }
}

void Stat::SelectInitFt2()
{

CurFt=MaxOne(CurFt,pow(AbsVScanLeafEntry()/Range,2.0/Dimension));

/*
ConNode *contree;
contree=NewRoot->Copy(this);
contree->Connect(this);
CurFt=MaxOne(CurFt,pow(contree->CoverUpLeafNodes(Range*1.0/CurrEntryCnt),2.0));
contree->Free();
*/
}

short Stat::NextEntryFromLeafHead(int &pos, Entry &ent, Leaf **tmp) {
while ((*tmp)!=NULL && pos>=(*tmp)->actsize) {
        pos=0;
        (*tmp)=(*tmp)->NextLeaf();
        }
if ((*tmp)==NULL) return FALSE;
ent=*((*tmp)->TheEntry(pos));
pos++;
if (pos==(*tmp)->actsize) {
        pos=0;
        *tmp=(*tmp)->NextLeaf();
        }
return TRUE;
}

short Stat::NextEntryFreeOldLeafHead(int &pos, Entry &ent) {
Leaf *tmp;
while (OldLeafHead!=NULL && pos>=OldLeafHead->actsize) {
        pos = 0;
        tmp = OldLeafHead;
        OldLeafHead = OldLeafHead->NextLeaf();
        delete tmp; MemUsed--;
        }

if (OldLeafHead==NULL) return FALSE;

ent=*(OldLeafHead->TheEntry(pos));
pos++;
if (pos==OldLeafHead->actsize) {
        pos=0;
        tmp = OldLeafHead;
        OldLeafHead = OldLeafHead->NextLeaf();
        delete tmp; MemUsed--;
        }
return TRUE;
}

short Stat::NextEntryFreeRestLeafPtr(int &pos, Entry &ent) {
Leaf *tmp;
while (RestLeafPtr!=NULL && pos>=RestLeafPtr->actsize) {
        pos = 0;
        tmp = RestLeafPtr;
        RestLeafPtr = RestLeafPtr->NextLeaf();
        delete tmp; MemUsed--;
        }

if (RestLeafPtr==NULL) return FALSE;

ent=*(RestLeafPtr->TheEntry(pos));
pos++;
if (pos==RestLeafPtr->actsize) {
        pos=0;
        tmp = RestLeafPtr;
        RestLeafPtr = RestLeafPtr->NextLeaf();
        delete tmp; MemUsed--;
        }
return TRUE;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -