⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 status.c

📁 数据挖掘经典的hierarchial clustering algorithm
💻 C
📖 第 1 页 / 共 2 页
字号:
/****************************************************************
File Name: status.C
Author: Tian Zhang, CS Dept., Univ. of Wisconsin-Madison, 1995

               Copyright(c) 1995 by Tian Zhang

                   All Rights Reserved

Permission to use, copy and modify this software must be granted
by the author and provided that the above copyright notice appear
in all relevant copies and that both that copyright notice and this
permission notice appear in all relevant supporting documentations.

Comments and additions may be sent to the author at zhang@cs.wisc.edu.

******************************************************************/

#include "global.h"
#include "util.h"
#include "vector.h"
#include "rectangle.h"
#include "cfentry.h"
#include "cutil.h"
#include "parameter.h"
#include "status.h"
#include "cftree.h"
#include "path.h"
#include "contree.h"
#include "buffer.h"
#include "recyqueue.h"

#include "hierarchy.h"

// Build a Stat labelled `str` with all of its run-time state cleared.
//
// str: label copied into `name` with strcpy. No length check is done in
//      this function, so the caller must pass a string that fits.
//
// Only the members listed below are set; members such as Dimension or
// PageSize are not assigned by this constructor.
Stat::Stat(char *str) {
    // label
    strcpy(name, str);

    // phase / pass / threshold state
    Phase = 1;
    Passi = 0;
    CurFt = 0.0;
    AvgDensity = 0;

    // size bookkeeping
    MemUsed = 0;
    TreeSize = 0;

    // counters
    PrevEntryCnt = 0;
    CurrEntryCnt = 0;
    PrevDataCnt = 0;
    CurrDataCnt = 0;
    NoiseCnt = 0;
    OutlierEntryCnt = 0;
    OutlierTupleCnt = 0;

    // tree roots and leaf chain heads start out empty
    OldRoot = NULL;
    NewRoot = NULL;
    OldLeafHead = NULL;
    NewLeafHead = NULL;
    RestLeafPtr = NULL;
    RestLeafK = 0;

    // optional helpers and arrays start out empty (NULL is what the
    // destructor tests before releasing them)
    Bars = NULL;
    Entries = NULL;
    SplitBuffer = NULL;
    OutlierQueue = NULL;
    OStats = NULL;
}

// Release whatever this Stat still holds. Each member is released only when
// it is non-NULL:
//   Bars, Entries                     -> delete []
//   NewRoot                           -> NewRoot->free_nonleaf(this)
//   NewLeafHead                       -> NewLeafHead->free_leaf(this)
//   SplitBuffer, OutlierQueue, OStats -> delete
// OldRoot and OldLeafHead are not released here.
Stat::~Stat() {
    if (Bars != NULL) {
        delete [] Bars;
    }
    if (NewRoot != NULL) {
        NewRoot->free_nonleaf(this);
    }
    if (NewLeafHead != NULL) {
        NewLeafHead->free_leaf(this);
    }
    if (SplitBuffer != NULL) {
        delete SplitBuffer;
    }
    if (OutlierQueue != NULL) {
        delete OutlierQueue;
    }
    if (OStats != NULL) {
        delete OStats;
    }
    if (Entries != NULL) {
        delete [] Entries;
    }
}

// Accept `ent`, retrying until it has been taken.
//
// Each round tries, in this order:
//   1. MemUsed <= MemSize: insert through OldRoot->AdjustTree and refresh
//      OldRoot from NewRoot.
//   2. otherwise, if a split buffer exists and is not full: try
//      OldRoot->AbsorbEntry2; when that returns FALSE, park the entry in
//      the split buffer.
//   3. otherwise: print a status line, rebuild with RebuiltTree1A(1), print
//      the status line again, rescan the split buffer (if any) and the
//      outlier queue (if present and full), then start the next round.
// In cases 1 and 2 the data count and Ranges are updated before the entry
// is placed, and the function returns.
void Stat::Accept1A(const Entry &ent)
{
    for (;;) {
        const bool hasRoom = (MemUsed <= MemSize);

        // the buffer is only consulted when memory is exhausted
        if (hasRoom || (SplitBuffer != NULL && !SplitBuffer->Full())) {
            CurrDataCnt += ent.n;
            Ranges += ent.sx;       // valid only for Stats, not for OStats

            if (hasRoom) {
                OldRoot->AdjustTree(this, ent);
                OldRoot = NewRoot;
            }
            else if (OldRoot->AbsorbEntry2(this, ent) == FALSE) {
                SplitBuffer->AddEnt(ent);
            }
            return;
        }

        // memory out and buffer unavailable or full: rebuild, then retry
        cout << "#" << name << " " << Phase << " " << Passi << " " << MemUsed << " "
             << CurrDataCnt << " " << CurrEntryCnt << " " << sqrt(CurFt) << endl;

        RebuiltTree1A(1);

        cout << "#" << name << " " << Phase << " " << Passi << " " << MemUsed << " "
             << CurrDataCnt << " " << CurrEntryCnt << " " << sqrt(CurFt) << endl;

        if (SplitBuffer != NULL) {
            ScanSplitBuffer();
        }
        if (OutlierQueue != NULL && OutlierQueue->Full()) {
            ScanOutlierQueue();
        }
    }
}

// Accept `ent`, retrying until it has been taken.
//
// Each round tries, in this order:
//   1. MemUsed <= MemSize: insert through OldRoot->AdjustTree and refresh
//      OldRoot from NewRoot.
//   2. otherwise, if a split buffer exists and is not full: try
//      OldRoot->AbsorbEntry2; when that returns FALSE, park the entry in
//      the split buffer.
//   3. otherwise: print a status line, rebuild with RebuiltTree1B(1), print
//      the status line again, rescan the split buffer (if any) and the
//      outlier queue (if present and full), then start the next round.
// In cases 1 and 2 the data count and Ranges are updated before the entry
// is placed, and the function returns.
void Stat::Accept1B(const Entry &ent)
{
    for (;;) {
        const bool hasRoom = (MemUsed <= MemSize);

        // the buffer is only consulted when memory is exhausted
        if (hasRoom || (SplitBuffer != NULL && !SplitBuffer->Full())) {
            CurrDataCnt += ent.n;
            Ranges += ent.sx;       // valid only for Stats, not for OStats

            if (hasRoom) {
                OldRoot->AdjustTree(this, ent);
                OldRoot = NewRoot;
            }
            else if (OldRoot->AbsorbEntry2(this, ent) == FALSE) {
                SplitBuffer->AddEnt(ent);
            }
            return;
        }

        // memory out and buffer unavailable or full: rebuild, then retry
        cout << "#" << name << " " << Phase << " " << Passi << " " << MemUsed << " "
             << CurrDataCnt << " " << CurrEntryCnt << " " << sqrt(CurFt) << endl;

        RebuiltTree1B(1);

        cout << "#" << name << " " << Phase << " " << Passi << " " << MemUsed << " "
             << CurrDataCnt << " " << CurrEntryCnt << " " << sqrt(CurFt) << endl;

        if (SplitBuffer != NULL) {
            ScanSplitBuffer();
        }
        if (OutlierQueue != NULL && OutlierQueue->Full()) {
            ScanOutlierQueue();
        }
    }
}

// TZ: work here
// Seed CurFt from InitFt: a non-positive InitFt gives 0.0, anything else
// gives InitFt squared.
void Stat::SelectInitFt1()
{
    CurFt = (InitFt <= 0.0) ? 0.0 : InitFt * InitFt;
}

// Pick the next value of CurFt.
// The candidate is AvgDNNScanLeafEntry(BDtype) squared. When CurFt is still
// exactly 0.0 the candidate is taken as is; otherwise CurFt becomes
// MaxOne(CurFt, candidate) (presumably the larger of the two -- MaxOne is
// defined elsewhere).
void Stat::SelectFtB()
{
    if (CurFt != 0.0) {
        CurFt = MaxOne(CurFt, pow(AvgDNNScanLeafEntry(BDtype), 2.0));
        return;
    }
    CurFt = pow(AvgDNNScanLeafEntry(BDtype), 2.0);
}

// Pick the next value of CurFt.
// The candidate is AvgDNNScanLeafEntry(BDtype) squared. When CurFt is still
// exactly 0.0 the candidate is taken as is; otherwise CurFt becomes
// MaxOne(CurFt, candidate) (presumably the larger of the two -- MaxOne is
// defined elsewhere).
void Stat::SelectFtA()
{
    if (CurFt != 0.0) {
        CurFt = MaxOne(CurFt, pow(AvgDNNScanLeafEntry(BDtype), 2.0));
        return;
    }
    CurFt = pow(AvgDNNScanLeafEntry(BDtype), 2.0);
}

// Run one rebuild pass.
//
// inc_flag: when it equals 1 and Passi is a multiple of StayTimes, a new
//           threshold is selected with SelectFtB() before rebuilding.
//
// Steps: refresh AvgDensity as NewRoot->N() divided by CurrEntryCnt,
// optionally reselect the threshold, increment Passi, then dispatch on
// RebuiltAlg (0: ScanLeaf1A, 1: CompactTree1A, 2: ShiftTree1A; any other
// value does nothing).
void Stat::RebuiltTree1B(short inc_flag)
{
    AvgDensity = 1.0*NewRoot->N() / (1.0*CurrEntryCnt);

    // the modulo is only evaluated when inc_flag is 1
    const bool reselect = (inc_flag == 1) && (Passi % StayTimes == 0);
    if (reselect) {
        SelectFtB();
    }

    Passi++;

    if (RebuiltAlg == 0) {
        ScanLeaf1A();
    }
    else if (RebuiltAlg == 1) {
        CompactTree1A();
    }
    else if (RebuiltAlg == 2) {
        ShiftTree1A();
    }
}

// Run one rebuild pass.
//
// inc_flag: when it equals 1 and Passi is a multiple of StayTimes, a new
//           threshold is selected with SelectFtA() before rebuilding.
//
// Steps: refresh AvgDensity as NewRoot->N() divided by CurrEntryCnt,
// optionally reselect the threshold, increment Passi, then dispatch on
// RebuiltAlg (0: ScanLeaf1A, 1: CompactTree1A, 2: ShiftTree1A; any other
// value does nothing).
void Stat::RebuiltTree1A(short inc_flag)
{
    AvgDensity = 1.0*NewRoot->N() / (1.0*CurrEntryCnt);

    // the modulo is only evaluated when inc_flag is 1
    const bool reselect = (inc_flag == 1) && (Passi % StayTimes == 0);
    if (reselect) {
        SelectFtA();
    }

    Passi++;

    if (RebuiltAlg == 0) {
        ScanLeaf1A();
    }
    else if (RebuiltAlg == 1) {
        CompactTree1A();
    }
    else if (RebuiltAlg == 2) {
        ShiftTree1A();
    }
}

// shift the tree:
void Stat::ShiftTree1A()
{
int i;

Entry ent;
ent.Init(Dimension);

Node *tmpnode;

MakeNewTree();

int height=OldRoot->Depth();

Path CurrPath(height), BestPath(height);

// initialize CurrPath to the leftmost path (leaf entry) in old tree

tmpnode=OldRoot;
for (i=0; i<height; i++) {
        CurrPath.Push(0,tmpnode);
        tmpnode=tmpnode->TheChild(0);
        }

tmpnode=CurrPath.TopLeaf();

while (tmpnode!=NULL) {

  // Process all entries in the leaf node
  for (i=0; i<tmpnode->actsize; i++) {
      ent=tmpnode->entry[i];
      if (strcmp(name,"outlier")!=0 &&
          ent.n<NoiseRate*AvgDensity &&
          OutlierQueue!=NULL) // write out all qualified outliers
                OutlierQueue->AddEnt(ent,this);
      else {
          // find BestPath for current entry in new tree
          BestPath.Reset();
          if (NewRoot->BestFitPath2(this,ent,BestPath)==TRUE)
                BestPath.AddonPath(this,ent,NewRoot);
          else  CurrPath.AddonLeaf(this,ent,NewRoot);
          }
      }

  // Process next leaf node
  tmpnode=CurrPath.NextRightLeafFreeSpace(this);
  if (tmpnode!=NULL) CurrPath.InsertLeaf(this,NewRoot);
  }

OldRoot=NewRoot;
OldLeafHead=NewLeafHead;
NewRoot->FreeEmptyNode(this);
}

// compact the tree:
// Works on the tree under OldRoot in place. Each leaf entry reached through
// CurrPath is taken off the tree and then either
//   - sent to the outlier queue (when it qualifies as an outlier),
//   - re-added along a better-fitting path found by BestFitPath2, or
//   - put back where it was, in which case CurrEntryCnt is incremented and
//     CurrPath moves on with NextRightPath().
// The loop ends when CurrPath.Exists() becomes false.
void Stat::CompactTree1A()
{
int i;

// scratch entry, sized for the current Dimension
Entry ent;
ent.Init(Dimension);

// NOTE(review): ShiftTree1A calls MakeNewTree() at this point; confirm that
// MarkNewTree() is a distinct method and not a misspelling.
MarkNewTree();

int height = OldRoot->Depth();

Path CurrPath(height), BestPath(height);

// initialize to the leftmost path (or leaf entry) in the tree

// push child 0 at every level, `height` times, starting from OldRoot
Node *tmpnode=OldRoot;
for (i=0; i<height; i++) {
        CurrPath.Push(0,tmpnode);
        tmpnode=tmpnode->TheChild(0);
        }

while (CurrPath.Exists()) {

        // take off the current path (or leaf entry) from the tree;
        // ent receives a copy of the top leaf entry first
        ent=*(CurrPath.TopLeafEntry());
        CurrPath.TakeoffPath(ent);

        // outlier test: skipped entirely when this Stat is named "outlier"
        // or when no outlier queue exists
        if (strcmp(name,"outlier")!=0 &&
            ent.n<NoiseRate*AvgDensity &&
            OutlierQueue!=NULL) { // write out all qualified outliers
            OutlierQueue->AddEnt(ent,this);
            CurrPath.CollectSpace(this);
            }
        else {// find bestpath for current leaf entry in tree and put back
            BestPath.Reset();
            // the entry is moved only when a fitting path is found AND that
            // path compares less than CurrPath (Path::operator<)
            if (OldRoot->BestFitPath2(this,ent,BestPath)==TRUE
                && BestPath<CurrPath) {
                        BestPath.AddonPath(this,ent,OldRoot);
                        CurrPath.CollectSpace(this);
                        }
                // otherwise the entry goes back on CurrPath; this is the
                // only branch that calls NextRightPath() explicitly
                // (presumably CollectSpace leaves CurrPath on the next
                // entry in the other branches -- confirm in path.C)
                else { CurrPath.AddonPath(this,ent,OldRoot);
                       CurrEntryCnt++;
                       CurrPath.NextRightPath();
                       }
            }
        }
}

// responsible for old leaves
// does not guarantee S2<=S1 if T2>=T1.
//
// After StartNewTree(), pulls entries one at a time with
// NextEntryFreeOldLeafHead(k, ent) until it returns FALSE. Every entry
// obtained with a TRUE result is either sent to the outlier queue (this
// Stat is not named "outlier", ent.n is below NoiseRate*AvgDensity and a
// queue exists) or inserted through OldRoot->AdjustTree, after which
// OldRoot is refreshed from NewRoot.
void Stat::ScanLeaf1A()
{
    int k = 0;

    Entry ent;
    ent.Init(Dimension);

    StartNewTree();

    short res;
    do {
        res = NextEntryFreeOldLeafHead(k, ent);

        if (res == TRUE) {
            const bool sendToOutliers = strcmp(name, "outlier") != 0 &&
                                        ent.n < NoiseRate*AvgDensity &&
                                        OutlierQueue != NULL;
            if (sendToOutliers) {
                // write out all qualified outliers
                OutlierQueue->AddEnt(ent, this);
            }
            else {
                OldRoot->AdjustTree(this, ent);
                OldRoot = NewRoot;
            }
        }
    } while (res != FALSE);
}

// Drain the entries that are in the split buffer when the call starts.
//
// Phase 1: while MemUsed <= MemSize, each removed entry is inserted through
//          OldRoot->AdjustTree and OldRoot is refreshed from NewRoot.
// Phase 2: for the entries left over, OldRoot->AbsorbEntry2 is tried; an
//          entry that is not absorbed goes to the outlier queue when one
//          exists, otherwise back into the split buffer.
// The number of entries handled is fixed up front, so entries re-added in
// phase 2 are not looked at again during this call.
void Stat::ScanSplitBuffer()
{
    Entry ent;
    ent.Init(Dimension);

    int remaining = SplitBuffer->CountEntry();

    for (; remaining > 0 && MemUsed <= MemSize; --remaining) {
        SplitBuffer->DeleteEnt(ent);
        OldRoot->AdjustTree(this, ent);
        OldRoot = NewRoot;
    }

    for (; remaining > 0; --remaining) {
        SplitBuffer->DeleteEnt(ent);

        if (OldRoot->AbsorbEntry2(this, ent) == FALSE) {
            if (OutlierQueue != NULL) {
                OutlierQueue->AddEnt(ent, this);
            }
            else {
                SplitBuffer->AddEnt(ent);
            }
        }
    }
}

// Re-examine the entries that are in the outlier queue when the call starts.
//
// Each entry is removed and offered to the main tree with
// OldRoot->AbsorbEntry1. What happens to an entry that is not absorbed
// depends on whether a secondary Stat (OStats) exists:
//   - no OStats: the entry is put back into the outlier queue;
//   - OStats:    the entry is handed to OStats->Accept1A or Accept1B
//                according to OStats->Phase1Scheme (any other scheme value
//                is reported with print_error), and ent.n is added to
//                NoiseCnt.
void Stat::ScanOutlierQueue()
{
    Entry ent;
    ent.Init(Dimension);

    int remaining = OutlierQueue->CountEntry();

    // without secondary tree for outliers
    if (OStats == NULL) {
        for (; remaining > 0; --remaining) {
            OutlierQueue->DeleteEnt(ent);
            if (OldRoot->AbsorbEntry1(this, ent) == FALSE) {
                OutlierQueue->AddEnt(ent, this);
            }
        }
        return;
    }

    // with secondary tree for outliers:
    // if can't absorb by main tree, accept to outlier tree
    for (; remaining > 0; --remaining) {
        OutlierQueue->DeleteEnt(ent);

        if (OldRoot->AbsorbEntry1(this, ent) != FALSE) {
            continue;
        }

        switch (OStats->Phase1Scheme) {
            case 0:
                OStats->Accept1A(ent);
                break;
            case 1:
                OStats->Accept1B(ent);
                break;
            default:
                print_error("ScanOutlierQueue","Invalid Phase1Scheme");
                break;
        }
        NoiseCnt += ent.n;
    }
}

void Stat::Inherit(const Stat *Stats) {
        Dimension=Stats->Dimension;
        PageSize=Stats->PageSize;
        MemSize=Stats->OutlierTreeSize;
        BufferSize=0;
        QueueSize=0;
        OutlierTreeSize=0;
        BDtype=Stats->BDtype;
        Ftype=Stats->Ftype;
        Phase1Scheme=Stats->Phase1Scheme;
        RebuiltAlg=Stats->RebuiltAlg;
        StayTimes=Stats->StayTimes;
        NoiseRate=Stats->NoiseRate;
        Range=Stats->Range;
        CFDistr=Stats->CFDistr;
        H=Stats->H;
        K=Stats->K;
        InitFt=Stats->InitFt;
        Ft=Stats->Ft;
        Gtype=Stats->Gtype;
        GDtype=Stats->GDtype;
        Qtype=Stats->Qtype;
        RefineAlg=Stats->RefineAlg;
        NoiseFlag=Stats->NoiseFlag;
        MaxRPass=Stats->MaxRPass;
        Ranges.Init(Dimension);
        }

// Read the settings of *Stats from `fi`, in a fixed order, and create the
// optional helper objects.
//
// Expected on entry (none of these are read here): Stats->Dimension, and
// MemSize / BufferSize / QueueSize / OutlierTreeSize, which are divided by
// PageSize as soon as PageSize has been read.
//
// After the scalar settings, a Bars array of Dimension ints is allocated
// and filled, Ranges is initialised, and depending on the resulting sizes a
// split buffer, an outlier queue and a secondary "outlier" Stat (configured
// through Inherit) are created. The stream state is not checked.
istream& operator>>(istream &fi,Stat *Stats) {
    Stat &st = *Stats;

    fi >> st.WMflag;
    st.W.Init(st.Dimension);
    fi >> st.W;
    st.M.Init(st.Dimension);
    fi >> st.M;

    // sizes are divided by the page size just read
    fi >> st.PageSize;
    st.MemSize         /= st.PageSize;
    st.BufferSize      /= st.PageSize;
    st.QueueSize       /= st.PageSize;
    st.OutlierTreeSize /= st.PageSize;

    fi >> st.BDtype;
    fi >> st.Ftype;
    fi >> st.Phase1Scheme;
    fi >> st.RebuiltAlg;
    fi >> st.StayTimes;

    fi >> st.NoiseRate;

    fi >> st.Range;

    fi >> st.CFDistr;
    fi >> st.H;

    // one int per dimension
    st.Bars = new int[st.Dimension];
    int d = 0;
    while (d < st.Dimension) {
        fi >> st.Bars[d];
        ++d;
    }

    fi >> st.K;
    fi >> st.InitFt;
    fi >> st.Ft;
    fi >> st.Gtype;
    fi >> st.GDtype;
    fi >> st.Qtype;
    fi >> st.RefineAlg;
    fi >> st.NoiseFlag;
    fi >> st.MaxRPass;

    st.Ranges.Init(st.Dimension);

    // helpers exist only when their size is positive
    if (st.BufferSize > 0) {
        st.SplitBuffer = new BufferClass(Stats);
    }
    if (st.QueueSize > 0) {
        st.OutlierQueue = new RecyQueueClass(Stats);
    }
    if (st.OutlierTreeSize > 0) {
        st.OStats = new Stat("outlier");
        st.OStats->Inherit(Stats);
    }

    return fi;
}

// File-stream flavour of the Stat reader.
//
// The previous body was a line-for-line copy of the istream overload defined
// earlier in this file. It now forwards to that overload, so the read order
// and the helper-object setup exist in one place only. The ifstream&
// return type is kept for callers that chain on it.
//
// NOTE(review): the copy used `fi` as an ifstream for the nested reads
// (W, M, Range, ...). If the project has ifstream-specific operator>>
// overloads for those types that differ from their istream counterparts,
// confirm that forwarding is acceptable.
ifstream& operator>>(ifstream &fi,Stat *Stats) {
    // bind as the base class so overload resolution picks the istream version
    istream &base = fi;
    base >> Stats;
    return fi;
}

// Write Stats[0] .. Stats[Paras->ntrees - 1] to `fo`, each followed by endl.
// The element count comes from Paras->ntrees, not from the array argument.
ostream& operator<<(ostream &fo,Stat** Stats) {
    int idx = 0;
    while (idx < Paras->ntrees) {
        fo << Stats[idx] << endl;
        ++idx;
    }
    return fo;
}

// Write Stats[0] .. Stats[Paras->ntrees - 1] to the file stream `fo`, each
// followed by endl. The element count comes from Paras->ntrees, not from
// the array argument.
ofstream& operator<<(ofstream &fo,Stat** Stats) {
    int idx = 0;
    while (idx < Paras->ntrees) {
        fo << Stats[idx] << endl;
        ++idx;
    }
    return fo;
}

ostream& operator<<(ostream &fo,Stat* Stats) {
fo<<"***************Status of "<<Stats->name<<endl;
if (strcmp(Stats->name,"outlier")!=0) {
fo<<"WMflag\t"<<Stats->WMflag<<endl;
fo<<"W\t"<<Stats->W<<endl;
fo<<"M\t"<<Stats->M<<endl;
}
fo<<"Dimension\t"<<Stats->Dimension<<endl;
fo<<"PageSize\t"<<Stats->PageSize<<endl;
fo<<"MemSize\t"<<Stats->MemSize<<endl;
fo<<"BufferSize\t"<<Stats->BufferSize<<endl;
fo<<"QueueSize\t"<<Stats->QueueSize<<endl;
fo<<"OutlierTreeSize\t"<<Stats->OutlierTreeSize<<endl;

fo<<"BDtype\t"<<Stats->BDtype<<endl;
fo<<"Ftype\t"<<Stats->Ftype<<endl;
fo<<"Phase1Scheme\t"<<Stats->Phase1Scheme<<endl;
fo<<"RebuiltAlg\t"<<Stats->RebuiltAlg<<endl;
fo<<"StayTimes\t"<<Stats->StayTimes<<endl;

fo<<"NoiseRate\t"<<Stats->NoiseRate<<endl;

fo<<"Range\t"<<Stats->Range<<endl;

fo<<"CFDistr\t"<<Stats->CFDistr<<endl;
fo<<"H\t"<<Stats->H<<endl;

if (Stats->Bars!=NULL) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -