⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hashtree.c

📁 Entropy-based CLIQUE算法改进
💻 C
字号:
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include "IntList.h"
#include "HashTree.h"

//////////////////////////////////////////////////////////////////////

// Constructor
TreeNode::TreeNode(bool l, int d, int n)
:sta(NULL), len(0), depth(d), brFactor(n)
{
  int i;

  if (!l) {
    // Internal node
    subspace = NULL;
    child = new (TreeNode*)[n];
  } else {
    // Leaf node

    // Allocate the pointers to subspaces
    subspace = new (IntList*)[n];
    child = NULL;
    for (i=0; i<n; i++) subspace[i]=NULL;

    // We need to initialize the sta field
    sta = new EntStat[brFactor];
    for (i=0; i<brFactor; i++) {
      sta[i].entropy = 0;
      sta[i].interest = 0;
      sta[i].interest_gain = 0;
    }
  }
}

// Destructor
TreeNode::~TreeNode()
{
  int i;
  
  if (!isLeaf()) {
    // Internal node
    delete[] child;
  } else {
    // Leaf node
    for (i=0; i<len; i++) delete subspace[i];
    
    delete[] subspace;
    delete[] sta;
  }
}

// A hash function
int TreeNode::hashFunc(int i)
{
  return i%brFactor;
}

// Given a subspace, transver to the leaf node
TreeNode* TreeNode::transver(const IntList& i)
{
  TreeNode* curNode = this;
  int d = depth-1;

  
  while (!curNode->isLeaf()) {
    assert(d<i.get_len());
    
    curNode = curNode->child[hashFunc(i[d])];
    d++;
  }

  return curNode;
}

// Insert a subspace into the hash tree
void TreeNode::insert(const IntList& ss)
{
  TreeNode* leafNode;
  
  // find the leaf node to insert to
  leafNode = transver(ss);
  
  // check if it is full or not
  if (leafNode->len < leafNode->brFactor) {
    // Not full
    leafNode->subspace[leafNode->len] = new IntList;
    leafNode->subspace[leafNode->len]->copy(ss);
    leafNode->len++;
  } else {
    // Full
    
    // See if splitting is possible
    if (leafNode->depth<=ss.get_len()) {
      // Splitting
      leafNode->split();
      leafNode->insert(ss);
    } else {
      printf("Resize the leaf node...\n");
      leafNode->resize(leafNode->brFactor*2);
      leafNode->subspace[leafNode->len] = new IntList;
      leafNode->subspace[leafNode->len]->copy(ss);
      leafNode->len++;
    }
  }
}

// Split a leaf node
void TreeNode::split()
{
  IntList** reinsert;
  int oldlen, i;

  // Convert the leaf node into an internal node
  // Create children
  child = new (TreeNode*)[brFactor];
  for (i=0; i<brFactor; i++) {
    child[i] = new TreeNode(true,depth+1,brFactor);
  }
  
  // Convert to an internal node
  reinsert = subspace;
  oldlen = len;
  subspace = NULL;
  len = 0;

  // Reinsert the subspaces
  for (i=0; i<oldlen; i++) {
    insert(*reinsert[i]);
    setStat(*reinsert[i],sta[i]);
    delete reinsert[i];
  }
  
  // Release old subspaces
  delete[] reinsert;
  delete[] sta;
}

// Resize a leaf node
void TreeNode::resize(int newsize)
{
  IntList** temp_ss;
  EntStat*  temp_sta;

  if (!isLeaf()) {
    printf("TreeNode::resize(): Resize of internal node is invalid.\n");
    return;
  }
  
  if (newsize<=brFactor) {
    printf("TreeNode::resize(): Cannot reduce the size of a node.\n");
    return;
  }
  
  // Copy the old subspace and statistics
  temp_ss = subspace;
  temp_sta = sta;

  // Allocate new and larger space
  subspace = new (IntList*)[newsize];
  memcpy(subspace,temp_ss,sizeof(IntList*)*brFactor);
  sta = new EntStat[newsize];
  memcpy(sta,temp_sta,sizeof(EntStat)*brFactor);
  brFactor = newsize;
  
  // Delete temp data
  delete[] temp_ss;
  delete[] temp_sta;
}

// Remove subspace
void TreeNode::remove(const IntList& ss, int& no_ss)
{
  TreeNode* leafNode;
  int i;
  
  // Transver to leaf node
  leafNode = transver(ss);

  // Search the subspace to be deleted
  for (i=0; i<leafNode->len; i++) {
    if (ss==*leafNode->subspace[i]) {
      if (i==leafNode->len-1) {
        // Last subspace
        leafNode->len--;
      } else {
        // Not last subspace
        leafNode->subspace[i]->copy(*leafNode->subspace[leafNode->len-1]);
        leafNode->sta[i] = leafNode->sta[leafNode->len-1];
        leafNode->len--;
      }
      
      no_ss--;  // Update the count of no of subspaces
      return;  
    }
  }
  
  printf("TreeNode::remove(): Subspace not found\n");
}


// Show the content of a node
void TreeNode::show() const
{
  int i;

  if (isLeaf()) {
    // Leaf
    if (len>0) {
      //printf("%d:",depth);

      //printf("{");
      for (i=0; i<len; i++) {
        subspace[i]->show();
        if (sta!=NULL)
          printf("\t%.4f\t%.4f\t%.4f\n",sta[i].entropy,sta[i].interest,sta[i].interest_gain);
        //if (i!=len-1) printf(", ");
      }
      //printf("}\n");
    }
  } else {
    // Internal
    //printf("Internal node\n");
    for (i=0; i<brFactor; i++) {
      child[i]->show();
    }
  }
}

// Group subspaces that may have common first k-1 dimensions together
// where k = no of dimension of subspace
void TreeNode::group(const HashTree& hin, HashTree& hout)
{
  int k = hin.get_dim();
  int i, j, tmp;
  int no_cg;
  IntList* cg;  // Candidate generator

  // Group the subspace which may have common first k-1 dimensions
  // together
  
  if (isLeaf()) {
  // This level is leaf
    cg = new IntList[len];
    no_cg = len;
    
    for (i=0; i<len; i++) {
      cg[i].copy(*subspace[i]);
      // printf("H:"); cg[i].show(); printf("\n");
    }
  } else
  if (this->child[0]->isLeaf() && depth==hin.get_dim()) {
  // The next level is leaf
    // Calculate no of generator
    no_cg = 0;
    for (i=0; i<brFactor; i++)
      no_cg += child[i]->len;

    cg = new IntList[no_cg];
    
    // Collect all generator
    tmp=0;
    for (i=0; i<brFactor; i++)
      for (j=0; j<child[i]->len; j++) {
        cg[tmp].copy(*child[i]->subspace[j]);
        // printf("G:"); cg[tmp].show(); printf("\n");
        tmp++;
      }
          
  } else {
    // Transver until a level above leaf
    for (i=0; i<brFactor; i++)
      child[i]->group(hin, hout);
      
    return;
  }  
  
  ////////////////////////////////////////////////////////////////////

  IntList cand;  // Candidate subspace

  // Form candidate sets for next round
  
  for (i=0; i<no_cg; i++) 
    for (j=i+1; j<no_cg; j++) {
      if (cg[i].compare_k_1(cg[j])) {
        cand.copy(cg[i]);
        cand.insertSorted(cg[j][k-1]);

        // See whether it should be pruned
        if (!hin.prune(cand)) {
          // cand is our required candidate
          hout.insert(cand);
        }
      }
    }

  delete[] cg;
}

// Convert subtree to array
void TreeNode::convert2array(IntList*& ia, int& idx_ss) const
{
  int i;

  if (isLeaf()) {
    // Leaf
    for (i=0; i<len; i++) {
      ia[idx_ss].copy(*subspace[i]);
      idx_ss++;
    }
  } else {
    // Internal
    for (i=0; i<brFactor; i++) {
      child[i]->convert2array(ia,idx_ss);
    }
  }
}                                      
                                      
// Set the statistics of a subspace
void TreeNode::setStat(const IntList& ss, const EntStat& es)
{
  TreeNode* leafNode;
  int i;

  // Transver to leaf node
  leafNode = transver(ss);
  
  // Find the subspace in leaf node
  for (i=0; i<leafNode->len; i++) {
    if (ss==*leafNode->subspace[i]) {
      // Copy the statistics
      memcpy(&leafNode->sta[i],&es,sizeof(EntStat));
    }
  }
}

// Filter out subspaces which do not satisify threshold
void TreeNode::applyThreshold(EntStat es, int& no_ss, HashTree& ht)
{
  int i;

  if (isLeaf()) {
    // Leaf
    for (i=0; i<len; i++) {
#if 1
      if ((sta[i].entropy>es.entropy) 
          || (sta[i].interest<0.05 && subspace[i]->get_len()>=3)) {
#else
      if (sta[i].entropy>es.entropy)  {
#endif
        // Do not satisify entropy threshold      
        remove(*subspace[i], no_ss); 
        i--;  // Move one step backward after remove
      }
      else {
        if (es.interest > 1e-10) { // We are mining significant subspaces
          if (sta[i].interest>es.interest) {
            ht.insert(*subspace[i]);      // Insert into significant subspace
            ht.setStat(*subspace[i],sta[i]);
            remove(*subspace[i], no_ss);  // Remove from non-significant subspace
            i--;  // Move one step backward after remove
          }
        } else {                   // We are mining interesting subspaces
          if (sta[i].interest_gain>es.interest_gain) {
            ht.insert(*subspace[i]);      // Insert into significant subspace
            ht.setStat(*subspace[i],sta[i]);
            //remove(*subspace[i], no_ss);  // Remove from non-significant subspace
          }
        }      
      }
    }
  } else {
    // Internal
    for (i=0; i<brFactor; i++) {
      child[i]->applyThreshold(es,no_ss,ht);
    }
  }
}


//////////////////////////////////////////////////////////////////////

// Constructor
HashTree::HashTree(int n, int d)
:brFactor(n), dim(d), no_ss(0)
{
  root = new TreeNode(true,1,brFactor);
}

// Destructor
HashTree::~HashTree()
{
  delete root;
}

// Insert a new subspace
void HashTree::insert(const IntList& ss) 
{ 
  assert(ss.get_len()==dim);  

  root->insert(ss);  // insert it to tree
  no_ss++;           // update counter
}

// Remove a subspace
void HashTree::remove(const IntList& ss) 
{ 
  root->remove(ss, no_ss); 
}

// Check whether a subspace already exists in a subspace
bool HashTree::exist(const IntList& il) const
{
  TreeNode* leafNode;
  int i;
  
  // Transver to leaf node
  leafNode = root->transver(il);
  
  // Test if it is stored in leaf node
  for (i=0; i<leafNode->len; i++) {
    if (il==*leafNode->subspace[i]) return true;
  }
  
  return false;
}
  
// To see if a candidate subspace should be pruned away
bool HashTree::prune(const IntList& cand) const
{
  IntList ss;
  int i;

  assert(cand.get_len()==dim+1);

  // Generate all k-1 projection. If any one of them does not exist, 
  // prune away the subspaces 
  for (i=0; i<cand.get_len(); i++) {
    ss.copy(cand);
    ss.remove(i);

    if (!exist(ss)) return true;

#if 0
    // Not prune
    if (findStat(ss).interest<0.15 && ss.get_len()>=2) {
      printf("Exceptional case: "); cand.show();
      printf("Violating subspace: "); ss.show();
      printf("Interest = %f\n", findStat(ss).interest);
    }
#endif
  } 
  
  return false;
}

// Put all subspaces in the hash tree to an array
void HashTree::convert2array(IntList*& ia, int& idx_ss) const
{
  // index stating the first unused element of ia = 0
  idx_ss = 0;  

  // Allocate memory for the array
  ia = new IntList[no_ss];
  
  // Involve recursive member function
  root->convert2array(ia,idx_ss);
  
  // Warning: user of this function is responsible for releasing
  //          the memory allocated for ia
}

// Lock the tree
void HashTree::locktree()
{
  printf("Warning: HashTree::locktree() is called which is obsolete.\n");

}

// Set the statistics of a subspace
void HashTree::setStat(const IntList& ss, const EntStat& es)
{
  root->setStat(ss,es);

}

// Find the stored statistics from hash tree
EntStat HashTree::findStat(const IntList& ss) const
{
  TreeNode* leafNode;
  int i;

  // Transver to leaf node
  leafNode = root->transver(ss);

  // Find the subspace in the leaf node
  for (i=0; i<leafNode->len; i++) {
    if (ss==*leafNode->subspace[i]) {
      // Return the stat
      return leafNode->sta[i];
    }
  }
  
  printf("HashTree::findStat(): Warning: Cannot find the required subspace\n");
  EntStat es = {-1,-1,-1};
  return es;  
}

// Apply threshold
void HashTree::applyThreshold(EntStat es, HashTree& ht)
{
  root->applyThreshold(es,no_ss,ht);
}


//////////////////////////////////////////////////////////////////////

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -