📄 hashtree.c
字号:
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include "IntList.h"
#include "HashTree.h"
//////////////////////////////////////////////////////////////////////
// Constructor
TreeNode::TreeNode(bool l, int d, int n)
:sta(NULL), len(0), depth(d), brFactor(n)
{
int i;
if (!l) {
// Internal node
subspace = NULL;
child = new (TreeNode*)[n];
} else {
// Leaf node
// Allocate the pointers to subspaces
subspace = new (IntList*)[n];
child = NULL;
for (i=0; i<n; i++) subspace[i]=NULL;
// We need to initialize the sta field
sta = new EntStat[brFactor];
for (i=0; i<brFactor; i++) {
sta[i].entropy = 0;
sta[i].interest = 0;
sta[i].interest_gain = 0;
}
}
}
// Destructor
TreeNode::~TreeNode()
{
int i;
if (!isLeaf()) {
// Internal node
delete[] child;
} else {
// Leaf node
for (i=0; i<len; i++) delete subspace[i];
delete[] subspace;
delete[] sta;
}
}
// A hash function
int TreeNode::hashFunc(int i)
{
return i%brFactor;
}
// Given a subspace, transver to the leaf node
TreeNode* TreeNode::transver(const IntList& i)
{
TreeNode* curNode = this;
int d = depth-1;
while (!curNode->isLeaf()) {
assert(d<i.get_len());
curNode = curNode->child[hashFunc(i[d])];
d++;
}
return curNode;
}
// Insert a subspace into the hash tree
void TreeNode::insert(const IntList& ss)
{
TreeNode* leafNode;
// find the leaf node to insert to
leafNode = transver(ss);
// check if it is full or not
if (leafNode->len < leafNode->brFactor) {
// Not full
leafNode->subspace[leafNode->len] = new IntList;
leafNode->subspace[leafNode->len]->copy(ss);
leafNode->len++;
} else {
// Full
// See if splitting is possible
if (leafNode->depth<=ss.get_len()) {
// Splitting
leafNode->split();
leafNode->insert(ss);
} else {
printf("Resize the leaf node...\n");
leafNode->resize(leafNode->brFactor*2);
leafNode->subspace[leafNode->len] = new IntList;
leafNode->subspace[leafNode->len]->copy(ss);
leafNode->len++;
}
}
}
// Split a leaf node
void TreeNode::split()
{
IntList** reinsert;
int oldlen, i;
// Convert the leaf node into an internal node
// Create children
child = new (TreeNode*)[brFactor];
for (i=0; i<brFactor; i++) {
child[i] = new TreeNode(true,depth+1,brFactor);
}
// Convert to an internal node
reinsert = subspace;
oldlen = len;
subspace = NULL;
len = 0;
// Reinsert the subspaces
for (i=0; i<oldlen; i++) {
insert(*reinsert[i]);
setStat(*reinsert[i],sta[i]);
delete reinsert[i];
}
// Release old subspaces
delete[] reinsert;
delete[] sta;
}
// Resize a leaf node
void TreeNode::resize(int newsize)
{
IntList** temp_ss;
EntStat* temp_sta;
if (!isLeaf()) {
printf("TreeNode::resize(): Resize of internal node is invalid.\n");
return;
}
if (newsize<=brFactor) {
printf("TreeNode::resize(): Cannot reduce the size of a node.\n");
return;
}
// Copy the old subspace and statistics
temp_ss = subspace;
temp_sta = sta;
// Allocate new and larger space
subspace = new (IntList*)[newsize];
memcpy(subspace,temp_ss,sizeof(IntList*)*brFactor);
sta = new EntStat[newsize];
memcpy(sta,temp_sta,sizeof(EntStat)*brFactor);
brFactor = newsize;
// Delete temp data
delete[] temp_ss;
delete[] temp_sta;
}
// Remove subspace
void TreeNode::remove(const IntList& ss, int& no_ss)
{
TreeNode* leafNode;
int i;
// Transver to leaf node
leafNode = transver(ss);
// Search the subspace to be deleted
for (i=0; i<leafNode->len; i++) {
if (ss==*leafNode->subspace[i]) {
if (i==leafNode->len-1) {
// Last subspace
leafNode->len--;
} else {
// Not last subspace
leafNode->subspace[i]->copy(*leafNode->subspace[leafNode->len-1]);
leafNode->sta[i] = leafNode->sta[leafNode->len-1];
leafNode->len--;
}
no_ss--; // Update the count of no of subspaces
return;
}
}
printf("TreeNode::remove(): Subspace not found\n");
}
// Show the content of a node
void TreeNode::show() const
{
int i;
if (isLeaf()) {
// Leaf
if (len>0) {
//printf("%d:",depth);
//printf("{");
for (i=0; i<len; i++) {
subspace[i]->show();
if (sta!=NULL)
printf("\t%.4f\t%.4f\t%.4f\n",sta[i].entropy,sta[i].interest,sta[i].interest_gain);
//if (i!=len-1) printf(", ");
}
//printf("}\n");
}
} else {
// Internal
//printf("Internal node\n");
for (i=0; i<brFactor; i++) {
child[i]->show();
}
}
}
// Group subspaces that may have common first k-1 dimensions together
// where k = no of dimension of subspace
void TreeNode::group(const HashTree& hin, HashTree& hout)
{
int k = hin.get_dim();
int i, j, tmp;
int no_cg;
IntList* cg; // Candidate generator
// Group the subspace which may have common first k-1 dimensions
// together
if (isLeaf()) {
// This level is leaf
cg = new IntList[len];
no_cg = len;
for (i=0; i<len; i++) {
cg[i].copy(*subspace[i]);
// printf("H:"); cg[i].show(); printf("\n");
}
} else
if (this->child[0]->isLeaf() && depth==hin.get_dim()) {
// The next level is leaf
// Calculate no of generator
no_cg = 0;
for (i=0; i<brFactor; i++)
no_cg += child[i]->len;
cg = new IntList[no_cg];
// Collect all generator
tmp=0;
for (i=0; i<brFactor; i++)
for (j=0; j<child[i]->len; j++) {
cg[tmp].copy(*child[i]->subspace[j]);
// printf("G:"); cg[tmp].show(); printf("\n");
tmp++;
}
} else {
// Transver until a level above leaf
for (i=0; i<brFactor; i++)
child[i]->group(hin, hout);
return;
}
////////////////////////////////////////////////////////////////////
IntList cand; // Candidate subspace
// Form candidate sets for next round
for (i=0; i<no_cg; i++)
for (j=i+1; j<no_cg; j++) {
if (cg[i].compare_k_1(cg[j])) {
cand.copy(cg[i]);
cand.insertSorted(cg[j][k-1]);
// See whether it should be pruned
if (!hin.prune(cand)) {
// cand is our required candidate
hout.insert(cand);
}
}
}
delete[] cg;
}
// Convert subtree to array
void TreeNode::convert2array(IntList*& ia, int& idx_ss) const
{
int i;
if (isLeaf()) {
// Leaf
for (i=0; i<len; i++) {
ia[idx_ss].copy(*subspace[i]);
idx_ss++;
}
} else {
// Internal
for (i=0; i<brFactor; i++) {
child[i]->convert2array(ia,idx_ss);
}
}
}
// Set the statistics of a subspace
void TreeNode::setStat(const IntList& ss, const EntStat& es)
{
TreeNode* leafNode;
int i;
// Transver to leaf node
leafNode = transver(ss);
// Find the subspace in leaf node
for (i=0; i<leafNode->len; i++) {
if (ss==*leafNode->subspace[i]) {
// Copy the statistics
memcpy(&leafNode->sta[i],&es,sizeof(EntStat));
}
}
}
// Filter out subspaces which do not satisify threshold
void TreeNode::applyThreshold(EntStat es, int& no_ss, HashTree& ht)
{
int i;
if (isLeaf()) {
// Leaf
for (i=0; i<len; i++) {
#if 1
if ((sta[i].entropy>es.entropy)
|| (sta[i].interest<0.05 && subspace[i]->get_len()>=3)) {
#else
if (sta[i].entropy>es.entropy) {
#endif
// Do not satisify entropy threshold
remove(*subspace[i], no_ss);
i--; // Move one step backward after remove
}
else {
if (es.interest > 1e-10) { // We are mining significant subspaces
if (sta[i].interest>es.interest) {
ht.insert(*subspace[i]); // Insert into significant subspace
ht.setStat(*subspace[i],sta[i]);
remove(*subspace[i], no_ss); // Remove from non-significant subspace
i--; // Move one step backward after remove
}
} else { // We are mining interesting subspaces
if (sta[i].interest_gain>es.interest_gain) {
ht.insert(*subspace[i]); // Insert into significant subspace
ht.setStat(*subspace[i],sta[i]);
//remove(*subspace[i], no_ss); // Remove from non-significant subspace
}
}
}
}
} else {
// Internal
for (i=0; i<brFactor; i++) {
child[i]->applyThreshold(es,no_ss,ht);
}
}
}
//////////////////////////////////////////////////////////////////////
// Constructor
HashTree::HashTree(int n, int d)
:brFactor(n), dim(d), no_ss(0)
{
root = new TreeNode(true,1,brFactor);
}
// Destructor
HashTree::~HashTree()
{
delete root;
}
// Insert a new subspace
void HashTree::insert(const IntList& ss)
{
assert(ss.get_len()==dim);
root->insert(ss); // insert it to tree
no_ss++; // update counter
}
// Remove a subspace
void HashTree::remove(const IntList& ss)
{
root->remove(ss, no_ss);
}
// Check whether a subspace already exists in a subspace
bool HashTree::exist(const IntList& il) const
{
TreeNode* leafNode;
int i;
// Transver to leaf node
leafNode = root->transver(il);
// Test if it is stored in leaf node
for (i=0; i<leafNode->len; i++) {
if (il==*leafNode->subspace[i]) return true;
}
return false;
}
// To see if a candidate subspace should be pruned away
bool HashTree::prune(const IntList& cand) const
{
IntList ss;
int i;
assert(cand.get_len()==dim+1);
// Generate all k-1 projection. If any one of them does not exist,
// prune away the subspaces
for (i=0; i<cand.get_len(); i++) {
ss.copy(cand);
ss.remove(i);
if (!exist(ss)) return true;
#if 0
// Not prune
if (findStat(ss).interest<0.15 && ss.get_len()>=2) {
printf("Exceptional case: "); cand.show();
printf("Violating subspace: "); ss.show();
printf("Interest = %f\n", findStat(ss).interest);
}
#endif
}
return false;
}
// Put all subspaces in the hash tree to an array
void HashTree::convert2array(IntList*& ia, int& idx_ss) const
{
// index stating the first unused element of ia = 0
idx_ss = 0;
// Allocate memory for the array
ia = new IntList[no_ss];
// Involve recursive member function
root->convert2array(ia,idx_ss);
// Warning: user of this function is responsible for releasing
// the memory allocated for ia
}
// Lock the tree
void HashTree::locktree()
{
printf("Warning: HashTree::locktree() is called which is obsolete.\n");
}
// Set the statistics of a subspace
void HashTree::setStat(const IntList& ss, const EntStat& es)
{
root->setStat(ss,es);
}
// Find the stored statistics from hash tree
EntStat HashTree::findStat(const IntList& ss) const
{
TreeNode* leafNode;
int i;
// Transver to leaf node
leafNode = root->transver(ss);
// Find the subspace in the leaf node
for (i=0; i<leafNode->len; i++) {
if (ss==*leafNode->subspace[i]) {
// Return the stat
return leafNode->sta[i];
}
}
printf("HashTree::findStat(): Warning: Cannot find the required subspace\n");
EntStat es = {-1,-1,-1};
return es;
}
// Apply threshold
void HashTree::applyThreshold(EntStat es, HashTree& ht)
{
root->applyThreshold(es,no_ss,ht);
}
//////////////////////////////////////////////////////////////////////
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -