⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hashforest.c

📁 Entropy-based CLIQUE算法改进
💻 C
字号:
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include <sys/resource.h>
#include "IntList.h"
#include "Param.h"
#include "NPoint.h"
#include "NArray.h"
#include "HashTree.h"
#include "DensityMap.h"
#include "HashForest.h"

// Constructor
//
// Loads the run parameters from the parameter file and allocates the two
// per-pass arrays of hash trees (nonsig and sig), one tree per dimension.
//
// Parameter file layout, in the order it is read:
//   dim                          (int)
//   min_val, max_val, int_size   (float, one per line)
//   brFactor, data_row           (int, one per line)
//   cluster_file                 (a single whitespace-free token)
//
//   p  - path of the parameter file
//   es - thresholds, copied into the member `sta`
//
// If the file cannot be opened, cannot be parsed completely, or declares a
// dimensionality below 1, a message is printed and the process exits with
// status 1.
HashForest::HashForest(char* p, EntStat es)
:sta(es), dataset(NULL)
{
  FILE* fp;
  int i;
  bool parsed;

  // Read the parameters from parameter file
  fp = fopen(p,"rt");
  if (fp==NULL) {
    printf("Cannot open parameter file '%s'\n", p);
    exit(1);
  }

  // Every fscanf must convert all of its fields; otherwise prm.dim and the
  // other values would be used uninitialised below.
  // NOTE(review): "%s" is unbounded; the capacity of cluster_file is declared
  // outside this file, so a field width could not be added here - confirm the
  // buffer size and bound this conversion.
  parsed = fscanf(fp,"%d\n", &prm.dim)==1
        && fscanf(fp,"%f\n%f\n%f\n", &prm.min_val, &prm.max_val, &prm.int_size)==3
        && fscanf(fp,"%d\n%d\n", &brFactor,&data_row)==2
        && fscanf(fp,"%s\n",cluster_file)==1;
  fclose(fp);

  if (!parsed) {
    printf("HashForest::HashForest(): Cannot parse parameter file '%s'\n", p);
    exit(1);
  }
  if (prm.dim < 1) {
    printf("HashForest::HashForest(): Invalid dimensionality %d\n", prm.dim);
    exit(1);
  }

  // Create prm.dim HashTree pointers.
  // (Written without parentheses around the element type: a parenthesised
  // type-id may not be followed by an array bound.)
  nonsig = new HashTree*[prm.dim];
  sig    = new HashTree*[prm.dim];

  // Instantiate prm.dim HashTree; tree i is built with the value i+1
  for (i=0; i<prm.dim; i++) {
    nonsig[i] = new HashTree(brFactor,i+1);
    sig[i] = new HashTree(brFactor,i+1);
  }
}

// Destructor
//
// Frees every hash tree created by the constructor, the two pointer arrays
// that hold them, and - if it was ever allocated - the in-memory dataset.
HashForest::~HashForest()
{
  // Trees first (one nonsig/sig pair per dimension), then the arrays.
  for (int d = 0; d < prm.dim; ++d) {
    delete nonsig[d];
    delete sig[d];
  }
  delete[] nonsig;
  delete[] sig;

  // dataset stays NULL unless readfile() allocated it.
  if (dataset == NULL) {
    return;
  }

  // Release each row, then the array of row pointers.
  for (int row = 0; row < data_row; ++row) {
    delete[] dataset[row];
  }
  delete[] dataset;
}

// The Mining Task
//
// Runs the pass-by-pass search over subspaces and prints a report to stdout.
//
// Pass k (0-based cur_dim = k-1) works on the candidate tree nonsig[cur_dim]:
//   1. a DensityMap is built over the candidates and build_grid() is run on
//      the dataset,
//   2. applyThreshold() is called with the thresholds in `sta` and the
//      matching sig[cur_dim] tree,
//   3. both trees are printed together with the clock() time of the pass,
//   4. group() fills nonsig[cur_dim+1] with the candidates of the next pass.
// The loop ends after prm.dim passes or as soon as a pass has no candidates.
// The total user+system CPU time (getrusage) is reported at the end.
//
// When READ_ONCE is non-zero the data file is loaded into memory first.
void HashForest::mine()
{
  DensityMap* dm;
  IntList il;  
  int cur_dim = 0;
  int i;
  float start_time;
  clock_t last_clock;
  struct rusage ru;

  // Report the parameters first
  printf("The dimensionality is %d.\n", prm.dim);
  printf("The thresholds are %f, %f and %f.\n",
    sta.entropy, sta.interest, sta.interest_gain);

  if (READ_ONCE) readfile();  // Read the file into memory

  // Mark the time
  last_clock = clock();  
  getrusage(RUSAGE_SELF, &ru);
  start_time = ru.ru_utime.tv_sec + ru.ru_stime.tv_sec + 
    float(ru.ru_utime.tv_usec+ru.ru_stime.tv_usec)/1e6; 
    
  // Let nonsig[0] be all the individual dimensions
  for (i=0; i<prm.dim; i++) {
    il.clear();
    il.insertSorted(i);
    nonsig[0]->insert(il);
  }

  // Process pass by pass; stop early once a pass has no candidate subspaces
  while (cur_dim<prm.dim && nonsig[cur_dim]->get_no_ss()!=0) {
    // Count density and calculate entropy
    dm = new DensityMap(prm,*nonsig[cur_dim]);
    dm->build_grid(dataset, *this, *nonsig[cur_dim]);
    delete dm;

    // Show dimensions and number of candidate subspaces
    printf("\nPass %d: no of candidate subspaces = %d\n", cur_dim+1, nonsig[cur_dim]->get_no_ss());

    // Apply the thresholds    
    nonsig[cur_dim]->applyThreshold(sta,*sig[cur_dim]);

#if 1
    // Show large sets
    printf("Non-significant subspaces (%d):\n",nonsig[cur_dim]->get_no_ss());
    nonsig[cur_dim]->show();
    printf("Significant subspaces (%d):\n",sig[cur_dim]->get_no_ss());
    sig[cur_dim]->show();
    printf("\n");
#endif

    // Report time
    printf("Pass %d: %f secs used\n", cur_dim+1,float(clock()-last_clock)/CLOCKS_PER_SEC);
    last_clock = clock();
    
    // Generate next pass. nonsig holds exactly prm.dim trees, so the last
    // pass has no successor tree to fill; indexing nonsig[prm.dim] would read
    // past the end of the array.
    if (cur_dim+1 < prm.dim) {
      nonsig[cur_dim]->group(*nonsig[cur_dim+1]);
    }
    
    cur_dim++;
  }
  
  // Report time
  getrusage(RUSAGE_SELF, &ru);
  printf("The rusage = %f secs\n\n",ru.ru_utime.tv_sec+ru.ru_stime.tv_sec+
    float(ru.ru_utime.tv_usec+ru.ru_stime.tv_usec)/1e6 - start_time);
}

// Find the entropy of an individual dimension
float HashForest::entropy_dim(int d)
{
  IntList is;
  
  is.prepend(d);
  return nonsig[0]->findStat(is).entropy;
}

// Find the statistics of a given subspace
//
//   ss - the subspace, as a list of dimensions
//
// Returns the statistics stored for `ss` in the tree nonsig[len-1], where len
// is the number of dimensions in `ss`.
//
// nonsig has exactly prm.dim trees, so len must lie in 1 .. prm.dim. A length
// outside that range has no tree to look in; because indexing nonsig with it
// would read outside the array, the function reports the problem and exits
// with status 2 instead of continuing.
EntStat HashForest::findStat(const IntList& ss)
{
  const int len = ss.get_len();

  if (len > prm.dim) {
    printf("HashForest::findStat(): Warning: len of subspace > maximum dimension\n");
  }

  if (len < 1 || len > prm.dim) {
    printf("HashForest::findStat(): Error: no hash tree for a subspace of length %d\n", len);
    exit(2);
  }
  
  return nonsig[len-1]->findStat(ss);
}

// Read the data file into the variable dataset
//
// Allocates dataset as data_row rows of prm.dim floats (zero-filled) and
// fills it from the whitespace-separated values in cluster_file, prm.dim
// values per row.
//
// Per value:
//   - a value outside [prm.min_val, prm.max_val] produces a warning,
//   - a value above prm.max_val-1e-5 is lowered to prm.max_val-1e-5.
//
// Reading stops at end of file or at the first token that is not a number.
// Rows beyond data_row are still counted, so that the reported row count is
// the real one, but they are not stored. An incomplete final row is neither
// stored as a row nor counted. A warning is printed when the number of
// complete rows differs from data_row.
//
// Exits with status 3 if the file cannot be opened.
void HashForest::readfile()
{
  FILE* fp;
  int i,count = 0;
  int converted;
  float f;
  bool more;
  
  printf("The predefined number of rows is %d.\n", data_row);
    
  // Allocate a data_row*prm.dim array.
  // (No parentheses around the element type: a parenthesised type-id may not
  // be followed by an array bound.) The trailing () zero-fills each row so
  // that rows the file does not supply hold a defined value.
  dataset = new float*[data_row];
  for (i=0; i<data_row; i++) {
    dataset[i] = new float[prm.dim]();
  }

  // Open file
  fp = fopen(cluster_file,"rt");
  if (fp==NULL) {
    printf("HashForest::readfile(): Cannot open '%s'\n",cluster_file);
    exit(3);
  }

  // Loop on the result of fscanf rather than on feof(): feof() only becomes
  // true after a read has failed, and it never becomes true at all when the
  // failure is a malformed token.
  more = (prm.dim > 0);
  while (more) {
    // Read one row from the cluster file
    for(i=0; i<prm.dim; i++) {
      converted = fscanf(fp,"%f ",&f);
      if (converted != 1) {
        if (converted == 0) {
          printf("HashForest::readfile(): Warning: malformed value in row %d, reading stopped\n", count+1);
        } else if (i > 0) {
          printf("HashForest::readfile(): Warning: incomplete last row ignored (%d of %d values)\n", i, prm.dim);
        }
        more = false;
        break;
      }
      
      // Valid the value
      if (f < prm.min_val || f > prm.max_val) {
        printf("HashForest::readfile(): Warning: Invalid value %f\n",f);
      }
      if (f > prm.max_val-1e-5) f = prm.max_val-1e-5;  // ensure not exceed the max

      // Only rows that fit in the allocated array are stored
      if (count < data_row) {
        dataset[count][i] = f;
      }
    }

    if (more) count++;  // a complete row was read
  }

  // Check if number of row is correct
  if (count!=data_row) {
    printf("HashForest::readfile(): Warning: data_row incorrectly set.\n");
    printf("                        actual number of rows = %d\n", count);
  }

  fclose(fp);  // Close file
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -