📄 gen.cpp

📁 IBM实验室提供的数据集生成器源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
#include "gen.h"#include <assert.h>#include <string.h>#include <math.h>//------------------------------- Parameters -------------------------------void PatternPar::write(ostream &fp){  fp << "\tNumber of patterns = " << npats << endl;  fp << "\tAverage length of pattern = " << patlen << endl;  fp << "\tCorrelation between consecutive patterns = " << corr << endl;  fp << "\tAverage confidence in a rule = " << conf << endl;  fp << "\tVariation in the confidence = " << conf_var << endl;}void TransPar::write(ostream &fp){  fp << "Number of transactions in database = " << ntrans << endl;  fp << "Average transaction length = " << tlen << endl;  fp << "Number of items = " << nitems << endl;  fp << "Large Itemsets:" << endl;  lits.write(fp);  fp << endl;}// calculate the number of roots, given the number of levelsvoid TaxPar::calc_values(void){  LINT nset;    nset = 0;  nset += (nlevels != 0);  nset += (fanout != 0);  nset += (nroots != 0);    switch (nset)     {    case 0:	// fill in defaults      nroots = 250;      fanout = 5;      return;    case 1:	// need to fill in 1 value      assert (nlevels == 0);      if (fanout == 0)	fanout = 5;      else if (nroots == 0)	nroots = 250;      return;    case 2:      if (nlevels == 0)		// all set!	return;      if (fanout != 0) {	// calculate nroots	nroots = nitems / (1 + pow(DOUBLE(fanout), DOUBLE(nlevels-1)));	if (nroots < 1)	  nroots = 1;      }      else if (nroots != 0) {	// calculate fanout	FLOAT temp;	temp = (FLOAT)nitems / nroots - 1;	temp = log((DOUBLE)temp) / (nlevels - 1);	fanout = exp((DOUBLE)temp);      }    case 3:			// all set!      return;    }}void TaxPar::write(ostream &fp){  fp << "Number of transactions in database = " << ntrans << endl;  fp << "Average transaction length = " << tlen << endl;  fp << "Number of items = " << nitems << endl;  fp << "Number of roots = " << nroots << endl;  fp << "Number of levels = " << nlevels << endl;  fp << "Average fanout = " << fanout << endl;  fp << "Large Itemsets:" << endl;  lits.write(fp);  fp << endl;}void SeqPar::write(ostream &fp){  fp << "Number of customers in database = " << ncust << endl;  fp << "Average sequence length = " << slen << endl;  fp << "Average transaction length = " << tlen << endl;  fp << "Number of items = " << nitems << endl;  fp << "Repetition-level = " << rept << endl;  fp << "Variation in repetition-level = " << rept_var << endl;  fp << "Large Itemsets:" << endl;  lits.write(fp);  fp << "Large Sequences:" << endl;  lseq.write(fp);  fp << endl;}//------------------------------ Taxonomy ------------------------------const LINT Taxonomy::item_len = 7;//// Constructor: reads taxonomy file and builds taxonomy as a DAG//Taxonomy::Taxonomy(LINT num_items,	// total number of items		   LINT num_roots,	// number of roots		   FLOAT fanout,	// average fanout		   FLOAT depth_ratio	// average ratio of ....		   )  : nitems(num_items), nroots(num_roots), depth(depth_ratio){  LINT i, j;  LINT next_child;  PoissonDist nchildren(fanout-1);	// string length   // allocate memory  par = new LINT [nitems];  child_start = new LINT [nitems];  child_end = new LINT [nitems];  next_child = nroots;  // initialize parents (or lack thereof) for roots  for (i = 0; i < nroots; i++)    par[i] = -1;  // set up all the interior nodes  for (i = 0, j = next_child; i < nitems && next_child < nitems; i++)    {      child_start[i] = next_child;      next_child += nchildren() + 1;      if (next_child > nitems) 	next_child = nitems;      child_end[i] = next_child;      for (; j < next_child; j++)	par[j] = i;    }  // initialize children (or lack thereof) for all the leaves  for (; i < nitems; i++)    child_start[i] =     child_end[i] = -1;}Taxonomy::~Taxonomy(void){  LINT i;  delete [] par;  delete [] child_start;  delete [] child_end;}void Taxonomy::write(ofstream &fp){  for (LINT i = 0; i < nitems; i++)    if (par[i] >= 0) {      assert(i != par[i]);      fp.write((char *)&i, sizeof(LINT));      fp.write((char *)&par[i], sizeof(LINT));    }}void Taxonomy::write_asc(ofstream &fp){  for (LINT i = 0; i < nitems; i++)    if (par[i] >= 0) {      assert(i != par[i]);      fp << setw(item_len) << i << " " << setw(item_len) << par[i] << endl;    }}void Taxonomy::display(ofstream &fp){  fp << "Taxonomy: " << endl;  for (LINT i = 0; i < nitems && child_start[i] > 0; i++)    fp << i << " " << child_start[i] << " " << child_end[i]-1 << endl;  fp << endl;}//------------------------------- ItemSet -------------------------------ItemSet::ItemSet(LINT num_items, 	// number of items		 Taxonomy *ptax		// taxonomy (optional)		 )  : tax(ptax), nitems(num_items){  ExpDist freq;  LINT i, j;  cum_prob = new FLOAT [nitems];  if (tax)    tax_prob = new FLOAT [nitems];  else    tax_prob = NULL;  for (i = 0; i < nitems; i++)    cum_prob[i] = freq();	// prob. that this pattern will be picked  if (tax) {			// weight(itm) += wieght(children)    // normalize probabilities for the roots and for children    normalize(cum_prob, 0, tax->num_roots()-1);    for (i = 0; i < nitems && tax->num_children(i) > 0; i++)      normalize(cum_prob, tax->first_child(i), tax->last_child(i));    // calulate cumulative probabilities for children    for (i = 0; i < nitems; i++)      tax_prob[i] = cum_prob[i];    for (i = 1; i < nitems; i++)      if (tax->num_children(i) > 0)	for (j = tax->first_child(i); j < tax->last_child(i); j++)	  tax_prob[j+1] += tax_prob[j];    // set real probabilities    for (i = tax->num_roots(); i < nitems; i++)      cum_prob[i] *= cum_prob[ tax->parent(i) ] * tax->depth_ratio();  }  // normalize probabilites (why -- see get_pat)  normalize(cum_prob, 0, nitems-1);  for (i = 1; i < nitems; i++)	// calulate cumulative probabilities    cum_prob[i] += cum_prob[i-1];}ItemSet::~ItemSet(){  delete [] cum_prob;}//  normalize probabilities between low and high//void ItemSet::normalize(FLOAT prob[], LINT low, LINT high){  FLOAT tot;  LINT i;  // normalize probabilites  tot = 0;  for (i = low; i <= high; i++)    tot += prob[i];  for (i = low; i <= high; i++)    prob[i] /= tot;}// returns a pattern chosen at random//Item ItemSet::get_item(void){   FLOAT r;  LINT i;  // find the desired pattern using cum_prob table  r = rand();  // want item i such that cum_prob[i-1] < r <= cum_prob[i];  i = r * nitems;			// guess location of item  i += (r-cum_prob[i]) * nitems;	// refine guess  if (i >= nitems)			// check boundaries    i = nitems-1;  if (i < 0)    i = 0;  while ( i < (nitems-1) && r > cum_prob[i] )	// find item    i++;  while ( i > 0 && r <= cum_prob[i-1] )    i--;  return i;};// if no taxonomy, returns itm//Item ItemSet::specialize(Item itm){  FLOAT r;  LINT i, nchildren;  Item first, last;  if (!tax) 		// no taxonomy    return itm;  nchildren = tax->num_children(itm);  if (nchildren == 0)		// no children    return itm;    first = tax->child(itm, 0);  last = tax->child(itm, nchildren-1);  // find the desired pattern using cum_prob table  r = rand();  i = first + r * nchildren;  if (i == last)    i--;  while ( i < last && r > tax_prob[i] )    i++;  while ( i > first && r < tax_prob[i-1] )    i--;  return specialize(i);}  FLOAT ItemSet::weight(Item itm)	// returns prob. of choosing item{  if (itm == 0)    return cum_prob[itm];  else    return cum_prob[itm] - cum_prob[itm-1];}void ItemSet::display(ofstream &fp){//  if (tax != NULL)//    tax->display(fp);  fp << "Items:" << endl;  fp << setprecision(3);  if (tax != NULL) {    if (cum_prob[0] * nitems > 10)      fp << 0 << "  " << cum_prob[0] * nitems << " "	<< tax->first_child(0) << " " << tax->last_child(0) << endl;    for (LINT i = 1; i < nitems; i++)      if ((cum_prob[i]-cum_prob[i-1]) * nitems > 10)	fp << i << "  " << (cum_prob[i]-cum_prob[i-1]) * nitems << " "	  << tax->first_child(i) << " " << tax->last_child(i) << endl;  }  else {    if (cum_prob[0] * nitems > 5)      fp << 0 << "  " << cum_prob[0] * nitems << endl;    for (LINT i = 1; i < nitems; i++)      if ((cum_prob[i]-cum_prob[i-1]) * nitems > 5)	fp << i << "  " << (cum_prob[i]-cum_prob[i-1]) * nitems << endl;  }  fp << setprecision(0);  fp << endl;}    //--------------------------------- String ---------------------------------String::String(LINT n)	// number of items  : nitems(n){  items = new LINT [nitems];//  rval = new FLOAT [nitems];//  ritems = new LINT [nitems];}String::~String(void){   delete [] items;//  delete [] rval;//  delete [] ritems;}void String::display(ofstream &fp, LINT prob_comp){  fp << setw(6) << prob_comp * prob << " " << setw(6) << conf << " ";  for(LINT i = 0; i < nitems; i++)     fp << " " << items[i];  fp << endl;  return;}void String::display(ofstream &fp, StringSet &lits, LINT prob_comp){  LINT i, j;  StringP lstr;  fp << setw(6) << prob_comp * prob << " " << setw(6) << conf << " ";  for(i = 0; i < nitems; i++)     {      fp << "  << ";      lstr = lits.get_pat(items[i]);      for (j = 0; j < lstr->nitems; j++) 	fp << lstr->items[j] << " ";      fp << ">>";    }  fp << endl;  return;}// shuffles items in string//// void String::shuffle(void)// {//   UniformDist ud;//   LINT i, j, ival;//   FLOAT fval;//   // associate a random value with each item//   for (i = 0; i < nitems; i++)//     rval[i] = ud();  //   // sort items according to the values in rval//   for (i = 0; i < nitems; i++ )//     {//       ival = items[i]; fval = rval[i];//       for ( j = i; j > 0 && rval[j-1] > fval; j-- ) {// 	  items[j] = items[j-1];// 	  rval[j] = rval[j-1];// 	}//       items[j] = ival;//       rval[j] = fval;//     }// }//------------------------------- StringSet -------------------------------StringSet::StringSet(LINT nitems, 	// number of items		     PatternPar par,	// npats, patlen, corr, conf & conf_var		     Taxonomy *ptax,	// taxonomy (optional)		     FLOAT rept,	// repetition-level		     FLOAT rept_var	// variation in repetition-level		     )  : tax(ptax){  NormalDist conf(par.conf, par.conf_var);  ExpDist freq;  ExpDist corr_lvl;  PoissonDist len(par.patlen-1);	// string length  NormalDist repeat(rept, rept_var);  UniformDist ud;  items = new ItemSet(nitems, tax);	// associate probabilities with items
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -