📄 gen.cpp
字号:
#include "gen.h"#include <assert.h>#include <string.h>#include <math.h>//------------------------------- Parameters -------------------------------void PatternPar::write(ostream &fp){ fp << "\tNumber of patterns = " << npats << endl; fp << "\tAverage length of pattern = " << patlen << endl; fp << "\tCorrelation between consecutive patterns = " << corr << endl; fp << "\tAverage confidence in a rule = " << conf << endl; fp << "\tVariation in the confidence = " << conf_var << endl;}void TransPar::write(ostream &fp){ fp << "Number of transactions in database = " << ntrans << endl; fp << "Average transaction length = " << tlen << endl; fp << "Number of items = " << nitems << endl; fp << "Large Itemsets:" << endl; lits.write(fp); fp << endl;}// calculate the number of roots, given the number of levelsvoid TaxPar::calc_values(void){ LINT nset; nset = 0; nset += (nlevels != 0); nset += (fanout != 0); nset += (nroots != 0); switch (nset) { case 0: // fill in defaults nroots = 250; fanout = 5; return; case 1: // need to fill in 1 value assert (nlevels == 0); if (fanout == 0) fanout = 5; else if (nroots == 0) nroots = 250; return; case 2: if (nlevels == 0) // all set! return; if (fanout != 0) { // calculate nroots nroots = nitems / (1 + pow(DOUBLE(fanout), DOUBLE(nlevels-1))); if (nroots < 1) nroots = 1; } else if (nroots != 0) { // calculate fanout FLOAT temp; temp = (FLOAT)nitems / nroots - 1; temp = log((DOUBLE)temp) / (nlevels - 1); fanout = exp((DOUBLE)temp); } case 3: // all set! return; }}void TaxPar::write(ostream &fp){ fp << "Number of transactions in database = " << ntrans << endl; fp << "Average transaction length = " << tlen << endl; fp << "Number of items = " << nitems << endl; fp << "Number of roots = " << nroots << endl; fp << "Number of levels = " << nlevels << endl; fp << "Average fanout = " << fanout << endl; fp << "Large Itemsets:" << endl; lits.write(fp); fp << endl;}void SeqPar::write(ostream &fp){ fp << "Number of customers in database = " << ncust << endl; fp << "Average sequence length = " << slen << endl; fp << "Average transaction length = " << tlen << endl; fp << "Number of items = " << nitems << endl; fp << "Repetition-level = " << rept << endl; fp << "Variation in repetition-level = " << rept_var << endl; fp << "Large Itemsets:" << endl; lits.write(fp); fp << "Large Sequences:" << endl; lseq.write(fp); fp << endl;}//------------------------------ Taxonomy ------------------------------const LINT Taxonomy::item_len = 7;//// Constructor: reads taxonomy file and builds taxonomy as a DAG//Taxonomy::Taxonomy(LINT num_items, // total number of items LINT num_roots, // number of roots FLOAT fanout, // average fanout FLOAT depth_ratio // average ratio of .... ) : nitems(num_items), nroots(num_roots), depth(depth_ratio){ LINT i, j; LINT next_child; PoissonDist nchildren(fanout-1); // string length // allocate memory par = new LINT [nitems]; child_start = new LINT [nitems]; child_end = new LINT [nitems]; next_child = nroots; // initialize parents (or lack thereof) for roots for (i = 0; i < nroots; i++) par[i] = -1; // set up all the interior nodes for (i = 0, j = next_child; i < nitems && next_child < nitems; i++) { child_start[i] = next_child; next_child += nchildren() + 1; if (next_child > nitems) next_child = nitems; child_end[i] = next_child; for (; j < next_child; j++) par[j] = i; } // initialize children (or lack thereof) for all the leaves for (; i < nitems; i++) child_start[i] = child_end[i] = -1;}Taxonomy::~Taxonomy(void){ LINT i; delete [] par; delete [] child_start; delete [] child_end;}void Taxonomy::write(ofstream &fp){ for (LINT i = 0; i < nitems; i++) if (par[i] >= 0) { assert(i != par[i]); fp.write((char *)&i, sizeof(LINT)); fp.write((char *)&par[i], sizeof(LINT)); }}void Taxonomy::write_asc(ofstream &fp){ for (LINT i = 0; i < nitems; i++) if (par[i] >= 0) { assert(i != par[i]); fp << setw(item_len) << i << " " << setw(item_len) << par[i] << endl; }}void Taxonomy::display(ofstream &fp){ fp << "Taxonomy: " << endl; for (LINT i = 0; i < nitems && child_start[i] > 0; i++) fp << i << " " << child_start[i] << " " << child_end[i]-1 << endl; fp << endl;}//------------------------------- ItemSet -------------------------------ItemSet::ItemSet(LINT num_items, // number of items Taxonomy *ptax // taxonomy (optional) ) : tax(ptax), nitems(num_items){ ExpDist freq; LINT i, j; cum_prob = new FLOAT [nitems]; if (tax) tax_prob = new FLOAT [nitems]; else tax_prob = NULL; for (i = 0; i < nitems; i++) cum_prob[i] = freq(); // prob. that this pattern will be picked if (tax) { // weight(itm) += wieght(children) // normalize probabilities for the roots and for children normalize(cum_prob, 0, tax->num_roots()-1); for (i = 0; i < nitems && tax->num_children(i) > 0; i++) normalize(cum_prob, tax->first_child(i), tax->last_child(i)); // calulate cumulative probabilities for children for (i = 0; i < nitems; i++) tax_prob[i] = cum_prob[i]; for (i = 1; i < nitems; i++) if (tax->num_children(i) > 0) for (j = tax->first_child(i); j < tax->last_child(i); j++) tax_prob[j+1] += tax_prob[j]; // set real probabilities for (i = tax->num_roots(); i < nitems; i++) cum_prob[i] *= cum_prob[ tax->parent(i) ] * tax->depth_ratio(); } // normalize probabilites (why -- see get_pat) normalize(cum_prob, 0, nitems-1); for (i = 1; i < nitems; i++) // calulate cumulative probabilities cum_prob[i] += cum_prob[i-1];}ItemSet::~ItemSet(){ delete [] cum_prob;}// normalize probabilities between low and high//void ItemSet::normalize(FLOAT prob[], LINT low, LINT high){ FLOAT tot; LINT i; // normalize probabilites tot = 0; for (i = low; i <= high; i++) tot += prob[i]; for (i = low; i <= high; i++) prob[i] /= tot;}// returns a pattern chosen at random//Item ItemSet::get_item(void){ FLOAT r; LINT i; // find the desired pattern using cum_prob table r = rand(); // want item i such that cum_prob[i-1] < r <= cum_prob[i]; i = r * nitems; // guess location of item i += (r-cum_prob[i]) * nitems; // refine guess if (i >= nitems) // check boundaries i = nitems-1; if (i < 0) i = 0; while ( i < (nitems-1) && r > cum_prob[i] ) // find item i++; while ( i > 0 && r <= cum_prob[i-1] ) i--; return i;};// if no taxonomy, returns itm//Item ItemSet::specialize(Item itm){ FLOAT r; LINT i, nchildren; Item first, last; if (!tax) // no taxonomy return itm; nchildren = tax->num_children(itm); if (nchildren == 0) // no children return itm; first = tax->child(itm, 0); last = tax->child(itm, nchildren-1); // find the desired pattern using cum_prob table r = rand(); i = first + r * nchildren; if (i == last) i--; while ( i < last && r > tax_prob[i] ) i++; while ( i > first && r < tax_prob[i-1] ) i--; return specialize(i);} FLOAT ItemSet::weight(Item itm) // returns prob. of choosing item{ if (itm == 0) return cum_prob[itm]; else return cum_prob[itm] - cum_prob[itm-1];}void ItemSet::display(ofstream &fp){// if (tax != NULL)// tax->display(fp); fp << "Items:" << endl; fp << setprecision(3); if (tax != NULL) { if (cum_prob[0] * nitems > 10) fp << 0 << " " << cum_prob[0] * nitems << " " << tax->first_child(0) << " " << tax->last_child(0) << endl; for (LINT i = 1; i < nitems; i++) if ((cum_prob[i]-cum_prob[i-1]) * nitems > 10) fp << i << " " << (cum_prob[i]-cum_prob[i-1]) * nitems << " " << tax->first_child(i) << " " << tax->last_child(i) << endl; } else { if (cum_prob[0] * nitems > 5) fp << 0 << " " << cum_prob[0] * nitems << endl; for (LINT i = 1; i < nitems; i++) if ((cum_prob[i]-cum_prob[i-1]) * nitems > 5) fp << i << " " << (cum_prob[i]-cum_prob[i-1]) * nitems << endl; } fp << setprecision(0); fp << endl;} //--------------------------------- String ---------------------------------String::String(LINT n) // number of items : nitems(n){ items = new LINT [nitems];// rval = new FLOAT [nitems];// ritems = new LINT [nitems];}String::~String(void){ delete [] items;// delete [] rval;// delete [] ritems;}void String::display(ofstream &fp, LINT prob_comp){ fp << setw(6) << prob_comp * prob << " " << setw(6) << conf << " "; for(LINT i = 0; i < nitems; i++) fp << " " << items[i]; fp << endl; return;}void String::display(ofstream &fp, StringSet &lits, LINT prob_comp){ LINT i, j; StringP lstr; fp << setw(6) << prob_comp * prob << " " << setw(6) << conf << " "; for(i = 0; i < nitems; i++) { fp << " << "; lstr = lits.get_pat(items[i]); for (j = 0; j < lstr->nitems; j++) fp << lstr->items[j] << " "; fp << ">>"; } fp << endl; return;}// shuffles items in string//// void String::shuffle(void)// {// UniformDist ud;// LINT i, j, ival;// FLOAT fval;// // associate a random value with each item// for (i = 0; i < nitems; i++)// rval[i] = ud(); // // sort items according to the values in rval// for (i = 0; i < nitems; i++ )// {// ival = items[i]; fval = rval[i];// for ( j = i; j > 0 && rval[j-1] > fval; j-- ) {// items[j] = items[j-1];// rval[j] = rval[j-1];// }// items[j] = ival;// rval[j] = fval;// }// }//------------------------------- StringSet -------------------------------StringSet::StringSet(LINT nitems, // number of items PatternPar par, // npats, patlen, corr, conf & conf_var Taxonomy *ptax, // taxonomy (optional) FLOAT rept, // repetition-level FLOAT rept_var // variation in repetition-level ) : tax(ptax){ NormalDist conf(par.conf, par.conf_var); ExpDist freq; ExpDist corr_lvl; PoissonDist len(par.patlen-1); // string length NormalDist repeat(rept, rept_var); UniformDist ud; items = new ItemSet(nitems, tax); // associate probabilities with items
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -