📄 gen.h
字号:
#include "glob.h"#include "dist.h"#include <stream.h>#include <fstream.h>//============================= Parameters =============================// Parameters used for StringSet// StringSet can be either large itemsets, or sequences//class PatternPar {public: LINT npats; // number of patterns FLOAT patlen; // average length of pattern FLOAT corr; // correlation between consecutive patterns FLOAT conf; // average confidence in a rule FLOAT conf_var; // variation in the confidence LINT seed; PatternPar(void) : npats(10000), patlen(4.0), corr(0.25), conf(0.75), conf_var(0.1), seed(0) {} void write(ostream &fp);};// Parameters used to generate transactions//class TransPar {public: LINT ntrans; // number of transactions in database FLOAT tlen; // average transaction length LINT nitems; // number of items PatternPar lits; // parameters for potentially large itemsets BOOLEAN ascii; // Generate in ASCII format LINT seed; // Seed to initialize RandSeed with before x-act generation TransPar(void) : ntrans(1000000), tlen(10), nitems(100000), ascii(FALSE), seed(INIT_SEED) {} void write(ostream &fp);};// Parameters used to generate transactions//class TaxPar:public TransPar {public: LINT nroots; // number of roots FLOAT fanout; // average fanout at each interiori node FLOAT nlevels; // average number of levels FLOAT depth_ratio; // affects ratio of itemsets chosen from higher levels TaxPar(void) : nroots(0), fanout(0), nlevels(0), depth_ratio(1) {} void calc_values(void); // calculates nroots, given nlevels // default values: nroots = 250, fanout = 5 void write(ostream &fp);};// Parameters used to generate sequences//class SeqPar {public: LINT ncust; // number of customers in database FLOAT slen; // average sequence length FLOAT tlen; // average transaction length LINT nitems; // number of items FLOAT rept; // repetition-level (between 0 and 1) FLOAT rept_var; // variation in repetition-level BOOLEAN ascii; // Generate in ASCII format PatternPar lits; // parameters for potentially large itemsets PatternPar lseq; // parameters for potentially large sequences SeqPar(void) : ncust(100000), slen(10), tlen(2.5), nitems(10000), rept(0), rept_var(0.1) { lits.npats = 25000; lseq.npats = 5000; lits.patlen = 1.25; lseq.patlen = 4.0; } void write(ostream &fp);};//------------------------------ Taxonomy ------------------------------//// models taxonomy over items as a tree// 0 is a valid item here (get rid of it when actually adding item//class Taxonomy{ friend class TaxStat;private: LINT nitems; // number of items LINT nroots; // number of roots FLOAT depth; // used when assigning probabilities to items LINT *par; LINT *child_start; LINT *child_end; static const LINT item_len; // ASCII field-width of item-idpublic: Taxonomy(LINT nitems, LINT nroots, FLOAT fanout, FLOAT depth_ratio); ~Taxonomy(void); void write(ofstream &fp); // write taxonomy to file void write_asc(ofstream &fp); // write taxonomy to ASCII file void display(ofstream &fp); // display taxonomy (for user) FLOAT depth_ratio(void) { return depth; } LINT num_roots(void) { return nroots; } LINT root(Item itm) { return (par[itm] == -1); } LINT num_children(Item itm) { return child_end[itm] - child_start[itm]; } LINT child(Item itm, LINT n) { return child_start[itm]+n; } // returns the n'th child of itm LINT first_child(Item itm) { return child_start[itm]; } LINT last_child(Item itm) { return child_end[itm]-1; } Item parent(Item itm) { return par[itm]; } // -1 => no parent};//--------------------------------------------------------------------------//// 0 is a valid item here (get rid of it when actually adding item//class ItemSet{private: LINT nitems; // number of items Taxonomy *tax; // taxonomy (optional) FLOAT *cum_prob; // cumulative probability FLOAT *tax_prob; // cumulative probability of choosing a child UniformDist rand; void normalize(FLOAT prob[], LINT low, LINT high);public: ItemSet(LINT nitems, Taxonomy *tax = NULL); ~ItemSet(); void display(ofstream &fp); Item get_item(void); // returns a random item (weighted) Item specialize(Item itm); // if no taxonomy, returns itm FLOAT weight(Item itm); // returns prob. of choosing item};class String{friend class StringSet; LINT nitems; // number of items Item *items; // list of the items// FLOAT *rval; // random value (used to get random ordering of the items)// Item *ritems; // randomly chosen items FLOAT prob; // probability that this string is chosen FLOAT conf; // probability that this string is corrrupted // void shuffle(void); // shuffles items in stringpublic: String(LINT nitems); ~String(void); void display(ofstream &fp, LINT prob_comp = 1); void display(ofstream &fp, StringSet &lits, LINT prob_comp = 1); // prob is multiplied by prob_comp before being writeed LINT size(void) { return nitems;} Item item(LINT n) { return items[n];} // return nth item of the string FLOAT conf_lvl(void) { return conf; } void set_size(LINT newsize) { nitems = newsize;} void set_item(LINT n, Item itm) { items[n] = itm;} void set_conf_lvl(FLOAT newconf) { conf = newconf; }// void shuffle(LINT k); // allows selection of k random items from the string// Item rand_item(LINT n) { return ritems[n];} // works with shuffle};typedef String *StringP;class StringSet{ friend class StringSetIter;private: ItemSet *items; Taxonomy *tax; LINT npats; // number of patterns StringP *pat; // array of patterns StringP answer; FLOAT *cum_prob; // cumulative probabilities of patterns StringP specialize(LINT i); // specializes pattern #ipublic: StringSet(LINT nitems, // number of items PatternPar par, // npats, patlen, corr, conf & conf_var Taxonomy *tax = NULL, // taxonomy (optional) FLOAT rept = 0, // repetition-level FLOAT rept_lvl = 0.2 // variation in repetition-level ); ~StringSet(); void display(ofstream &fp); // for large itemsets void display(ofstream &fp, StringSet &lit); // for sequences StringP get_pat(LINT i); // returns pattern #i};class StringSetIter{private: UniformDist rand; StringSet *strset; LINT last_pat; // if -ve, unget_pat() was calledpublic: StringSetIter(StringSet &str_set) : strset(&str_set), last_pat(0) {}; StringP get_pat(void); // returns a random pattern void unget_pat(void); // the last pattern is put back in the sequence};//--------------------------------------------------------------------------class Transaction {private: LINT tlen; // expected number of items in transaction LINT nitems; // number of items currently in transaction LINT maxsize; // size of array items LINT *items; // items in the transaction static const LINT cid_len; // ASCII field-width of customer-id static const LINT tid_len; // ASCII field-width of transaction-id static const LINT item_len; // ASCII field-width of item-id static LINT tid; // transaction-id void sort(void); BOOLEAN add_item(LINT itm);// returns TRUE if added, FALSE if already presentpublic: Transaction(LINT sz); ~Transaction(); BOOLEAN add(String &pat, BOOLEAN corrupt = TRUE); // adds pattern to transaction // returns TRUE if added, FALSE if trans. full void write(ofstream &fp, LINT cid = 0); void write_asc(ofstream &fp, LINT cid = 0); LINT size(void) { return nitems; }};typedef Transaction *TransactionP;class CustSeq {private: Cid cid; // customer-id LINT slen; // expected number of transactions in sequence LINT tlen; // avg. expected number of items in a transaction LINT ntrans; // number of transactions in sequence LINT nitems; // number of items in sequence LINT maxsize; // size of array trans TransactionP *trans; // transaction in the sequencepublic: CustSeq(Cid cid, LINT seq_len, LINT tot_items); ~CustSeq(void); BOOLEAN add(String &pat, StringSet &lits); // adds pattern to transaction void write(ofstream &fp); void write_asc(ofstream &fp); LINT size(void) { return nitems; }};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -