📄 gen.cpp
字号:
LINT i, j, num_same; FLOAT tot; npats = par.npats;// last_pat = 0; pat = new StringP [npats]; for (i = 0; i < npats; i++) { pat[i] = new String( 1+len() ); // fill correlated items if (par.corr > 0 && i > 0) { // correlated patterns // each pattern has some items same as the previous pattern num_same = LINT( pat[i]->size() * par.corr * corr_lvl() + 0.5 ); if ( num_same > pat[i-1]->size() ) num_same = pat[i-1]->size(); if ( num_same > pat[i]->size() ) num_same = pat[i]->size(); // choose num_same items at random from previous pattern Choose shuffle(pat[i-1]->size(), num_same); for (j = 0; j < num_same; j++) pat[i]->items[j] = pat[i-1]->item( shuffle.pos(j) );// pat[i-1]->shuffle(num_same);// for (j = 0; j < num_same; j++)// pat[i]->items[j] = pat[i-1]->rand_item(j); } else { // no correlation num_same = 0; } if (rept == 0) { // fill remaining items at random for (j = num_same; j < pat[i]->size(); j++) pat[i]->items[j] = items->get_item();// pat[i]->items[j] = LINT(1 + nitems * rand()); } else { // some items are repetitions FLOAT rept_lvl = repeat(); for (j = num_same; j < pat[i]->size(); j++) if ( j > 0 && ud() < rept_lvl ) // pick a previous item pat[i]->items[j] = pat[i]->items[ LINT(j*ud()) ]; else // pick random item pat[i]->items[j] = items->get_item(); } pat[i]->prob = freq(); // prob. that this pattern will be picked pat[i]->conf = conf(); // used in Transaction::add and CustSeq::add // to decide how many items to drop from // this pattern to corrupt it } if (tax) { // weight probabilites with geometric mean of probabilities of items for (i = 0; i < npats; i++) { DOUBLE weight = 1; for (j = 0; j < pat[i]->size(); j++) weight *= items->weight(pat[i]->items[j]);// cerr << "WEIGHT = " << weight; weight = pow(weight, DOUBLE(1)/pat[i]->size());// cerr << " " << weight << endl; pat[i]->prob *= weight; } } // normalize probabilites (why -- see get_pat) cum_prob = new FLOAT [npats]; tot = 0; for (i = 0; i < npats; i++) tot += pat[i]->prob; for (i = 0; i < npats; i++) pat[i]->prob /= tot; // calulate cumulative probabilities cum_prob[0] = pat[0]->prob; for (i = 1; i < npats; i++) cum_prob[i] = cum_prob[i-1] + pat[i]->prob;// cerr << cum_prob[npats-1] << endl << flush; // allocate space for answer LINT maxlen = 0; for (i = 1; i < npats; i++) if (pat[i]->size() > maxlen) maxlen = pat[i]->size(); answer = new String(maxlen);}StringSet::~StringSet(){ LINT i; for (i = 0; i < npats; i++) delete pat[i]; delete [] pat;}// specialize each item in pattern #i and store result in answer//StringP StringSet::specialize(LINT i){ answer->set_size( pat[i]->size() ); answer->set_conf_lvl( pat[i]->conf_lvl() ); for (LINT j = 0; j < pat[i]->size(); j++) answer->set_item(j, items->specialize( pat[i]->item(j) )); return answer;}// returns pattern #i//StringP StringSet::get_pat(LINT i){ if (!tax) return pat[i]; else return specialize(i);};void StringSet::display(ofstream &fp){ LINT i; items->display(fp); fp << "ItemSets:" << endl; fp << setprecision(3); // too lazy to do a sort, so print high-prob. patterns first for (i = 0; i < npats; i++) if (pat[i]->prob * npats > 10) pat[i]->display(fp, npats); for (i = 0; i < npats; i++) if (pat[i]->prob * npats <= 10 && pat[i]->prob * npats > 1) pat[i]->display(fp, npats); fp << setprecision(0); fp << endl;}void StringSet::display(ofstream &fp, StringSet &lits){ LINT i; fp << setprecision(3); // too lazy to do a sort, so print high-prob. patterns first for (i = 0; i < npats; i++) if (pat[i]->prob * npats > 6) pat[i]->display(fp, lits, npats); for (i = 0; i < npats; i++) if (pat[i]->prob * npats <= 6) pat[i]->display(fp, lits, npats); fp << setprecision(0);}//------------------------------- StringSet -------------------------------// returns a pattern chosen at random//StringP StringSetIter::get_pat(void){ FLOAT r; LINT i = 0; if (last_pat < 0) { last_pat = -last_pat; if (!strset->tax) return strset->pat[last_pat]; else return strset->specialize(last_pat); } // find the desired pattern using cum_prob table r = rand(); i = r * strset->npats; if (i == strset->npats) i--; while ( i < (strset->npats-1) && r > strset->cum_prob[i] ) i++; while ( i > 0 && r < strset->cum_prob[i-1] ) i--; last_pat = i; if (!strset->tax) return strset->pat[i]; else return strset->specialize(i);};void StringSetIter::unget_pat(void){ last_pat = -last_pat;} //------------------------------ Transaction ------------------------------// static variablesconst LINT Transaction::cid_len = 10;const LINT Transaction::tid_len = 10;const LINT Transaction::item_len = 10;LINT Transaction::tid = 0;Transaction::Transaction(LINT sz) : tlen(sz), nitems(0), maxsize(5 * sz){ // maximum size of a transaction is 5 * sz items = new LINT [maxsize];}Transaction::~Transaction(){ delete [] items;}void Transaction::sort(void){ LINT val; LINT i, j; for (i = 1; i < nitems; i++ ) { val = items[i]; for ( j = i; j > 0 && items[j-1] > val; j-- ) items[j] = items[j-1]; items[j] = val; }}BOOLEAN Transaction::add_item(LINT itm){ LINT i; for (i = 0; i < nitems; i++) if ( items[i] == itm ) return FALSE; if (nitems >= maxsize) { // allocate more memory LINT *old_items = items; maxsize *= 2; items = new LINT [maxsize]; for (i = 0; i < nitems; i++) items[i] = old_items[i]; delete [] old_items; } items[nitems++] = itm; return TRUE;}// adds pattern to transaction// returns TRUE if pattern was added, FALSE else//BOOLEAN Transaction::add(String &pat, BOOLEAN corrupt){ static UniformDist ud; LINT i, patlen; // corrupt the pattern by reducing its length; // conf_lvl for a pattern is decided at the time of pattern creation patlen = pat.size(); if ( corrupt ) while ( patlen > 0 && ud() > pat.conf_lvl() ) patlen--; // in half of the cases, we drop the pattern that won't fit if ( patlen+nitems > tlen ) // not enough space left if ( ud() > 0.5 ) return FALSE; // pick "patlen" items at random from pattern// if ( patlen < pat.size() ) Choose shuffle(pat.size(), patlen); for (i = 0; i < patlen; i++) add_item( pat.item(shuffle.pos(i)) ); // allocates extra space if necessary// pat.shuffle(patlen);// for (i = 0; i < patlen; i++)// add_item( pat.rand_item(i) ); // allocates extra space if necessary return TRUE;}void Transaction::write(ofstream &fp, LINT cid){ if ( nitems == 0 ) return; sort(); tid++; if (cid == 0) // no customer-id; set cust-id to trans-id cid = tid; fp.write((char *)&cid, sizeof(LINT)); fp.write((char *)&tid, sizeof(LINT)); fp.write((char *)&nitems, sizeof(LINT)); fp.write((char *)items, nitems * sizeof(LINT));}void Transaction::write_asc(ofstream &fp, LINT cid){ if ( nitems == 0 ) return; sort(); tid++; if (cid == 0) // no customer-id; set cust-id to trans-id cid = tid; for (LINT i = 0; i < nitems; i++) { fp << setw(cid_len) << cid << " "; fp << setw(tid_len) << tid << " "; fp << setw(item_len) << items[i] << endl; }}//------------------------------ CustSeq ------------------------------CustSeq::CustSeq(Cid cust_id, LINT seq_len, LINT trans_len) : cid(cust_id), slen(seq_len), tlen(trans_len), nitems(0), ntrans(seq_len), maxsize(5 * seq_len){ // we reallocate memory if necessary trans = new TransactionP [maxsize]; for (LINT i = 0; i < maxsize; i++) trans[i] = NULL;}CustSeq::~CustSeq(){ for (LINT i = 0; i < maxsize; i++) if ( trans[i] ) delete trans[i]; delete [] trans;}// adds pattern to CustSeq// returns TRUE if pattern was added, FALSE else// REWORK!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!//BOOLEAN CustSeq::add(String &pat, StringSet &lits){ static UniformDist ud; LINT i, patlen; LINT pos; LINT newitems, olditems; BOOLEAN corrupt; // if TRUE, corrupt transactions too if ( ud() > pat.conf_lvl() ) corrupt = TRUE; // corrupt transactions else corrupt = FALSE; // don't corrupt transactions // corrupt the pattern by reducing its length; // conf_lvl for a pattern is decided at the time of pattern creation patlen = pat.size(); if ( corrupt ) while ( patlen > 0 && ud() > pat.conf_lvl() ) patlen--; if ( patlen == 0 ) // no trans. left in sequence return TRUE; // allows transactions to be dropped randomly from the sequence// if ( patlen < pat.size() ) Choose shuffle(pat.size(), patlen);// pat.shuffle(patlen); // calculate # of items in pattern for (newitems = 0, i = 0; i < patlen; i++) newitems += lits.get_pat( pat.item( shuffle.pos(i) ) )->size();// newitems += lits.get_pat( pat.rand_item(i) )->size(); // in half of the cases, we drop the pattern that won't fit if ( (patlen > slen) || (newitems + nitems > slen * tlen) ) if ( ud() > 0.5 ) return FALSE; if ( patlen > maxsize ) { // need to allocate more memory TransactionP *old_trans = trans; LINT oldsize = maxsize; maxsize = patlen*2; trans = new TransactionP [maxsize]; for (i = 0; i < oldsize; i++) trans[i] = old_trans[i]; for (; i < maxsize; i++) trans[i] = NULL; delete [] old_trans; } // add new sequence Choose *shuffle1 = NULL; if (ntrans > patlen) shuffle1 = new Choose(ntrans, patlen); for (i = 0; i < patlen; i++) { if ( shuffle1 ) pos = shuffle1->pos(i); else pos = i; if ( trans[pos] == NULL ) trans[pos] = new Transaction(tlen); olditems = trans[pos]->size(); trans[pos]->add( *lits.get_pat(pat.item( shuffle.pos(i) )), corrupt ); // trans[pos]->add( *lits.get_pat(pat.rand_item(i)), corrupt ); nitems += trans[pos]->size() - olditems; // update count of #items } delete shuffle1;// pos = ud() * ntrans / patlen;// for (i = 0; i < patlen; i++)// {// if ( trans[pos] == NULL )// trans[pos] = new Transaction(tlen);// olditems = trans[pos]->size();// trans[pos]->add( *lits.get_pat(pat.item( shuffle.pos(i) )), corrupt ); // // trans[pos]->add( *lits.get_pat(pat.rand_item(i)), corrupt ); // nitems += trans[pos]->size() - olditems; // update count of #items// pos += 1 + ud() * ntrans / patlen;// } return TRUE;}void CustSeq::write(ofstream &fp){ for (LINT i = 0; i < ntrans-1; i++) if ( trans[i]->size() > 0 ) trans[i]->write(fp, cid);}void CustSeq::write_asc(ofstream &fp){ for (LINT i = 0; i < ntrans-1; i++) if ( trans[i]->size() > 0 ) trans[i]->write_asc(fp, cid);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -