⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gen.cpp

📁 IBM实验室提供的数据集生成器源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
  LINT i, j, num_same;  FLOAT tot;  npats = par.npats;//  last_pat = 0;  pat = new StringP [npats];  for (i = 0; i < npats; i++)    {      pat[i] = new String( 1+len() );      // fill correlated items      if (par.corr > 0 && i > 0) {	// correlated patterns	// each pattern has some items same as the previous pattern	num_same = LINT( pat[i]->size() * par.corr * corr_lvl() + 0.5 );	if ( num_same > pat[i-1]->size() )	  num_same = pat[i-1]->size();	if ( num_same > pat[i]->size() )	  num_same = pat[i]->size();	// choose num_same items at random from previous pattern	Choose shuffle(pat[i-1]->size(), num_same);	for (j = 0; j < num_same; j++)	  pat[i]->items[j] = pat[i-1]->item( shuffle.pos(j) );//	pat[i-1]->shuffle(num_same);//	for (j = 0; j < num_same; j++)//	  pat[i]->items[j] = pat[i-1]->rand_item(j);      }      else {	// no correlation	num_same = 0;      }      if (rept == 0) {	// fill remaining items at random	for (j = num_same; j < pat[i]->size(); j++)	  pat[i]->items[j] = items->get_item();//	pat[i]->items[j] = LINT(1 + nitems * rand());      }      else {	// some items are repetitions	FLOAT rept_lvl = repeat();	for (j = num_same; j < pat[i]->size(); j++)	  if ( j > 0 && ud() < rept_lvl )	// pick a previous item	    pat[i]->items[j] = pat[i]->items[ LINT(j*ud()) ];	  else	// pick random item	    pat[i]->items[j] = items->get_item();      }      pat[i]->prob = freq(); // prob. that this pattern will be picked      pat[i]->conf = conf(); // used in Transaction::add and CustSeq::add      			     // to decide how many items to drop from			     //  this pattern to corrupt it    }  if (tax) {    // weight probabilites with geometric mean of probabilities of items    for (i = 0; i < npats; i++)      {	DOUBLE weight = 1;	for (j = 0; j < pat[i]->size(); j++)	  weight *= items->weight(pat[i]->items[j]);//	cerr << "WEIGHT = " << weight;	weight = pow(weight, DOUBLE(1)/pat[i]->size());//	cerr << "  " << weight << endl;	pat[i]->prob *= weight;      }  }  // normalize probabilites (why -- see get_pat)  cum_prob = new FLOAT [npats];  tot = 0;  for (i = 0; i < npats; i++)    tot += pat[i]->prob;  for (i = 0; i < npats; i++)    pat[i]->prob /= tot;  // calulate cumulative probabilities  cum_prob[0] = pat[0]->prob;  for (i = 1; i < npats; i++)    cum_prob[i] = cum_prob[i-1] + pat[i]->prob;//  cerr << cum_prob[npats-1] << endl << flush;  // allocate space for answer  LINT maxlen = 0;  for (i = 1; i < npats; i++)    if (pat[i]->size() > maxlen)      maxlen = pat[i]->size();  answer = new String(maxlen);}StringSet::~StringSet(){  LINT i;  for (i = 0; i < npats; i++)    delete pat[i];  delete [] pat;}// specialize each item in pattern #i and store result in answer//StringP StringSet::specialize(LINT i){  answer->set_size( pat[i]->size() );  answer->set_conf_lvl( pat[i]->conf_lvl() );  for (LINT j = 0; j < pat[i]->size(); j++)    answer->set_item(j, items->specialize( pat[i]->item(j) ));  return answer;}// returns pattern #i//StringP StringSet::get_pat(LINT i){   if (!tax)    return pat[i];  else    return specialize(i);};void StringSet::display(ofstream &fp){  LINT i;  items->display(fp);  fp << "ItemSets:" << endl;  fp << setprecision(3);  // too lazy to do a sort, so print high-prob. patterns first  for (i = 0; i < npats; i++)    if (pat[i]->prob * npats > 10)      pat[i]->display(fp, npats);  for (i = 0; i < npats; i++)    if (pat[i]->prob * npats <= 10 && pat[i]->prob * npats > 1)      pat[i]->display(fp, npats);  fp << setprecision(0);  fp << endl;}void StringSet::display(ofstream &fp, StringSet &lits){  LINT i;  fp << setprecision(3);  // too lazy to do a sort, so print high-prob. patterns first  for (i = 0; i < npats; i++)    if (pat[i]->prob * npats > 6)      pat[i]->display(fp, lits, npats);  for (i = 0; i < npats; i++)    if (pat[i]->prob * npats <= 6)      pat[i]->display(fp, lits, npats);  fp << setprecision(0);}//------------------------------- StringSet -------------------------------// returns a pattern chosen at random//StringP StringSetIter::get_pat(void){   FLOAT r;  LINT i = 0;  if (last_pat < 0) {    last_pat = -last_pat;    if (!strset->tax)      return strset->pat[last_pat];    else      return strset->specialize(last_pat);  }  // find the desired pattern using cum_prob table  r = rand();  i = r * strset->npats;  if (i == strset->npats)    i--;  while ( i < (strset->npats-1) && r > strset->cum_prob[i] )    i++;  while ( i > 0 && r < strset->cum_prob[i-1] )    i--;  last_pat = i;  if (!strset->tax)    return strset->pat[i];  else    return strset->specialize(i);};void StringSetIter::unget_pat(void){  last_pat = -last_pat;}    //------------------------------ Transaction ------------------------------// static variablesconst LINT Transaction::cid_len = 10;const LINT Transaction::tid_len = 10;const LINT Transaction::item_len = 10;LINT Transaction::tid = 0;Transaction::Transaction(LINT sz)  : tlen(sz), nitems(0), maxsize(5 * sz){  // maximum size of a transaction is 5 * sz  items = new LINT [maxsize];}Transaction::~Transaction(){  delete [] items;}void Transaction::sort(void){  LINT val;  LINT i, j;  for (i = 1; i < nitems; i++ )    {      val = items[i];      for ( j = i; j > 0 && items[j-1] > val; j-- )	items[j] = items[j-1];      items[j] = val;    }}BOOLEAN Transaction::add_item(LINT itm){   LINT i;  for (i = 0; i < nitems; i++)    if ( items[i] == itm ) return FALSE;  if (nitems >= maxsize) {	// allocate more memory    LINT *old_items = items;    maxsize *= 2;    items = new LINT [maxsize];    for (i = 0; i < nitems; i++)      items[i] = old_items[i];    delete [] old_items;  }  items[nitems++] = itm;  return TRUE;}// adds pattern to transaction// returns TRUE if pattern was added, FALSE else//BOOLEAN Transaction::add(String &pat, BOOLEAN corrupt){  static UniformDist ud;  LINT i, patlen;  // corrupt the pattern by reducing its length;  // conf_lvl for a pattern is decided at the time of pattern creation  patlen = pat.size();  if ( corrupt )    while ( patlen > 0 && ud() > pat.conf_lvl() )      patlen--;    // in half of the cases, we drop the pattern that won't fit  if ( patlen+nitems > tlen )	// not enough space left    if ( ud() > 0.5 )      return FALSE;    // pick "patlen" items at random from pattern//  if ( patlen < pat.size() )  Choose shuffle(pat.size(), patlen);  for (i = 0; i < patlen; i++)    add_item( pat.item(shuffle.pos(i)) ); // allocates extra space if necessary//    pat.shuffle(patlen);//  for (i = 0; i < patlen; i++)//    add_item( pat.rand_item(i) ); // allocates extra space if necessary    return TRUE;}void Transaction::write(ofstream &fp, LINT cid){  if ( nitems == 0 )    return;  sort();  tid++;  if (cid == 0)		// no customer-id; set cust-id to trans-id    cid = tid;  fp.write((char *)&cid, sizeof(LINT));  fp.write((char *)&tid, sizeof(LINT));  fp.write((char *)&nitems, sizeof(LINT));  fp.write((char *)items, nitems * sizeof(LINT));}void Transaction::write_asc(ofstream &fp, LINT cid){  if ( nitems == 0 )    return;  sort();  tid++;  if (cid == 0)		// no customer-id; set cust-id to trans-id    cid = tid;  for (LINT i = 0; i < nitems; i++) {    fp << setw(cid_len) << cid << " ";    fp << setw(tid_len) << tid << " ";    fp << setw(item_len) << items[i] << endl;  }}//------------------------------ CustSeq ------------------------------CustSeq::CustSeq(Cid cust_id, LINT seq_len, LINT trans_len)  : cid(cust_id), slen(seq_len), tlen(trans_len), nitems(0),    ntrans(seq_len), maxsize(5 * seq_len){  // we reallocate memory if necessary  trans = new TransactionP [maxsize];  for (LINT i = 0; i < maxsize; i++)    trans[i] = NULL;}CustSeq::~CustSeq(){  for (LINT i = 0; i < maxsize; i++)    if ( trans[i] )      delete trans[i];  delete [] trans;}// adds pattern to CustSeq// returns TRUE if pattern was added, FALSE else// REWORK!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!//BOOLEAN CustSeq::add(String &pat, StringSet &lits){  static UniformDist ud;  LINT i, patlen;  LINT pos;  LINT newitems, olditems;  BOOLEAN corrupt;	// if TRUE, corrupt transactions too  if ( ud() > pat.conf_lvl() )    corrupt = TRUE;		// corrupt transactions  else    corrupt = FALSE;		// don't corrupt transactions  // corrupt the pattern by reducing its length;  // conf_lvl for a pattern is decided at the time of pattern creation  patlen = pat.size();  if ( corrupt )    while ( patlen > 0 && ud() > pat.conf_lvl() )      patlen--;  if ( patlen == 0 )	// no trans. left in sequence    return TRUE;  // allows transactions to be dropped randomly from the sequence//  if ( patlen < pat.size() )  Choose shuffle(pat.size(), patlen);//    pat.shuffle(patlen);  // calculate # of items in pattern  for (newitems = 0, i = 0; i < patlen; i++)    newitems += lits.get_pat( pat.item( shuffle.pos(i) ) )->size();//    newitems += lits.get_pat( pat.rand_item(i) )->size();  // in half of the cases, we drop the pattern that won't fit  if ( (patlen > slen) || (newitems + nitems > slen * tlen) )    if ( ud() > 0.5 )      return FALSE;  if ( patlen > maxsize ) {	// need to allocate more memory    TransactionP *old_trans = trans;    LINT oldsize = maxsize;    maxsize = patlen*2;    trans = new TransactionP [maxsize];    for (i = 0; i < oldsize; i++)      trans[i] = old_trans[i];    for (; i < maxsize; i++)      trans[i] = NULL;    delete [] old_trans;  }  // add new sequence  Choose *shuffle1 = NULL;  if (ntrans > patlen)    shuffle1 = new Choose(ntrans, patlen);  for (i = 0; i < patlen; i++)    {      if ( shuffle1 )	pos = shuffle1->pos(i);      else	pos = i;      if ( trans[pos] == NULL )	trans[pos] = new Transaction(tlen);      olditems = trans[pos]->size();      trans[pos]->add( *lits.get_pat(pat.item( shuffle.pos(i) )), corrupt ); //      trans[pos]->add( *lits.get_pat(pat.rand_item(i)), corrupt );       nitems += trans[pos]->size() - olditems;  // update count of #items    }  delete shuffle1;//   pos = ud() * ntrans / patlen;//   for (i = 0; i < patlen; i++)//     {//       if ( trans[pos] == NULL )// 	trans[pos] = new Transaction(tlen);//       olditems = trans[pos]->size();//       trans[pos]->add( *lits.get_pat(pat.item( shuffle.pos(i) )), corrupt ); // //      trans[pos]->add( *lits.get_pat(pat.rand_item(i)), corrupt ); //       nitems += trans[pos]->size() - olditems;  // update count of #items//       pos += 1 + ud() * ntrans / patlen;//     }  return TRUE;}void CustSeq::write(ofstream &fp){  for (LINT i = 0; i < ntrans-1; i++)    if ( trans[i]->size() > 0 )      trans[i]->write(fp, cid);}void CustSeq::write_asc(ofstream &fp){  for (LINT i = 0; i < ntrans-1; i++)    if ( trans[i]->size() > 0 )      trans[i]->write_asc(fp, cid);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -