⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 arstring.cpp

📁 关联规则挖掘数据产生程序.VISUAL C++ 可产生满足要求的挖掘数据.
💻 CPP
字号:
// ARString.cpp: implementation of the ARString class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "ARString.h"
#include "Choose.h"

#include <iomanip.h>
#include <math.h>
#include <stdio.h>

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

//--------------------------------- String ---------------------------------


// Creates a string (pattern) with room for `n` items.
//
// Only the item count and the item array are set up here; the item values
// themselves, as well as `prob` and `conf`, are left for the caller to fill.
ARString::ARString(int n)	// number of items
  : nitems(n)
{
  // nitems was just initialised from n, so size the array from n directly.
  items = new int [n];
}


// Releases the item array allocated by the constructor.
ARString::~ARString(void)
{
  delete [] items;
}


// Writes this string to `fp` on a single line:
//   <prob_comp * prob> <conf>  item item item ...
//
// fp        - output stream to write to
// prob_comp - factor the stored probability is multiplied by before printing
void ARString::display(ofstream &fp, int prob_comp)
{
  // Scaled probability and confidence, each in a 6-wide field.
  fp << setw(6) << prob_comp * prob << " ";
  fp << setw(6) << conf << " ";

  // Every item is preceded by one blank.
  int k = 0;
  while (k < nitems)
    {
      fp << " " << items[k];
      ++k;
    }

  fp << endl;
}

// Writes this string to `fp` on a single line, expanding every item through
// `lits`: each item is used as a pattern index into `lits`, and the items of
// the pattern returned by lits.get_pat() are printed between "<<" and ">>".
//
// fp        - output stream to write to
// lits      - string set used to look up the pattern for each item
// prob_comp - factor the stored probability is multiplied by before printing
void ARString::display(ofstream &fp, StringSet &lits, int prob_comp)
{
  // Scaled probability and confidence, each in a 6-wide field.
  fp << setw(6) << prob_comp * prob << " ";
  fp << setw(6) << conf << " ";

  for (int outer = 0; outer < nitems; ++outer)
    {
      // Fetch the pattern this item refers to and dump its items.
      StringP expanded = lits.get_pat(items[outer]);

      fp << "  << ";
      int inner = 0;
      while (inner < expanded->nitems)
        {
          fp << expanded->items[inner] << " ";
          ++inner;
        }
      fp << ">>";
    }

  fp << endl;
}



//------------------------------- StringSet -------------------------------


// Builds the set of patterns that are later picked at random.
//
// For each of the par.npats patterns, in this order:
//   1. a length of 1 + len() is drawn (len is a PoissonDist built from
//      par.patlen-1);
//   2. if par.corr > 0 and this is not the first pattern, some items are
//      copied from randomly chosen positions of the previous pattern;
//   3. the remaining items come from the ItemSet, or -- when rept != 0 --
//      are sometimes repeats of items already placed in this pattern;
//   4. a pick probability (freq) and a confidence level (conf) are drawn.
// Afterwards the probabilities are optionally weighted (taxonomy only),
// normalized so they sum to 1, and accumulated into cum_prob.  Finally the
// scratch string `answer` is sized to hold the longest pattern.
//
// The order in which the random distributions are sampled is kept exactly
// as it was, so generated data does not change for a given random stream.
StringSet::StringSet(int nitems,        // number of items
                     PatternPar par,    // npats, patlen, corr, conf & conf_var
                     Taxonomy *ptax,    // taxonomy (optional)
                     float rept,        // repetition-level
                     float rept_var     // variation in repetition-level
                     )
  : tax(ptax)
{
  NormalDist conf(par.conf, par.conf_var);
  ExpDist freq;
  ExpDist corr_lvl;
  PoissonDist len(par.patlen-1);        // string length
  NormalDist repeat(rept, rept_var);
  UniformDist ud;

  items = new ItemSet(nitems, tax);     // associate probabilities with items

  int i, j, num_same;
  float tot;

  npats = par.npats;
  pat = new StringP [npats];
  for (i = 0; i < npats; i++)
    {
      pat[i] = new ARString( 1+len() );

      // fill correlated items
      if (par.corr > 0 && i > 0) {      // correlated patterns
        // each pattern has some items same as the previous pattern
        num_same = int( pat[i]->size() * par.corr * corr_lvl() + 0.5 );
        // cannot share more items than either pattern holds
        if ( num_same > pat[i-1]->size() )
          num_same = pat[i-1]->size();
        if ( num_same > pat[i]->size() )
          num_same = pat[i]->size();
        // choose num_same items at random from previous pattern
        CChoose shuffle(pat[i-1]->size(), num_same);
        for (j = 0; j < num_same; j++)
          pat[i]->items[j] = pat[i-1]->item( shuffle.pos(j) );
      }
      else {    // no correlation
        num_same = 0;
      }

      if (rept == 0) {
        // fill remaining items at random
        for (j = num_same; j < pat[i]->size(); j++)
          pat[i]->items[j] = items->get_item();
      }
      else {
        // some items are repetitions
        float rept_lvl = repeat();
        for (j = num_same; j < pat[i]->size(); j++)
          if ( j > 0 && ud() < rept_lvl )       // pick a previous item
            pat[i]->items[j] = pat[i]->items[ int(j*ud()) ];
          else  // pick random item
            pat[i]->items[j] = items->get_item();
      }
      pat[i]->prob = freq(); // prob. that this pattern will be picked
      pat[i]->conf = conf(); // used in Transaction::add and CustSeq::add
                             // to decide how many items to drop from
                             //  this pattern to corrupt it
    }

  if (tax) {
    // weight probabilities with geometric mean of probabilities of items
    for (i = 0; i < npats; i++)
      {
        double weight = 1;
        for (j = 0; j < pat[i]->size(); j++)
          weight *= items->weight(pat[i]->items[j]);
        weight = pow(weight, double(1)/pat[i]->size());
        pat[i]->prob *= weight;
      }
  }

  // normalize probabilities (why -- see get_pat)
  cum_prob = new float [npats];
  tot = 0;
  for (i = 0; i < npats; i++)
    tot += pat[i]->prob;
  for (i = 0; i < npats; i++)
    pat[i]->prob /= tot;

  // calculate cumulative probabilities
  // (guarded: with no patterns there is no element 0 to read or write)
  if (npats > 0) {
    cum_prob[0] = pat[0]->prob;
    for (i = 1; i < npats; i++)
      cum_prob[i] = cum_prob[i-1] + pat[i]->prob;
  }

  // allocate space for answer: it must be able to hold ANY pattern, so the
  // scan has to start at pattern 0 (it used to start at 1, which left
  // `answer` too small whenever the first pattern was the longest one)
  int maxlen = 0;
  for (i = 0; i < npats; i++)
    if (pat[i]->size() > maxlen)
      maxlen = pat[i]->size();
  answer = new ARString(maxlen);
}


// Frees everything the constructor allocated: every pattern and the pattern
// table itself, plus the cumulative-probability table, the scratch string
// `answer` and the ItemSet (the last three used to be leaked).
StringSet::~StringSet()
{
  int i;

  for (i = 0; i < npats; i++)
    delete pat[i];
  delete [] pat;

  delete [] cum_prob;   // new float [npats] in the constructor
  delete answer;        // new ARString(maxlen) in the constructor
  delete items;         // new ItemSet(nitems, tax) in the constructor
}


// Specializes every item of pattern #i and returns the result.
//
// The result is written into the set's single `answer` string, so each call
// overwrites what the previous call returned.  Size and confidence level
// are copied from the source pattern; each item is passed through
// ItemSet::specialize().
//
StringP StringSet::specialize(int i)
{
  StringP src = pat[i];

  answer->set_size( src->size() );
  answer->set_conf_lvl( src->conf_lvl() );

  int pos = 0;
  while (pos < src->size())
    {
      answer->set_item(pos, items->specialize( src->item(pos) ));
      ++pos;
    }

  return answer;
}


// Returns pattern #i: the stored pattern itself when there is no taxonomy,
// otherwise a specialized copy of it (see specialize()).
//
StringP StringSet::get_pat(int i)
{
  return tax ? specialize(i) : pat[i];
}


// Writes the item set followed by an "ItemSets:" listing of the patterns.
//
// Patterns are not sorted; instead two passes are made so that the
// high-probability ones (prob * npats > 10) come first, followed by those
// with 1 < prob * npats <= 10.  Patterns with prob * npats <= 1 are not
// printed at all.  Output precision is 3 for the listing and is set to 0
// afterwards.
void StringSet::display(ofstream &fp)
{
  items->display(fp);

  fp << "ItemSets:" << endl;
  fp << setprecision(3);

  int k;

  // pass 1: high-probability patterns
  k = 0;
  while (k < npats)
    {
      if (pat[k]->prob * npats > 10)
        pat[k]->display(fp, npats);
      ++k;
    }

  // pass 2: medium-probability patterns
  k = 0;
  while (k < npats)
    {
      if (pat[k]->prob * npats > 1 && pat[k]->prob * npats <= 10)
        pat[k]->display(fp, npats);
      ++k;
    }

  fp << setprecision(0);
  fp << endl;
}


// Writes every pattern to `fp`, expanding items through `lits` (see
// ARString::display(fp, lits, prob_comp)).
//
// Patterns are not sorted; two passes are made so that those with
// prob * npats > 6 are printed before those with prob * npats <= 6.
// Output precision is 3 for the listing and is set to 0 afterwards.
void StringSet::display(ofstream &fp, StringSet &lits)
{
  fp << setprecision(3);

  int k;

  // pass 1: high-probability patterns
  k = 0;
  while (k < npats)
    {
      if (pat[k]->prob * npats > 6)
        pat[k]->display(fp, lits, npats);
      ++k;
    }

  // pass 2: everything else
  k = 0;
  while (k < npats)
    {
      if (pat[k]->prob * npats <= 6)
        pat[k]->display(fp, lits, npats);
      ++k;
    }

  fp << setprecision(0);
}


//----------------------------- StringSetIter -----------------------------


// Returns a pattern chosen at random, or -- if unget_pat() was called since
// the last pick -- the previously returned pattern again.
//
// A negative last_pat is the "replay" marker left by unget_pat().
// NOTE(review): because -0 == 0, ungetting pattern #0 leaves last_pat at 0
// and the replay branch below is not taken for it.
//
// NOTE(review): the search assumes rand() yields a value in [0,1] that can
// be compared against cum_prob -- presumably a uniform-distribution member
// of the iterator rather than the C library rand(); confirm against the
// class declaration.
//
StringP StringSetIter::get_pat(void)
{
  int idx;

  if (last_pat < 0)
    {
      // replay: flip the marker back and hand out the same pattern
      last_pat = -last_pat;
      idx = last_pat;
    }
  else
    {
      // find the desired pattern using cum_prob table
      float r = rand();

      // initial guess, clamped to the last valid index
      idx = int(r * strset->npats);
      if (idx == strset->npats)
        --idx;

      // walk up while r lies above this slot ...
      while ( idx < (strset->npats-1) && r > strset->cum_prob[idx] )
        ++idx;
      // ... and back down while r lies below it
      while ( idx > 0 && r < strset->cum_prob[idx-1] )
        --idx;

      last_pat = idx;
    }

  return strset->tax ? strset->specialize(idx) : strset->pat[idx];
}


// Puts the last pattern back: flips the sign of last_pat so that the next
// get_pat() call, which tests for last_pat < 0, returns the same pattern
// again instead of drawing a new one.
// NOTE(review): when last_pat is 0 the sign flip has no effect.
void StringSetIter::unget_pat(void)
{
  last_pat *= -1;
}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -