⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gendata.cpp

📁 关联规则挖掘数据产生程序.VISUAL C++ 可产生满足要求的挖掘数据.
💻 CPP
字号:
// GenData.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <stdlib.h>
#include <new.h>
#include <string.h>

#include "Parameters.h"
#include "ARString.h"
#include "Taxonomy.h"
#include "Transaction.h"

void print_version(void);
void get_args(TransPar &par, int argc, char **argv);
void get_args(TaxPar &par, int argc, char **argv);
void get_args(SeqPar &par, int argc, char **argv);

void gen_rules(TransPar &par);
Transaction *mk_tran(StringSetIter &lits, int tlen, Taxonomy *tax = NULL);

void gen_seq(SeqPar &par);
CustSeq *mk_seq(int cid, StringSetIter &lseq, StringSet &lits, int slen, int tlen);

char data_file[256];
char pat_file[256];
char tax_file[256];


// Reports a memory allocation failure on cout and terminates the program
// with exit status 1.  Takes no arguments and never returns.
// It has the shape of a new-handler; the set_new_handler() registration in
// main() is currently commented out.
void memory_err(void)
{
  const char *message = "A memory allocation error occurred. \n";

  cout << message;
  exit(1);
}



// Generate Transactions
//
// Writes par.ntrans synthetic transactions to the file named by the global
// data_file, and the parameter summary followed by the pattern table to the
// file named by the global pat_file.  Both files are truncated on open.
//
//   par - generation parameters (item count, pattern settings, number of
//         transactions, seed, ascii/binary output selection).
//
// Exits with status 1 if either output file cannot be opened.
//
void gen_rules(TransPar &par)
{
  StringSet *lits;
  StringSetIter *patterns;
  Transaction *trans;
  PoissonDist *tlen;

  ofstream data_fp;
  ofstream pat_fp;

  data_fp.open(data_file, ios::trunc);
  pat_fp.open(pat_file, ios::trunc);
  // Same check as gen_taxrules(): without it a bad path silently produces
  // no output at all.
  if (data_fp.fail() || pat_fp.fail()) {
    cerr << "Error opening output file" << endl;
    exit(1);
  }

  lits = new StringSet(par.nitems, par.lits);

  // Reset the random seed before generating transactions
  // (only done when the configured seed is negative).
  if (par.seed < 0) RandSeed::set_seed(par.seed);

  // Each transaction length is one plus a draw from this distribution.
  tlen = new PoissonDist(par.tlen-1);

  par.write(pat_fp);
  lits->display(pat_fp);

  patterns = new StringSetIter(*lits);
  for (int i = 0; i < par.ntrans; i ++)
    {
      trans = mk_tran(*patterns, (*tlen)()+1);
      if (par.ascii)
        trans->write_asc(data_fp);
      else
        trans->write(data_fp);
      delete trans;
    }

  // NOTE(review): lits, tlen and patterns are never deleted.  The caller
  // returns from main() right after this function, so the leak is benign;
  // confirm the destructors are safe before adding explicit deletes.

  data_fp.close();
  pat_fp.close();
}


// Generate Transactions and Taxonomy
//
// Builds a taxonomy from par, writes it to the file named by the global
// tax_file, then writes par.ntrans synthetic transactions to data_file and
// the parameter summary plus pattern table to pat_file.  All three files
// are truncated on open.
//
//   par - generation parameters (item count, taxonomy shape, pattern
//         settings, number of transactions, ascii/binary output selection).
//
// Exits with status 1 if any output file cannot be opened.
//
void gen_taxrules(TaxPar &par)
{
  Taxonomy *tax;
  StringSet *lits;
  StringSetIter *patterns;
  Transaction *trans;
  PoissonDist *tlen;

  ofstream data_fp;
  ofstream pat_fp;
  ofstream tax_fp;

  data_fp.open(data_file, ios::trunc);
  pat_fp.open(pat_file, ios::trunc);
  tax_fp.open(tax_file, ios::trunc);
  if (data_fp.fail() || pat_fp.fail() || tax_fp.fail()) {
    cerr << "Error opening output file" << endl;
    exit(1);
  }

  // generate taxonomy and write it to file
  tax = new Taxonomy(par.nitems, par.nroots, par.fanout, par.depth_ratio);
  if (par.ascii)
    tax->write_asc(tax_fp);
  else
    tax->write(tax_fp);

  // Each transaction length is one plus a draw from this distribution.
  tlen = new PoissonDist(par.tlen-1);

  lits = new StringSet(par.nitems, par.lits, tax);

  par.write(pat_fp);
  lits->display(pat_fp);

  patterns = new StringSetIter(*lits);
  for (int i = 0; i < par.ntrans; i ++)
    {
      trans = mk_tran(*patterns, (*tlen)()+1, tax);
      if (par.ascii)
        trans->write_asc(data_fp);
      else
        trans->write(data_fp);
      // Free the transaction exactly once; the previous code deleted the
      // same pointer twice, which is undefined behaviour.
      delete trans;
    }

  // NOTE(review): tax, lits, tlen and patterns are never deleted.  The
  // caller returns from main() right after this function, so the leak is
  // benign; confirm the destructors are safe before adding deletes.

  data_fp.close();
  pat_fp.close();
  tax_fp.close();
}


// Build a single transaction
//
// Allocates a Transaction for length tlen and keeps feeding it patterns
// taken from lits until its size() is no longer below tlen, or until add()
// refuses a pattern.  A refused pattern is handed back to the iterator with
// unget_pat().
//
//   lits - iterator over the table of patterns
//   tlen - transaction length
//   tax  - not used by this function
//
// Returns the newly allocated transaction; the caller deletes it.
//
Transaction *mk_tran(StringSetIter &lits, int tlen, Taxonomy *tax)
{
  StringP next;
  Transaction *result = new Transaction(tlen);

  for (;;)
    {
      if (!(result->size() < tlen))
        break;                  // target length reached

      next = lits.get_pat();    // fetch the next pattern
      if (result->add(*next))
        continue;               // pattern accepted, keep filling

      // add() rejected the pattern: return it to the iterator and stop.
      lits.unget_pat();
      break;
    }

  return result;
}


// Generate Sequences
//
// Writes par.ncust synthetic customer sequences to the file named by the
// global data_file, and the parameter summary followed by the sequence
// pattern table to the file named by the global pat_file.  Both files are
// truncated on open.  Customer ids run from 1 to par.ncust.
//
//   par - generation parameters (item count, itemset and sequence pattern
//         settings, number of customers, ascii/binary output selection).
//
// Exits with status 1 if either output file cannot be opened.
//
void gen_seq(SeqPar &par)
{
  StringSet *lseq;	// potentially large sequences
  StringSetIter *patterns;
  StringSet *lits;	// potentially large itemsets
  CustSeq *cust;	// customer sequence currently being written
  PoissonDist *slen;
  PoissonDist *tlen;

  ofstream data_fp;
  ofstream pat_fp;

  data_fp.open(data_file, ios::trunc);
  pat_fp.open(pat_file, ios::trunc);
  // Same check as gen_taxrules(): without it a bad path silently produces
  // no output at all.
  if (data_fp.fail() || pat_fp.fail()) {
    cerr << "Error opening output file" << endl;
    exit(1);
  }

  // Sequence and transaction lengths are one plus a draw from these.
  slen = new PoissonDist(par.slen-1);
  tlen = new PoissonDist(par.tlen-1);

  lits = new StringSet(par.nitems, par.lits);
  lseq = new StringSet(par.lits.npats, par.lseq, NULL, par.rept, par.rept_var);

//  pat_fp << "Large Itemsets:" << endl;
//  lits->write(pat_fp);
//  pat_fp << endl << endl << "Sequences:" << endl;
  par.write(pat_fp);
  lseq->display(pat_fp, *lits);

  patterns = new StringSetIter(*lseq);
  for (int i = 0; i < par.ncust; i ++)
    {
      // NOTE(review): the two length draws are function arguments, so the
      // order in which they are evaluated is unspecified by the language.
      cust = mk_seq(i+1, *patterns, *lits, (*slen)()+1, (*tlen)()+1);
      if (par.ascii)
        cust->write_asc(data_fp);
      else
        cust->write(data_fp);
      delete cust;
    }

  // NOTE(review): slen, tlen, lits, lseq and patterns are never deleted.
  // The caller returns from main() right after this function, so the leak
  // is benign; confirm the destructors are safe before adding deletes.

  data_fp.close();
  pat_fp.close();
}


// Build a single customer-sequence
//
// Allocates a CustSeq for customer cid and keeps feeding it sequence
// patterns taken from lseq until its size() is no longer below slen * tlen,
// or until add() refuses a pattern.  A refused pattern is handed back to
// the iterator with unget_pat().
//
//   cid  - customer-id
//   lseq - iterator over the table of large sequences
//   lits - table of large itemsets
//   slen - sequence length
//   tlen - avg. transaction length
//
// Returns the newly allocated customer-sequence; the caller deletes it.
//
CustSeq *mk_seq(int cid, StringSetIter &lseq, StringSet &lits, int slen, int tlen)
{
  StringP next;
  CustSeq *result = new CustSeq(cid, slen, tlen);
  const int target = slen * tlen;       // size at which filling stops

  for (;;)
    {
      if (!(result->size() < target))
        break;                  // target size reached

      next = lseq.get_pat();    // fetch the next sequence pattern
      if (result->add(*next, lits))
        continue;               // pattern accepted, keep filling

      // add() rejected the pattern (transaction full): return it and stop.
      lseq.unget_pat();
      break;
    }

  return result;
}


// Program entry point.
//
// The first command-line argument selects the mode:
//   lit       - generate transactions            (gen_rules)
//   seq       - generate customer sequences      (gen_seq)
//   tax       - generate transactions + taxonomy (gen_taxrules)
//   -version  - print the version and exit
// The remaining arguments are parsed by the matching get_args() overload.
//
// Returns 0 on success, or 1 after printing a usage message when the mode
// is missing or not recognised.
//
int main(int argc, char **argv)
{

//  set_new_handler(memory_err);

  // With no arguments argv[1] is a null pointer and must not reach
  // strcmp(); an empty mode falls through to the usage message below.
  const char *mode = (argc > 1) ? argv[1] : "";
  // Guard the program name too: argv[0] may be absent when argc is 0.
  const char *prog = (argc > 0 && argv[0] != NULL) ? argv[0] : "";

  if (strcmp(mode, "lit") == 0) {
    // For Rules
    TransPar par;

    get_args(par, argc, argv);   // get arguments
    gen_rules(par);              // generate rules (really, just transactions)
  }

  else if (strcmp(mode, "seq") == 0) {
    // For Sequences
    SeqPar par;

    get_args(par, argc, argv);   // get arguments
    gen_seq(par);                // generate sequences
  }

  else if (strcmp(mode, "tax") == 0) {
    // For Rules with Taxonomies
    TaxPar par;

    get_args(par, argc, argv);   // get arguments
    gen_taxrules(par);           // generate rules (really, just transactions)
  }

  else if (strcmp(mode, "-version") == 0) {
    print_version();
    return 0;
  }

  else {
    cerr << "Synthetic Data Generation, ";
    print_version();
    cerr << "Usage:  " << prog << " lit|tax|seq [options]\n";
    cerr << "        " << prog
      << " lit|tax|seq -help     For more detailed list of options\n";
    return 1;
  }

  return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -