⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 treeminer.cpp,v

📁 关联规则中的频繁项集生成算法TreeMiner
💻 CPP,V
📖 第 1 页 / 共 2 页
字号:
   static idnode *n1, *n2;   int i1 = 0, i2 = 0;   int e1, e2;   while (i1 < l1->size() && i2 < l2->size()){      n1 = &(*l1)[i1];      n2 = &(*l2)[i2];      //look for matching cids      if (n1->cid < n2->cid) i1++;      else if (n1->cid > n2->cid) i2++;      else{         //cids match         e1 = i1;         e2 = i2;         //check the cid end positions in it1 and it2         while (e1 < l1->size() && (*l1)[e1].cid == n1->cid) e1++;         while (e2 < l2->size() && (*l2)[e2].cid == n2->cid) e2++;         //increment support if candidate found         if (ins) check_ins(l1, l2, ins, i1, i2, e1, e2);         if (outs) check_outs(l1, l2, outs, i1, i2, e1, e2);         //restore index to end of cids         i1 = e1;         i2 = e2;      }   }}bool lexsmaller(vector<int> &subtree, vector<int> &cand){   int i,j;   for (i=0, j=0; i < subtree.size() && j < cand.size();){      if (subtree[i] > cand[j]){         if (cand[j] != BranchIt) return false;         else{            while (cand[j] == BranchIt) j++;            if (subtree[i] > cand[j]) return false;            else if (subtree[i] < cand[j]) return true;            else return false;         }               }      else if (subtree[i] < cand[j]){         if (subtree[i] != BranchIt) return true;         else{            while(subtree[i] == BranchIt) i++;            if (subtree[i] > cand[j]) return false;            else if (subtree[i] < cand[j]) return true;            else return true;         }      }      else{         i++;         j++;      }   }   return false;}Eqnode *test_node(int iter, Eqclass *eq, int val, int pos){   Eqnode *eqn = NULL;      //if noprune, return with a new Eqnode; don't prune for iter = 3   if (prune_type == noprune || iter == 3){      eqn = new Eqnode(val,pos);      return eqn;   }      //perform pruning   //prune based on frequent subtree   static vector<int> cand;   static vector<int> subtree;    int hval;   int scope, scnt;   //form the candidate preifx   cand = eq->prefix();   scnt = eq->get_scope(pos, scope); //what is the scope of node.pos   while(scnt > scope){      cand.push_back(BranchIt);      scnt--;   }   cand.push_back(val);   //check subtrees   int omita, omitb;   bool res = true;   //omit i-th item (omita) and associated BranchIt (omitb)   int i,j,k;   for (i=iter-3; i > 0; i--){      //find pos for i-th item      for (j=0,k=0; j < cand.size(); j++){         if (cand[j] != BranchIt){            if (k == i){               omita = j;               break;            }            else k++;         }      }            //find pos for associated BranchIt      scnt = 0;      for(j++; j < cand.size() && scnt >= 0; j++){         if (cand[j] == BranchIt) scnt--;         else scnt++;      }      if (scnt >= 0) omitb = cand.size();      else omitb = j-1;      //cout << "OMIT " << i << " " << omita << " " << omitb << endl;      hval = 0;      subtree.clear();      for (k=0; k < cand.size(); k++){         if (k != omita && k != omitb){            subtree.push_back(cand[k]);            if (cand[k] != BranchIt) hval += cand[k];         }      }      //cout << "LEXTEST " << subtree << " vs " << cand;         if (lexsmaller(subtree, cand)){         //cout << " -- SMALLER ";         res = FK.find(iter-1, subtree, hval);           //cout << ((res)? " ** FOUND\n":" ** NOTFOUND\n");         if (!res) return NULL; //subtree not found!      }      //else cout << " -- GREATER " << endl;   }        if (res) eqn = new Eqnode(val,pos);   else res = NULL;   return eqn;}void enumerate_freq(Eqclass *eq, int iter){   TimeTracker tt;   Eqclass *neq;   list<Eqnode *>::iterator ni, nj;   Eqnode *ins, *outs;   if (prune_type == noprune) eq->sort_nodes(); //doesn't work with pruning   //cout << "FX " << *eq << endl;   for (ni = eq->nlist().begin(); ni != eq->nlist().end(); ++ni){      neq = new Eqclass;      neq->set_prefix(eq->prefix(),*(*ni));      tt.Start();      for (nj = eq->nlist().begin(); nj != eq->nlist().end(); ++nj){          if ((*ni)->pos < (*nj)->pos) continue;         ins = outs = NULL;         if ((*ni)->pos > (*nj)->pos){            outs = test_node(iter, neq, (*nj)->val, (*nj)->pos);         }         else{             outs = test_node(iter, neq, (*nj)->val, (*nj)->pos);            ins = test_node(iter, neq, (*nj)->val, neq->prefix().size()-1);         }           //cout << "prefix " << neq->print_prefix() << " -- "          //     << *(*nj) << " " << outs_depth << endl;         if (ins || outs)            get_intersect(&(*ni)->tlist, &(*nj)->tlist, ins, outs);         if (outs){            stats.incrcand(iter-1);            //cout << "OUTS " << *outs;            if (notfrequent(*outs)) delete outs;            else{               neq->add_node(outs);               stats.incrlarge(iter-1);            }         }         if (ins){            // cout << "INS " << *ins;            stats.incrcand(iter-1);            if (notfrequent(*ins)) delete ins;            else{               neq->add_node(ins);               stats.incrlarge(iter-1);            }         }      }      stats.incrtime(iter-1, tt.Stop());      if (!neq->nlist().empty()){         if (output) cout << *neq;         if (prune_type == prune) FK.add(iter,neq);         enumerate_freq(neq, iter+1);      }      delete neq;   }}void form_f2_lists(Eqclass *eq){   list<Eqnode *>::iterator ni;   idlist *l1, *l2;   Eqnode *ins=NULL, *outs=NULL;   int pit, nit;   TimeTracker tt;      tt.Start();   pit = eq->prefix()[0];   l1 = DCB->Idlists[pit];   for (ni=eq->nlist().begin(); ni != eq->nlist().end(); ++ni){      nit = (*ni)->val;      l2 = DCB->Idlists[nit];      ins = (*ni);      //cout << "LISTS " << pit << " " << nit << " " << l1->size()       //     << " " << l2->size() << " " << ins->tlist.size() << endl;      get_intersect(l1, l2, ins, outs);      //cout << "f2prefix " << eq->prefix() << endl;      //cout << "f2 " << *ins;   }   stats.incrtime(1,tt.Stop());}void get_Fk(list<Eqclass *> &F2list){   Eqclass *eq;   while(!F2list.empty()){      eq = F2list.front();      form_f2_lists(eq);      //cout << *eq;      switch(alg_type){      case treeminer:         enumerate_freq(eq, 3);          break;      case maxtreeminer:         cout << "NOT IMPLEMENTED\n";         break;      }      delete eq;      F2list.pop_front();   }}int main(int argc, char **argv){   TimeTracker tt;   tt.Start();    parse_args(argc, argv);      DCB = new Dbase_Ctrl_Blk(infile);    get_F1();   list<Eqclass *> *F2list = get_F2();   //DCB->print_vertical();   get_Fk(*F2list);   for (int i=2; i < stats.size(); i++){      cout << "F" << i+1 << " - ";      cout << stats[i].numlarge << " " << stats[i].numcand << endl;   }      double tottime = tt.Stop();   stats.tottime = tottime;   cout << "TIME = " << tottime << endl;   //write results to summary file   ofstream summary("summary.out", ios::app);    summary << "VTREEMINER ";   switch(sort_type){   case incr: summary << "INCR "; break;   case decr: summary << "DECR "; break;   default: break;   }   switch(prune_type){   case prune: summary << "PRUNE "; break;   deafult: break;   }   if (!count_unique) summary << "MULTIPLE ";   if (use_fullpath) summary << "FULLPATH ";   summary << infile << " " << MINSUP_PER << " "           << DBASE_NUM_TRANS << " " << MINSUPPORT << " ";   summary << stats << endl;   summary.close();      exit(0);}@1.1log@Initial revision@text@d3 1d15 1d33 16d57 1a57 1     while ((c=getopt(argc,argv,"a:fi:los:u"))!=-1){d59 4a62 4       case 'a': //absolute support	 MINSUPPORT = atoi(optarg);	 break;       case 'f':d75 4a78 1       case 's': //support value for L2d81 3d87 3a94 1d163 9d176 5a180 4      if (itcnt[i] >= MINSUPPORT) {	if (output) cout << i << " - " << itcnt[i] << endl;         DCB->FreqIdx[j] = i;         DCB->FreqMap[i] = j;d183 1a183 1      else DCB->FreqMap[i] = -1;d188 5d269 1d303 1a303 1bool check_ins(idlist *l1, idlist *l2, Eqnode *ins, d308 1a308 1   bool retflg = false;d332 2a333 1            retflg = true;d350 2a351 1   return retflg;d355 1a355 1bool check_outs(idlist *l1, idlist *l2, Eqnode *outs,d360 1a360 1   bool retflg = false;d386 2a387 1            retflg = true;d398 1a398 1   return retflg;a407 1   bool flg;d425 3a427 9         //increment support         if (ins){            flg = check_ins(l1, l2, ins, i1, i2, e1, e2);            if (flg) ins->sup++;         }         if (outs){            flg = check_outs(l1, l2, outs, i1, i2, e1, e2);            if (flg) outs->sup++;         }d436 121d563 5d577 1a577 2            outs = new Eqnode ((*nj)->val, (*nj)->pos);            ins = NULL;d580 2a581 2            outs = new Eqnode ((*nj)->val, (*nj)->pos);            ins = new Eqnode ((*nj)->val, neq->prefix().size()-1);d586 2a587 1         get_intersect(&(*ni)->tlist, &(*nj)->tlist, ins, outs);d589 1a589 2         if (ins){            // cout << "INS " << *ins;d591 2a592 1            if (notfrequent(*ins)) delete ins;d594 1a594 1               neq->add_node(ins);d598 2a599 1         if (outs){d601 1a601 2            //cout << "OUTS " << *outs;            if (notfrequent(*outs)) delete outs;d603 1a603 1               neq->add_node(outs);d611 1a617 1d644 1d649 9a657 1      enumerate_freq(eq, 3);d683 1a683 1   cout << stats << endl;d685 20a704 1   cout << "TIME = " << tottime << endl;@

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -