📄 treeminer.cpp,v
字号:
static idnode *n1, *n2; int i1 = 0, i2 = 0; int e1, e2; while (i1 < l1->size() && i2 < l2->size()){ n1 = &(*l1)[i1]; n2 = &(*l2)[i2]; //look for matching cids if (n1->cid < n2->cid) i1++; else if (n1->cid > n2->cid) i2++; else{ //cids match e1 = i1; e2 = i2; //check the cid end positions in it1 and it2 while (e1 < l1->size() && (*l1)[e1].cid == n1->cid) e1++; while (e2 < l2->size() && (*l2)[e2].cid == n2->cid) e2++; //increment support if candidate found if (ins) check_ins(l1, l2, ins, i1, i2, e1, e2); if (outs) check_outs(l1, l2, outs, i1, i2, e1, e2); //restore index to end of cids i1 = e1; i2 = e2; } }}bool lexsmaller(vector<int> &subtree, vector<int> &cand){ int i,j; for (i=0, j=0; i < subtree.size() && j < cand.size();){ if (subtree[i] > cand[j]){ if (cand[j] != BranchIt) return false; else{ while (cand[j] == BranchIt) j++; if (subtree[i] > cand[j]) return false; else if (subtree[i] < cand[j]) return true; else return false; } } else if (subtree[i] < cand[j]){ if (subtree[i] != BranchIt) return true; else{ while(subtree[i] == BranchIt) i++; if (subtree[i] > cand[j]) return false; else if (subtree[i] < cand[j]) return true; else return true; } } else{ i++; j++; } } return false;}Eqnode *test_node(int iter, Eqclass *eq, int val, int pos){ Eqnode *eqn = NULL; //if noprune, return with a new Eqnode; don't prune for iter = 3 if (prune_type == noprune || iter == 3){ eqn = new Eqnode(val,pos); return eqn; } //perform pruning //prune based on frequent subtree static vector<int> cand; static vector<int> subtree; int hval; int scope, scnt; //form the candidate preifx cand = eq->prefix(); scnt = eq->get_scope(pos, scope); //what is the scope of node.pos while(scnt > scope){ cand.push_back(BranchIt); scnt--; } cand.push_back(val); //check subtrees int omita, omitb; bool res = true; //omit i-th item (omita) and associated BranchIt (omitb) int i,j,k; for (i=iter-3; i > 0; i--){ //find pos for i-th item for (j=0,k=0; j < cand.size(); j++){ if (cand[j] != BranchIt){ if (k == i){ omita = j; break; } else k++; } } //find pos for associated BranchIt scnt = 0; for(j++; j < cand.size() && scnt >= 0; j++){ if (cand[j] == BranchIt) scnt--; else scnt++; } if (scnt >= 0) omitb = cand.size(); else omitb = j-1; //cout << "OMIT " << i << " " << omita << " " << omitb << endl; hval = 0; subtree.clear(); for (k=0; k < cand.size(); k++){ if (k != omita && k != omitb){ subtree.push_back(cand[k]); if (cand[k] != BranchIt) hval += cand[k]; } } //cout << "LEXTEST " << subtree << " vs " << cand; if (lexsmaller(subtree, cand)){ //cout << " -- SMALLER "; res = FK.find(iter-1, subtree, hval); //cout << ((res)? " ** FOUND\n":" ** NOTFOUND\n"); if (!res) return NULL; //subtree not found! } //else cout << " -- GREATER " << endl; } if (res) eqn = new Eqnode(val,pos); else res = NULL; return eqn;}void enumerate_freq(Eqclass *eq, int iter){ TimeTracker tt; Eqclass *neq; list<Eqnode *>::iterator ni, nj; Eqnode *ins, *outs; if (prune_type == noprune) eq->sort_nodes(); //doesn't work with pruning //cout << "FX " << *eq << endl; for (ni = eq->nlist().begin(); ni != eq->nlist().end(); ++ni){ neq = new Eqclass; neq->set_prefix(eq->prefix(),*(*ni)); tt.Start(); for (nj = eq->nlist().begin(); nj != eq->nlist().end(); ++nj){ if ((*ni)->pos < (*nj)->pos) continue; ins = outs = NULL; if ((*ni)->pos > (*nj)->pos){ outs = test_node(iter, neq, (*nj)->val, (*nj)->pos); } else{ outs = test_node(iter, neq, (*nj)->val, (*nj)->pos); ins = test_node(iter, neq, (*nj)->val, neq->prefix().size()-1); } //cout << "prefix " << neq->print_prefix() << " -- " // << *(*nj) << " " << outs_depth << endl; if (ins || outs) get_intersect(&(*ni)->tlist, &(*nj)->tlist, ins, outs); if (outs){ stats.incrcand(iter-1); //cout << "OUTS " << *outs; if (notfrequent(*outs)) delete outs; else{ neq->add_node(outs); stats.incrlarge(iter-1); } } if (ins){ // cout << "INS " << *ins; stats.incrcand(iter-1); if (notfrequent(*ins)) delete ins; else{ neq->add_node(ins); stats.incrlarge(iter-1); } } } stats.incrtime(iter-1, tt.Stop()); if (!neq->nlist().empty()){ if (output) cout << *neq; if (prune_type == prune) FK.add(iter,neq); enumerate_freq(neq, iter+1); } delete neq; }}void form_f2_lists(Eqclass *eq){ list<Eqnode *>::iterator ni; idlist *l1, *l2; Eqnode *ins=NULL, *outs=NULL; int pit, nit; TimeTracker tt; tt.Start(); pit = eq->prefix()[0]; l1 = DCB->Idlists[pit]; for (ni=eq->nlist().begin(); ni != eq->nlist().end(); ++ni){ nit = (*ni)->val; l2 = DCB->Idlists[nit]; ins = (*ni); //cout << "LISTS " << pit << " " << nit << " " << l1->size() // << " " << l2->size() << " " << ins->tlist.size() << endl; get_intersect(l1, l2, ins, outs); //cout << "f2prefix " << eq->prefix() << endl; //cout << "f2 " << *ins; } stats.incrtime(1,tt.Stop());}void get_Fk(list<Eqclass *> &F2list){ Eqclass *eq; while(!F2list.empty()){ eq = F2list.front(); form_f2_lists(eq); //cout << *eq; switch(alg_type){ case treeminer: enumerate_freq(eq, 3); break; case maxtreeminer: cout << "NOT IMPLEMENTED\n"; break; } delete eq; F2list.pop_front(); }}int main(int argc, char **argv){ TimeTracker tt; tt.Start(); parse_args(argc, argv); DCB = new Dbase_Ctrl_Blk(infile); get_F1(); list<Eqclass *> *F2list = get_F2(); //DCB->print_vertical(); get_Fk(*F2list); for (int i=2; i < stats.size(); i++){ cout << "F" << i+1 << " - "; cout << stats[i].numlarge << " " << stats[i].numcand << endl; } double tottime = tt.Stop(); stats.tottime = tottime; cout << "TIME = " << tottime << endl; //write results to summary file ofstream summary("summary.out", ios::app); summary << "VTREEMINER "; switch(sort_type){ case incr: summary << "INCR "; break; case decr: summary << "DECR "; break; default: break; } switch(prune_type){ case prune: summary << "PRUNE "; break; deafult: break; } if (!count_unique) summary << "MULTIPLE "; if (use_fullpath) summary << "FULLPATH "; summary << infile << " " << MINSUP_PER << " " << DBASE_NUM_TRANS << " " << MINSUPPORT << " "; summary << stats << endl; summary.close(); exit(0);}@1.1log@Initial revision@text@d3 1d15 1d33 16d57 1a57 1 while ((c=getopt(argc,argv,"a:fi:los:u"))!=-1){d59 4a62 4 case 'a': //absolute support MINSUPPORT = atoi(optarg); break; case 'f':d75 4a78 1 case 's': //support value for L2d81 3d87 3a94 1d163 9d176 5a180 4 if (itcnt[i] >= MINSUPPORT) { if (output) cout << i << " - " << itcnt[i] << endl; DCB->FreqIdx[j] = i; DCB->FreqMap[i] = j;d183 1a183 1 else DCB->FreqMap[i] = -1;d188 5d269 1d303 1a303 1bool check_ins(idlist *l1, idlist *l2, Eqnode *ins, d308 1a308 1 bool retflg = false;d332 2a333 1 retflg = true;d350 2a351 1 return retflg;d355 1a355 1bool check_outs(idlist *l1, idlist *l2, Eqnode *outs,d360 1a360 1 bool retflg = false;d386 2a387 1 retflg = true;d398 1a398 1 return retflg;a407 1 bool flg;d425 3a427 9 //increment support if (ins){ flg = check_ins(l1, l2, ins, i1, i2, e1, e2); if (flg) ins->sup++; } if (outs){ flg = check_outs(l1, l2, outs, i1, i2, e1, e2); if (flg) outs->sup++; }d436 121d563 5d577 1a577 2 outs = new Eqnode ((*nj)->val, (*nj)->pos); ins = NULL;d580 2a581 2 outs = new Eqnode ((*nj)->val, (*nj)->pos); ins = new Eqnode ((*nj)->val, neq->prefix().size()-1);d586 2a587 1 get_intersect(&(*ni)->tlist, &(*nj)->tlist, ins, outs);d589 1a589 2 if (ins){ // cout << "INS " << *ins;d591 2a592 1 if (notfrequent(*ins)) delete ins;d594 1a594 1 neq->add_node(ins);d598 2a599 1 if (outs){d601 1a601 2 //cout << "OUTS " << *outs; if (notfrequent(*outs)) delete outs;d603 1a603 1 neq->add_node(outs);d611 1a617 1d644 1d649 9a657 1 enumerate_freq(eq, 3);d683 1a683 1 cout << stats << endl;d685 20a704 1 cout << "TIME = " << tottime << endl;@
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -